feat: Bot-Code + Core-Module persistent aufgenommen
This commit is contained in:
parent
cf5192c5b0
commit
17d51e6924
10 changed files with 581 additions and 567 deletions
|
|
@ -1,159 +0,0 @@
|
||||||
"""Intelligente Kontext-Sammlung für den Hausmeister-Bot.
|
|
||||||
Entscheidet anhand der Frage welche Datenquellen abgefragt werden."""
|
|
||||||
|
|
||||||
import sys
|
|
||||||
import os
|
|
||||||
import re
|
|
||||||
|
|
||||||
sys.path.insert(0, os.path.dirname(__file__))
|
|
||||||
from core import config, loki_client, proxmox_client
|
|
||||||
|
|
||||||
|
|
||||||
def _load_config():
    """Parse and return the homelab configuration.

    Thin wrapper around ``config.parse_config()`` so every gather function
    loads the configuration the same way.
    """
    cfg = config.parse_config()
    return cfg
|
|
||||||
|
|
||||||
|
|
||||||
def _get_tokens(cfg):
|
|
||||||
tokens = {}
|
|
||||||
tn = cfg.raw.get("PVE_TOKEN_HETZNER_NAME", "")
|
|
||||||
tv = cfg.raw.get("PVE_TOKEN_HETZNER_VALUE", "")
|
|
||||||
if tn and tv:
|
|
||||||
tokens["pve-hetzner"] = {"name": tn, "value": tv}
|
|
||||||
return tokens
|
|
||||||
|
|
||||||
|
|
||||||
def _get_passwords(cfg):
|
|
||||||
return {
|
|
||||||
"pve-hetzner": cfg.passwords.get("hetzner", ""),
|
|
||||||
"pve1": cfg.passwords.get("default", ""),
|
|
||||||
"pve3": cfg.passwords.get("default", ""),
|
|
||||||
"default": cfg.passwords.get("default", ""),
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def gather_status() -> str:
    """Return the full status of all containers (used for /status).

    Loads the configuration, queries every Proxmox host through
    ``proxmox_client.get_all_containers`` and renders the result with
    ``proxmox_client.format_containers``.
    """
    cfg = _load_config()
    passwords = _get_passwords(cfg)
    tokens = _get_tokens(cfg)
    all_containers = proxmox_client.get_all_containers(passwords, tokens)
    return proxmox_client.format_containers(all_containers)
|
|
||||||
|
|
||||||
|
|
||||||
def gather_errors(hours: float = 2) -> str:
    """Return current error log lines from Loki (used for /errors).

    Args:
        hours: How far back to look.

    Returns:
        Up to 30 error entries rendered by ``loki_client.format_logs``.
    """
    recent_errors = loki_client.get_errors(hours=hours, limit=30)
    formatted = loki_client.format_logs(recent_errors)
    return formatted
|
|
||||||
|
|
||||||
|
|
||||||
def gather_container_status(query: str) -> str:
    """Return the status of a single container.

    Args:
        query: Free text. If it contains a standalone three-digit number,
            that number is used as the VMID; otherwise the stripped text is
            used as a container name.

    Returns:
        A multi-line status report, or a one-line message when the container
        is unknown, its host has no configured address, or the Proxmox
        request fails.
    """
    cfg = _load_config()

    # A standalone three-digit number wins over a name lookup.
    id_match = re.search(r'\b(\d{3})\b', query)
    vmid = int(id_match.group(1)) if id_match else None
    name = None if id_match else query.strip()

    ct = config.get_container(cfg, vmid=vmid, name=name)
    if not ct:
        return f"Container nicht gefunden: {query}"

    host_ip = proxmox_client.PROXMOX_HOSTS.get(ct.host)
    if not host_ip:
        return f"Host nicht erreichbar: {ct.host}"

    token = _get_tokens(cfg).get(ct.host, {})
    pw = _get_passwords(cfg).get(ct.host, "")
    try:
        client = proxmox_client.ProxmoxClient(
            host_ip,
            password=pw,
            token_name=token.get("name", ""),
            token_value=token.get("value", ""),
        )
        status = client.get_container_status(ct.vmid)
    except Exception as e:
        # Best effort: report the failure as text instead of raising.
        return f"Proxmox-Fehler: {e}"

    bytes_per_mb = 1024 * 1024
    mem_mb = status.get("mem", 0) // bytes_per_mb
    maxmem_mb = status.get("maxmem", 0) // bytes_per_mb
    uptime_h = status.get("uptime", 0) // 3600

    report = [
        f"CT {ct.vmid} — {ct.name}",
        f"Host: {ct.host}",
        f"Status: {status.get('status', '?')}",
        f"RAM: {mem_mb}/{maxmem_mb} MB",
        f"CPU: {status.get('cpus', '?')} Kerne",
        f"Uptime: {uptime_h}h",
        f"Tailscale: {ct.tailscale_ip or '—'}",
        f"Dienste: {ct.services}",
    ]
    return "\n".join(report)
|
|
||||||
|
|
||||||
|
|
||||||
def gather_logs(container: str, hours: float = 1) -> str:
    """Return recent Loki log lines of one container.

    Args:
        container: Value of the ``host`` label to select.
        hours: How far back to look.

    Returns:
        Up to 20 entries rendered by ``loki_client.format_logs``.
    """
    selector = '{host="' + container + '"}'
    entries = loki_client.query_logs(selector, hours=hours, limit=20)
    return loki_client.format_logs(entries)
|
|
||||||
|
|
||||||
|
|
||||||
def gather_health(container: str) -> str:
    """Return a short health report for one container.

    Args:
        container: Host label passed to ``loki_client.get_health``.

    Returns:
        Four lines: status emoji plus host, status, error count over the
        last 24 hours, and whether the host is sending logs.
    """
    health = loki_client.get_health(container, hours=24)

    emoji_by_status = {"healthy": "✅", "warning": "⚠️", "critical": "🔴"}
    status_emoji = emoji_by_status.get(health.get("status", ""), "❓")

    # The key is the literal text "errors_last_{hours}h" (no interpolation);
    # it must match the key produced by loki_client.get_health exactly.
    error_count = health.get('errors_last_{hours}h', '?')
    sending = 'ja' if health.get('sending_logs') else 'nein'

    report = [
        f"{status_emoji} {health.get('host', container)}",
        f"Status: {health.get('status', '?')}",
        f"Fehler (24h): {error_count}",
        f"Sendet Logs: {sending}",
    ]
    return "\n".join(report)
|
|
||||||
|
|
||||||
|
|
||||||
def gather_silence() -> str:
    """Report which hosts have stopped sending logs.

    Returns:
        A confirmation when no host is silent, an error line when the first
        entry returned by ``loki_client.check_silence`` carries an ``error``
        key, otherwise a bullet list of the silent hosts.
    """
    silent = loki_client.check_silence(minutes=35)
    if not silent:
        return "✅ Alle Hosts senden Logs."

    first = silent[0]
    if "error" in first:
        return f"Fehler: {first['error']}"

    header = "⚠️ Stille Hosts (keine Logs seit 35+ Min):\n"
    bullets = [f" • {entry['host']}" for entry in silent]
    return "\n".join([header, *bullets])
|
|
||||||
|
|
||||||
|
|
||||||
def gather_context_for_question(question: str) -> str:
    """Collect the context relevant to a free-text question.

    The lower-cased question is scanned for keywords. Each matching keyword
    group adds one section, and so does a ``ct<3 digits>`` reference or a
    known service name. If nothing matches, the container status and the
    errors of the last hour are used as a fallback.

    Args:
        question: The user's question.

    Returns:
        All collected sections joined by blank lines.
    """
    q = question.lower()
    parts = []

    # (keywords, section header, lazy collector). The collectors are only
    # called when one of their keywords occurs in the question.
    keyword_sources = (
        (("fehler", "error", "problem", "kaputt", "down"),
         "=== Aktuelle Fehler ===\n", lambda: gather_errors(hours=2)),
        (("status", "läuft", "container", "übersicht", "alles"),
         "=== Container Status ===\n", gather_status),
        (("still", "silence", "stumm", "logs"),
         "=== Stille Hosts ===\n", gather_silence),
    )
    for keywords, header, collect in keyword_sources:
        if any(word in q for word in keywords):
            parts.append(header + collect())

    ct_match = re.search(r'\bct[- ]?(\d{3})\b', q)
    if ct_match:
        ct_id = ct_match.group(1)
        parts.append(f"=== CT {ct_id} ===\n" + gather_container_status(ct_id))

    known_services = (
        "wordpress", "rss", "seafile", "forgejo", "portainer",
        "fuenfvoracht", "redax", "flugscanner", "edelmetall",
    )
    for name in known_services:
        if name in q:
            parts.append(f"=== {name} ===\n" + gather_container_status(name))

    if not parts:
        # Nothing specific was recognised: give a general overview.
        parts.append("=== Container Status ===\n" + gather_status())
        parts.append("=== Aktuelle Fehler ===\n" + gather_errors(hours=1))

    return "\n\n".join(parts)
|
|
||||||
1
homelab-ai-bot/core/__init__.py
Normal file
1
homelab-ai-bot/core/__init__.py
Normal file
|
|
@ -0,0 +1 @@
|
||||||
|
|
||||||
179
homelab-ai-bot/core/config.py
Normal file
179
homelab-ai-bot/core/config.py
Normal file
|
|
@ -0,0 +1,179 @@
|
||||||
|
"""Parses homelab.conf — the single source of truth for infrastructure facts."""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
from pathlib import Path
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
|
||||||
|
HOMELAB_CONF_PATHS = [
|
||||||
|
Path("/root/homelab-brain/homelab.conf"),
|
||||||
|
Path("/opt/homelab-brain/homelab.conf"),
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class Container:
    """One container entry parsed from a ``CT_<vmid>`` line of homelab.conf."""

    # Numeric ID taken from the variable name (CT_<vmid>).
    vmid: int
    # First "|"-separated field of the value.
    name: str
    # Second field; stored as "" when the config value is "—".
    tailscale_ip: str
    # Third field, kept as the raw string.
    services: str
    # Proxmox host the container belongs to: pve-hetzner, pve1, pve3.
    host: str
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class Tunnel:
    """One tunnel entry parsed from a ``TUNNEL_<ct_id>`` line of homelab.conf."""

    # Numeric ID taken from the variable name (TUNNEL_<ct_id>).
    ct_id: int
    # First "|"-separated field of the value.
    domain: str
    # Second field.
    target: str
    # Third field.
    status: str
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class HomelabConfig:
    """Parsed contents of homelab.conf, grouped by variable prefix."""

    # Every KEY=value pair exactly as read from the file.
    raw: dict = field(default_factory=dict)
    # DOMAIN_* values, keyed by the lower-cased name without the prefix.
    domains: dict = field(default_factory=dict)
    # SRV_* values, keyed by the lower-cased name without the prefix.
    servers: dict = field(default_factory=dict)
    # PW_* values, keyed by the lower-cased name without the prefix.
    passwords: dict = field(default_factory=dict)
    # Container objects built from CT_* lines.
    containers: list = field(default_factory=list)
    # TG_* values, keyed by the full lower-cased variable name.
    telegram: dict = field(default_factory=dict)
    # FORGEJO_*, GITHUB_* and OPENROUTER_* values, keyed by the full
    # lower-cased variable name.
    api_keys: dict = field(default_factory=dict)
    # Tunnel objects built from TUNNEL_* lines.
    tunnels: list = field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_container_sections(path: Path) -> dict:
|
||||||
|
"""Parse section comments to determine which host each CT_ variable belongs to."""
|
||||||
|
section_map = {
|
||||||
|
"pve-hetzner": re.compile(r"#.*CONTAINER.*pve-hetzner", re.IGNORECASE),
|
||||||
|
"pve1": re.compile(r"#.*CONTAINER.*pve1", re.IGNORECASE),
|
||||||
|
"pve3": re.compile(r"#.*CONTAINER.*pve3", re.IGNORECASE),
|
||||||
|
}
|
||||||
|
ct_var = re.compile(r"^(CT_\d+(?:_\w+)?)\s*=")
|
||||||
|
result = {}
|
||||||
|
current_host = "pve-hetzner"
|
||||||
|
|
||||||
|
with open(path) as f:
|
||||||
|
for line in f:
|
||||||
|
stripped = line.strip()
|
||||||
|
if stripped.startswith("#"):
|
||||||
|
for host, pattern in section_map.items():
|
||||||
|
if pattern.search(stripped):
|
||||||
|
current_host = host
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
m = ct_var.match(stripped)
|
||||||
|
if m:
|
||||||
|
result[m.group(1)] = current_host
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
def find_config() -> Path:
    """Return the first existing path from ``HOMELAB_CONF_PATHS``.

    Raises:
        FileNotFoundError: If none of the candidate paths exists.
    """
    existing = next((p for p in HOMELAB_CONF_PATHS if p.exists()), None)
    if existing is None:
        raise FileNotFoundError(f"homelab.conf not found in {HOMELAB_CONF_PATHS}")
    return existing
|
||||||
|
|
||||||
|
|
||||||
|
def parse_config(path: Path | None = None) -> HomelabConfig:
    """Parse homelab.conf into a ``HomelabConfig``.

    Every ``KEY=value`` line (optionally double-quoted) is stored in
    ``raw``. Keys are then sorted into groups by prefix, and ``CT_*`` /
    ``TUNNEL_*`` values are split on ``|`` into ``Container`` / ``Tunnel``
    objects. Entries with fewer than three fields are ignored.

    Args:
        path: Config file to read. Defaults to the result of ``find_config()``.

    Returns:
        The populated configuration, with containers sorted by (host, vmid).

    Raises:
        FileNotFoundError: If ``path`` is omitted and no config file exists.
    """
    if path is None:
        path = find_config()

    raw = {}
    # Read as UTF-8 explicitly: values are compared against "—" below, so
    # decoding must not depend on the locale default encoding.
    with open(path, encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line or line.startswith("#"):
                continue
            m = re.match(r'^([A-Za-z_][A-Za-z0-9_]*)="?(.*?)"?\s*$', line)
            if m:
                raw[m.group(1)] = m.group(2)

    cfg = HomelabConfig(raw=raw)

    # removeprefix() strips only the leading prefix; str.replace() would
    # also delete later occurrences of the same text inside the key.
    for k, v in raw.items():
        if k.startswith("DOMAIN_"):
            cfg.domains[k.removeprefix("DOMAIN_").lower()] = v
        elif k.startswith("SRV_"):
            cfg.servers[k.removeprefix("SRV_").lower()] = v
        elif k.startswith("PW_"):
            cfg.passwords[k.removeprefix("PW_").lower()] = v
        elif k.startswith("TG_"):
            cfg.telegram[k.lower()] = v
        elif k.startswith(("FORGEJO_", "GITHUB_", "OPENROUTER_")):
            cfg.api_keys[k.lower()] = v

    ct_pattern = re.compile(r"^CT_(\d+)(?:_(PVE\d+))?$")
    section_hosts = _parse_container_sections(path)

    for k, v in raw.items():
        m = ct_pattern.match(k)
        if not m:
            continue
        vmid = int(m.group(1))
        explicit_host = m.group(2)
        if explicit_host:
            # A _PVE<n> suffix on the variable name overrides the section.
            host = {"PVE1": "pve1", "PVE3": "pve3"}.get(explicit_host, explicit_host.lower())
        else:
            host = section_hosts.get(k, "pve-hetzner")

        parts = v.split("|")
        if len(parts) >= 3:
            cfg.containers.append(Container(
                vmid=vmid,
                name=parts[0],
                # "—" marks a container without a Tailscale address.
                tailscale_ip=parts[1] if parts[1] != "—" else "",
                services=parts[2],
                host=host,
            ))

    for k, v in raw.items():
        m = re.match(r"^TUNNEL_(\d+)(?:_\w+)?$", k)
        if not m:
            continue
        parts = v.split("|")
        if len(parts) >= 3:
            cfg.tunnels.append(Tunnel(
                ct_id=int(m.group(1)),
                domain=parts[0],
                target=parts[1],
                status=parts[2],
            ))

    cfg.containers.sort(key=lambda c: (c.host, c.vmid))
    return cfg
|
||||||
|
|
||||||
|
|
||||||
|
def get_container(cfg: HomelabConfig, vmid: int = None, name: str = None) -> Container | None:
    """Find the first container matching a VMID or a name fragment.

    Args:
        cfg: Parsed configuration whose ``containers`` list is searched.
        vmid: Exact VMID to match; ignored when falsy.
        name: Case-insensitive substring of the container name; ignored
            when falsy.

    Returns:
        The first container (in list order) that satisfies either
        criterion, or ``None``.
    """
    needle = name.lower() if name else None
    for candidate in cfg.containers:
        id_hit = bool(vmid) and candidate.vmid == vmid
        if id_hit or (needle is not None and needle in candidate.name.lower()):
            return candidate
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def format_overview(cfg: HomelabConfig) -> str:
    """Render the configuration as a Markdown overview.

    Sections: domains, servers, one container table per host (a new table
    starts whenever the host differs from the previous container's host),
    and the Cloudflare tunnels if any are configured.

    Args:
        cfg: Parsed configuration.

    Returns:
        The Markdown text.
    """
    out = ["# Homelab Infrastructure (from homelab.conf)\n", "## Domains"]
    out.extend(f"- {key}: {value}" for key, value in cfg.domains.items())

    out.append("\n## Servers (Tailscale)")
    out.extend(f"- {key}: {value}" for key, value in cfg.servers.items())

    previous_host = None
    for ct in cfg.containers:
        if ct.host != previous_host:
            previous_host = ct.host
            out.extend([
                f"\n## Containers on {previous_host}",
                "| CT | Name | Tailscale | Services |",
                "|---|---|---|---|",
            ])
        tailscale = ct.tailscale_ip if ct.tailscale_ip else "—"
        out.append(f"| {ct.vmid} | {ct.name} | {tailscale} | {ct.services} |")

    if cfg.tunnels:
        out.append("\n## Cloudflare Tunnels")
        out.extend(
            f"- CT {t.ct_id}: {t.domain} → {t.target} ({t.status})"
            for t in cfg.tunnels
        )

    return "\n".join(out)
|
||||||
130
homelab-ai-bot/core/loki_client.py
Normal file
130
homelab-ai-bot/core/loki_client.py
Normal file
|
|
@ -0,0 +1,130 @@
|
||||||
|
"""Loki API client for querying centralized logs."""
|
||||||
|
|
||||||
|
import requests
|
||||||
|
from datetime import datetime, timezone, timedelta
|
||||||
|
|
||||||
|
LOKI_URL = "http://100.109.206.43:3100"
|
||||||
|
|
||||||
|
|
||||||
|
def _query(endpoint: str, params: dict, base_url: str = None) -> dict:
    """GET a Loki API endpoint and return the decoded JSON body.

    Args:
        endpoint: Path appended to the base URL.
        params: Query parameters.
        base_url: Overrides ``LOKI_URL`` when given.

    Returns:
        The JSON response, or ``{"error": <message>}`` when the request
        raises a ``requests.RequestException``.
    """
    root = base_url or LOKI_URL
    url = f"{root}{endpoint}"
    try:
        response = requests.get(url, params=params, timeout=10)
        response.raise_for_status()
        return response.json()
    except requests.RequestException as exc:
        return {"error": str(exc)}
|
||||||
|
|
||||||
|
|
||||||
|
def _ns(dt: datetime) -> str:
|
||||||
|
return str(int(dt.timestamp() * 1e9))
|
||||||
|
|
||||||
|
|
||||||
|
def query_logs(query: str, hours: float = 1, limit: int = 100) -> list[dict]:
    """Run a LogQL range query and return the log entries.

    Args:
        query: LogQL expression.
        hours: Length of the time window ending now.
        limit: Maximum number of entries requested from Loki.

    Returns:
        A list of ``{"timestamp", "host", "line"}`` dicts, where ``host``
        falls back to the ``job`` label and then to ``"unknown"``. On a
        request error the list holds a single ``{"error": ...}`` dict.
    """
    window_end = datetime.now(timezone.utc)
    window_start = window_end - timedelta(hours=hours)
    params = {
        "query": query,
        "start": _ns(window_start),
        "end": _ns(window_end),
        "limit": limit,
        "direction": "backward",
    }
    data = _query("/loki/api/v1/query_range", params)
    if "error" in data:
        return [{"error": data["error"]}]

    collected = []
    for stream in data.get("data", {}).get("result", []):
        labels = stream.get("stream", {})
        host = labels.get("host", labels.get("job", "unknown"))
        collected.extend(
            {"timestamp": ts, "host": host, "line": line}
            for ts, line in stream.get("values", [])
        )
    return collected
|
||||||
|
|
||||||
|
|
||||||
|
def get_errors(container: str = None, hours: float = 1, limit: int = 50) -> list[dict]:
    """Return error-level log entries, optionally for one host only.

    Lines are kept when they match error/fatal/panic/traceback/exception
    (case-insensitive) and dropped when they match one of the known noise
    patterns in the exclusion filter.

    Args:
        container: Value of the ``host`` label; all jobs when omitted.
        hours: Length of the time window ending now.
        limit: Maximum number of entries.

    Returns:
        The result of ``query_logs`` for the assembled query.
    """
    selector = f'{{host="{container}"}}' if container else '{job=~".+"}'
    line_filters = (
        ' |~ "(?i)(error|fatal|panic|traceback|exception)"'
        ' !~ "caller=metrics|query_hash=|executing query|scheduler_processor'
        '|Aborted connection|systemd-networkd-wait-online|context canceled'
        '|AH01630: client denied"'
    )
    return query_logs(selector + line_filters, hours=hours, limit=limit)
|
||||||
|
|
||||||
|
|
||||||
|
def get_labels() -> list[str]:
    """Return all values of the ``host`` label known to Loki.

    Returns:
        The label values, or an empty list when the request fails.
    """
    response = _query("/loki/api/v1/label/host/values", {})
    return [] if "error" in response else response.get("data", [])
|
||||||
|
|
||||||
|
|
||||||
|
def check_silence(minutes: int = 35) -> list[dict]:
    """Find hosts that have not sent logs within the given timeframe.

    Args:
        minutes: Size of the look-back window.

    Returns:
        One ``{"host", "silent_minutes"}`` dict per silent host. When the
        host labels cannot be fetched, or a per-host query fails, the list
        holds a single ``{"error": ...}`` dict instead.
    """
    all_hosts = get_labels()
    if not all_hosts:
        return [{"error": "Could not fetch host labels from Loki"}]

    now = datetime.now(timezone.utc)
    start = now - timedelta(minutes=minutes)
    silent = []

    for host in all_hosts:
        data = _query("/loki/api/v1/query_range", {
            "query": f'count_over_time({{host="{host}"}}[{minutes}m])',
            "start": _ns(start),
            "end": _ns(now),
            "limit": 1,
        })
        if "error" in data:
            # A failed query says nothing about the host. Without this
            # check every host would be reported silent while Loki itself
            # is unreachable.
            return [{"error": data["error"]}]

        results = data.get("data", {}).get("result", [])
        has_logs = any(
            int(v[1]) > 0
            for r in results
            for v in r.get("values", [])
        )
        if not has_logs:
            silent.append({"host": host, "silent_minutes": minutes})

    return silent
|
||||||
|
|
||||||
|
|
||||||
|
def get_health(container: str, hours: float = 24) -> dict:
    """Build a health summary for one host.

    Args:
        container: Value of the ``host`` label.
        hours: Window used for counting error lines.

    Returns:
        Dict with ``host``, the error count, ``sending_logs`` (whether any
        log line arrived in the last 30 minutes) and ``status``:
        ``healthy`` for fewer than 5 errors with recent logs, ``warning``
        for fewer than 20 errors, otherwise ``critical``.
    """
    error_entries = get_errors(container=container, hours=hours, limit=200)
    # Entries carrying an "error" key are request failures, not log lines.
    error_count = sum(1 for entry in error_entries if "error" not in entry)

    recent_entries = query_logs(f'{{host="{container}"}}', hours=0.5, limit=5)
    has_recent = any("error" not in entry for entry in recent_entries)

    if error_count < 5 and has_recent:
        status = "healthy"
    elif error_count < 20:
        status = "warning"
    else:
        status = "critical"

    return {
        "host": container,
        # The key is the literal text below (not interpolated); callers
        # read the count under exactly this name.
        "errors_last_{hours}h": error_count,
        "sending_logs": has_recent,
        "status": status,
    }
|
||||||
|
|
||||||
|
|
||||||
|
def format_logs(entries: list[dict], max_lines: int = 30) -> str:
    """Format log entries for human/LLM consumption.

    Args:
        entries: Log entry dicts with ``host`` and ``line`` keys, or a list
            whose first element carries an ``error`` key.
        max_lines: Number of entries rendered before truncating.

    Returns:
        One ``[host] line`` row per entry (lines longer than 200 characters
        are cut and suffixed with ``...``), followed by a note about
        omitted entries when the list was truncated.
    """
    if not entries:
        return "No log entries found."
    if "error" in entries[0]:
        return f"Loki error: {entries[0]['error']}"

    def render(entry: dict) -> str:
        text = entry.get("line", "").strip()
        if len(text) > 200:
            text = text[:200] + "..."
        return f"[{entry.get('host', '?')}] {text}"

    rendered = [render(entry) for entry in entries[:max_lines]]

    omitted = len(entries) - max_lines
    if omitted > 0:
        rendered.append(f"\n... and {omitted} more entries")
    return "\n".join(rendered)
|
||||||
133
homelab-ai-bot/core/prometheus_client.py
Normal file
133
homelab-ai-bot/core/prometheus_client.py
Normal file
|
|
@ -0,0 +1,133 @@
|
||||||
|
"""Prometheus API client for querying system metrics."""
|
||||||
|
|
||||||
|
import requests
|
||||||
|
from datetime import datetime, timezone, timedelta
|
||||||
|
|
||||||
|
PROMETHEUS_URL = "http://100.88.230.59:9090"
|
||||||
|
|
||||||
|
|
||||||
|
def _query(endpoint: str, params: dict, base_url: str = None) -> dict:
    """GET a Prometheus API endpoint and return the decoded JSON body.

    Args:
        endpoint: Path appended to the base URL.
        params: Query parameters.
        base_url: Overrides ``PROMETHEUS_URL`` when given.

    Returns:
        The JSON response, or ``{"error": <message>, "status":
        "unavailable"}`` when the request raises a
        ``requests.RequestException``.
    """
    root = base_url or PROMETHEUS_URL
    url = f"{root}{endpoint}"
    try:
        response = requests.get(url, params=params, timeout=10)
        response.raise_for_status()
        return response.json()
    except requests.RequestException as exc:
        return {"error": str(exc), "status": "unavailable"}
|
||||||
|
|
||||||
|
|
||||||
|
def instant_query(query: str) -> dict:
    """Run an instant PromQL query against ``/api/v1/query``.

    Args:
        query: PromQL expression.

    Returns:
        The response dict produced by ``_query``.
    """
    params = {"query": query}
    return _query("/api/v1/query", params)
|
||||||
|
|
||||||
|
|
||||||
|
def range_query(query: str, hours: float = 1, step: str = "5m") -> dict:
    """Run a range PromQL query over the window ending now.

    Args:
        query: PromQL expression.
        hours: Length of the window.
        step: Resolution step passed through to Prometheus.

    Returns:
        The response dict produced by ``_query``.
    """
    window_end = datetime.now(timezone.utc)
    window_start = window_end - timedelta(hours=hours)
    params = {
        "query": query,
        "start": window_start.isoformat(),
        "end": window_end.isoformat(),
        "step": step,
    }
    return _query("/api/v1/query_range", params)
|
||||||
|
|
||||||
|
|
||||||
|
def get_targets() -> list[dict]:
    """Return all active Prometheus scrape targets with their status.

    Returns:
        One ``{"job", "instance", "health", "last_scrape"}`` dict per
        active target, or a single ``{"error": ...}`` dict when the
        request fails.
    """
    data = _query("/api/v1/targets", {})
    if "error" in data:
        return [{"error": data["error"]}]

    summary = []
    for target in data.get("data", {}).get("activeTargets", []):
        labels = target.get("labels", {})
        summary.append({
            "job": labels.get("job", "unknown"),
            "instance": labels.get("instance", "unknown"),
            "health": target.get("health", "unknown"),
            "last_scrape": target.get("lastScrape", ""),
        })
    return summary
|
||||||
|
|
||||||
|
|
||||||
|
def is_available() -> bool:
    """Check whether Prometheus answers a trivial ``up`` query.

    Returns:
        ``True`` when the response reports ``status == "success"`` or
        carries no ``error`` key, otherwise ``False``.
    """
    response = _query("/api/v1/query", {"query": "up"})
    if response.get("status") == "success":
        return True
    return "error" not in response
|
||||||
|
|
||||||
|
|
||||||
|
def get_cpu(instance: str = None, hours: float = 1) -> dict:
    """Return CPU usage in percent per instance.

    Args:
        instance: When given, restricts the query to instances whose label
            matches ``<instance>.*``.
        hours: Accepted for interface compatibility; not used by the query.

    Returns:
        The result of ``_extract_metrics`` for the ``cpu_percent`` metric.
    """
    matchers = 'mode="idle"'
    if instance:
        matchers += f', instance=~"{instance}.*"'
    q = (
        '100 - (avg by (instance) (rate(node_cpu_seconds_total{'
        + matchers
        + '}[5m])) * 100)'
    )
    return _extract_metrics(instant_query(q), "cpu_percent")
|
||||||
|
|
||||||
|
|
||||||
|
def get_memory(instance: str = None) -> dict:
    """Return memory usage in percent per instance.

    Args:
        instance: When given, restricts the query to instances whose label
            matches ``<instance>.*``.

    Returns:
        The result of ``_extract_metrics`` for the ``memory_percent`` metric.
    """
    selector = f'{{instance=~"{instance}.*"}}' if instance else ''
    q = (
        f'(1 - node_memory_MemAvailable_bytes{selector}'
        f' / node_memory_MemTotal_bytes{selector}) * 100'
    )
    return _extract_metrics(instant_query(q), "memory_percent")
|
||||||
|
|
||||||
|
|
||||||
|
def get_disk(instance: str = None) -> dict:
    """Return root filesystem usage in percent per instance.

    Args:
        instance: When given, restricts the query to instances whose label
            matches ``<instance>.*``.

    Returns:
        The result of ``_extract_metrics`` for the ``disk_percent`` metric.
    """
    matchers = 'mountpoint="/"'
    if instance:
        matchers += f', instance=~"{instance}.*"'
    selector = '{' + matchers + '}'
    q = (
        f'(1 - node_filesystem_avail_bytes{selector}'
        f' / node_filesystem_size_bytes{selector}) * 100'
    )
    return _extract_metrics(instant_query(q), "disk_percent")
|
||||||
|
|
||||||
|
|
||||||
|
def get_overview() -> str:
    """Return a formatted overview of all monitored instances.

    Returns:
        Markdown text listing the scrape targets followed by CPU, memory
        and root-disk usage per instance. A one-line message is returned
        instead when Prometheus is unreachable, the target list cannot be
        fetched, or no targets exist.
    """
    if not is_available():
        return "Prometheus is not reachable at " + PROMETHEUS_URL

    targets = get_targets()
    if not targets:
        return "No Prometheus targets found."
    if "error" in targets[0]:
        # get_targets() reports a failed request as [{"error": ...}].
        # Such an entry has no "health" key, so it must not reach the
        # rendering loop below.
        return f"Prometheus error: {targets[0]['error']}"

    lines = ["## Prometheus Targets\n"]
    for t in targets:
        status = "UP" if t["health"] == "up" else "DOWN"
        lines.append(f"- [{status}] {t['job']} ({t['instance']})")

    # Each metric section is rendered the same way; a section is omitted
    # when its query returned an error instead of results.
    sections = (
        ("\n## CPU Usage", get_cpu),
        ("\n## Memory Usage", get_memory),
        ("\n## Disk Usage (/)", get_disk),
    )
    for heading, fetch in sections:
        metrics = fetch()
        if "results" in metrics:
            lines.append(heading)
            lines.extend(
                f"- {r['instance']}: {r['value']:.1f}%"
                for r in metrics["results"]
            )

    return "\n".join(lines)
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_metrics(data: dict, metric_name: str) -> dict:
|
||||||
|
if "error" in data and data.get("status") != "success":
|
||||||
|
return {"error": data.get("error", "unknown error")}
|
||||||
|
|
||||||
|
results = []
|
||||||
|
for r in data.get("data", {}).get("result", []):
|
||||||
|
instance = r.get("metric", {}).get("instance", "unknown")
|
||||||
|
value = float(r.get("value", [0, 0])[1])
|
||||||
|
results.append({"instance": instance, "value": value, "metric": metric_name})
|
||||||
|
return {"results": results}
|
||||||
138
homelab-ai-bot/core/proxmox_client.py
Normal file
138
homelab-ai-bot/core/proxmox_client.py
Normal file
|
|
@ -0,0 +1,138 @@
|
||||||
|
"""Proxmox REST API client for querying infrastructure state."""
|
||||||
|
|
||||||
|
import requests
|
||||||
|
import urllib3
|
||||||
|
|
||||||
|
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
||||||
|
|
||||||
|
PROXMOX_HOSTS = {
|
||||||
|
"pve-hetzner": "100.88.230.59",
|
||||||
|
"pve1": "100.122.56.60",
|
||||||
|
"pve3": "100.109.101.12",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class ProxmoxClient:
|
||||||
|
def __init__(self, host_ip: str, user: str = "root@pam",
|
||||||
|
password: str = "", token_name: str = "", token_value: str = ""):
|
||||||
|
self.base_url = f"https://{host_ip}:8006/api2/json"
|
||||||
|
self.user = user
|
||||||
|
self.password = password
|
||||||
|
self.token_name = token_name
|
||||||
|
self.token_value = token_value
|
||||||
|
self._ticket = None
|
||||||
|
self._csrf = None
|
||||||
|
|
||||||
|
def _auth_header(self) -> dict:
|
||||||
|
if self.token_name and self.token_value:
|
||||||
|
return {"Authorization": f"PVEAPIToken={self.user}!{self.token_name}={self.token_value}"}
|
||||||
|
if self._ticket:
|
||||||
|
return {}
|
||||||
|
try:
|
||||||
|
r = requests.post(
|
||||||
|
f"{self.base_url}/access/ticket",
|
||||||
|
data={"username": self.user, "password": self.password},
|
||||||
|
verify=False, timeout=10,
|
||||||
|
)
|
||||||
|
r.raise_for_status()
|
||||||
|
data = r.json()["data"]
|
||||||
|
self._ticket = data["ticket"]
|
||||||
|
self._csrf = data["CSRFPreventionToken"]
|
||||||
|
except requests.RequestException as e:
|
||||||
|
raise ConnectionError(f"Proxmox auth failed for {self.base_url}: {e}")
|
||||||
|
return {}
|
||||||
|
|
||||||
|
def _get(self, path: str) -> dict:
|
||||||
|
headers = self._auth_header()
|
||||||
|
cookies = {}
|
||||||
|
if self._ticket:
|
||||||
|
cookies["PVEAuthCookie"] = self._ticket
|
||||||
|
headers["CSRFPreventionToken"] = self._csrf
|
||||||
|
r = requests.get(
|
||||||
|
f"{self.base_url}{path}",
|
||||||
|
cookies=cookies, headers=headers,
|
||||||
|
verify=False, timeout=10,
|
||||||
|
)
|
||||||
|
r.raise_for_status()
|
||||||
|
return r.json().get("data", {})
|
||||||
|
|
||||||
|
def get_node_status(self) -> dict:
|
||||||
|
nodes = self._get("/nodes")
|
||||||
|
if isinstance(nodes, list):
|
||||||
|
return nodes[0] if nodes else {}
|
||||||
|
return nodes
|
||||||
|
|
||||||
|
def get_containers(self) -> list[dict]:
|
||||||
|
nodes = self._get("/nodes")
|
||||||
|
if not isinstance(nodes, list):
|
||||||
|
return []
|
||||||
|
node_name = nodes[0]["node"]
|
||||||
|
return self._get(f"/nodes/{node_name}/lxc")
|
||||||
|
|
||||||
|
def get_container_status(self, vmid: int) -> dict:
|
||||||
|
nodes = self._get("/nodes")
|
||||||
|
if not isinstance(nodes, list):
|
||||||
|
return {"error": "no nodes"}
|
||||||
|
node_name = nodes[0]["node"]
|
||||||
|
return self._get(f"/nodes/{node_name}/lxc/{vmid}/status/current")
|
||||||
|
|
||||||
|
|
||||||
|
def get_all_containers(passwords: dict = None, tokens: dict = None) -> list[dict]:
    """Query all Proxmox hosts and return the combined container list.

    Args:
        passwords: Password per host name; the ``default`` entry is used
            for hosts without their own entry.
        tokens: ``{"name", "value"}`` API token dict per host name.

    Returns:
        The container dicts of every host in ``PROXMOX_HOSTS``, each
        tagged with ``_host`` and ``_host_ip``. A host whose query raises
        contributes one ``{"_host", "_host_ip", "error"}`` dict instead.
    """
    passwords = {} if passwords is None else passwords
    tokens = {} if tokens is None else tokens

    combined = []
    for host_name, host_ip in PROXMOX_HOSTS.items():
        host_token = tokens.get(host_name, {})
        host_pw = passwords.get(host_name, passwords.get("default", ""))
        origin = {"_host": host_name, "_host_ip": host_ip}
        try:
            client = ProxmoxClient(
                host_ip,
                password=host_pw,
                token_name=host_token.get("name", ""),
                token_value=host_token.get("value", ""),
            )
            host_containers = client.get_containers()
            for entry in host_containers:
                entry.update(origin)
            combined.extend(host_containers)
        except Exception as exc:
            # One unreachable host must not hide the others.
            combined.append({**origin, "error": str(exc)})
    return combined
|
||||||
|
|
||||||
|
|
||||||
|
def format_containers(containers: list[dict]) -> str:
    """Format a container list for human/LLM consumption.

    Args:
        containers: Container dicts carrying a ``_host`` key, as produced
            by ``get_all_containers``; entries with an ``error`` key are
            rendered as an error row.

    Returns:
        One Markdown table per host, ordered by host and VMID, or
        ``"No containers found."`` for an empty list.
    """
    if not containers:
        return "No containers found."

    bytes_per_mb = 1024 * 1024
    ordered = sorted(containers, key=lambda c: (c.get("_host", ""), c.get("vmid", 0)))

    out = []
    previous_host = None
    for entry in ordered:
        host = entry.get("_host", "unknown")
        if host != previous_host:
            # Start a new table whenever the host changes.
            previous_host = host
            out += [
                f"\n## {host}",
                "| CT | Name | Status | CPU | RAM (MB) |",
                "|---|---|---|---|---|",
            ]

        if "error" in entry:
            out.append(f"| — | ERROR | {entry['error'][:60]} | — | — |")
            continue

        used_mb = (entry.get("mem") or 0) // bytes_per_mb
        limit_mb = (entry.get("maxmem") or 0) // bytes_per_mb
        out.append(
            f"| {entry.get('vmid', '?')} | {entry.get('name', '?')} "
            f"| {entry.get('status', '?')} | {entry.get('cpus', '?')} "
            f"| {used_mb}/{limit_mb} |"
        )

    return "\n".join(out)
|
||||||
|
|
@ -1,44 +0,0 @@
|
||||||
"""OpenRouter LLM-Wrapper für natürliche Antworten."""
|
|
||||||
|
|
||||||
import requests
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
|
|
||||||
sys.path.insert(0, os.path.dirname(__file__))
|
|
||||||
from core import config
|
|
||||||
|
|
||||||
MODEL = "openai/gpt-4o-mini"
|
|
||||||
SYSTEM_PROMPT = """Du bist der Hausmeister-Bot für ein Homelab mit mehreren Proxmox-Servern.
|
|
||||||
Du antwortest kurz, präzise und auf Deutsch.
|
|
||||||
Du bekommst Live-Daten aus Loki (Logs), Proxmox (Container-Status) und homelab.conf.
|
|
||||||
Wenn alles in Ordnung ist, sag das kurz. Bei Problemen erkläre was los ist und schlage Lösungen vor.
|
|
||||||
Nutze Emojis sparsam. Formatiere für Telegram (kein Markdown, nur einfacher Text)."""
|
|
||||||
|
|
||||||
|
|
||||||
def _get_api_key() -> str:
    """Return the OpenRouter API key from homelab.conf.

    Returns:
        The value stored under ``openrouter_key`` in the parsed API keys,
        or an empty string when it is missing.
    """
    api_keys = config.parse_config().api_keys
    return api_keys.get("openrouter_key", "")
|
|
||||||
|
|
||||||
|
|
||||||
def ask(question: str, context: str) -> str:
    """Send a question together with live context to OpenRouter.

    Args:
        question: The user's question.
        context: Collected live data that is placed in front of the question.

    Returns:
        The model's answer, or a one-line message when the API key is
        missing or the request fails.
    """
    api_key = _get_api_key()
    if not api_key:
        return "OpenRouter API Key fehlt in homelab.conf"

    user_prompt = f"Kontext (Live-Daten):\n{context}\n\nFrage: {question}"
    payload = {
        "model": MODEL,
        "messages": [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": user_prompt},
        ],
        "max_tokens": 500,
    }

    try:
        response = requests.post(
            "https://openrouter.ai/api/v1/chat/completions",
            headers={"Authorization": f"Bearer {api_key}"},
            json=payload,
            timeout=30,
        )
        response.raise_for_status()
        answer = response.json()["choices"][0]["message"]["content"]
    except Exception as e:
        # Best effort: the caller gets the failure as text instead of an
        # exception.
        return f"LLM-Fehler: {e}"
    return answer
|
|
||||||
|
|
@ -1,138 +0,0 @@
|
||||||
"""Proaktives Monitoring — regelbasiert (Stufe 1) + KI (Stufe 2)."""
|
|
||||||
|
|
||||||
import sys
|
|
||||||
import os
|
|
||||||
import requests
|
|
||||||
|
|
||||||
sys.path.insert(0, os.path.dirname(__file__))
|
|
||||||
from core import config, loki_client, proxmox_client
|
|
||||||
|
|
||||||
|
|
||||||
def _get_tokens(cfg):
|
|
||||||
tokens = {}
|
|
||||||
tn = cfg.raw.get("PVE_TOKEN_HETZNER_NAME", "")
|
|
||||||
tv = cfg.raw.get("PVE_TOKEN_HETZNER_VALUE", "")
|
|
||||||
if tn and tv:
|
|
||||||
tokens["pve-hetzner"] = {"name": tn, "value": tv}
|
|
||||||
return tokens
|
|
||||||
|
|
||||||
|
|
||||||
def _get_passwords(cfg):
|
|
||||||
return {
|
|
||||||
"pve-hetzner": cfg.passwords.get("hetzner", ""),
|
|
||||||
"pve1": cfg.passwords.get("default", ""),
|
|
||||||
"pve3": cfg.passwords.get("default", ""),
|
|
||||||
"default": cfg.passwords.get("default", ""),
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
# VMIDs for which check_all() raises an alert when the container status is
# anything other than "running".
CRITICAL_CONTAINERS = [101, 109, 111, 112, 113, 115]
|
|
||||||
|
|
||||||
|
|
||||||
def check_all() -> list[str]:
    """Run the rule-based checks (stage 1) and return the resulting alerts.

    Three sources are inspected:

    * Containers from ``proxmox_client.get_all_containers``: a container whose
      VMID is in ``CRITICAL_CONTAINERS`` and whose status is not "running"
      raises an alert, as does any container using more than 90 % of its
      memory. Entries carrying an ``"error"`` key are skipped.
    * Error log entries of the last 30 minutes from ``loki_client.get_errors``:
      lines containing a panic/fatal/OOM keyword raise one combined alert
      naming the affected hosts.
    * ``loki_client.check_silence(minutes=35)``: hosts returned by it are
      listed in one alert.

    Returns:
        A list of alert strings; empty when nothing was found.
    """
    cfg = config.parse_config()
    alerts = []

    containers = proxmox_client.get_all_containers(
        _get_passwords(cfg), _get_tokens(cfg)
    )
    for ct in containers:
        if "error" in ct:
            continue
        vmid = ct.get("vmid", 0)
        name = ct.get("name", "?")
        status = ct.get("status", "unknown")
        if vmid in CRITICAL_CONTAINERS and status != "running":
            alerts.append(f"🔴 CT {vmid} ({name}) ist {status}!")

        mem = ct.get("mem", 0)
        maxmem = ct.get("maxmem", 1)
        # The maxmem > 0 guard avoids a division by zero.
        if maxmem > 0 and mem / maxmem > 0.90:
            pct = int(mem / maxmem * 100)
            alerts.append(f"⚠️ CT {vmid} ({name}) RAM bei {pct}%")

    # NOTE(review): plain substring matching, so "oom" also hits words such
    # as "room"; left unchanged to keep the existing alert semantics.
    keywords = ("panic", "fatal", "oom", "out of memory")
    panic_hosts = set()
    for entry in loki_client.get_errors(hours=0.5, limit=50):
        if "error" in entry:
            continue
        line = entry.get("line", "")
        # Lines containing these markers are excluded from the keyword match.
        if "query=" in line or "caller=metrics" in line:
            continue
        lowered = line.lower()
        if any(word in lowered for word in keywords):
            panic_hosts.add(entry.get("host", "?"))
    # Drop the literal, unexpanded placeholder host name.
    panic_hosts.discard("${HOSTNAME}")
    if panic_hosts:
        # Sorted so the same state always produces the same alert text.
        host_list = ", ".join(sorted(panic_hosts))
        alerts.append(f"🔴 Kritische Fehler (panic/fatal/OOM) auf: {host_list}")

    silent = loki_client.check_silence(minutes=35)
    # A first entry with an "error" key means no host list is available.
    if silent and "error" not in silent[0]:
        names = [s.get("host", "?") for s in silent]
        alerts.append(f"⚠️ Keine Logs seit 35+ Min: {', '.join(names)}")

    return alerts
|
|
||||||
|
|
||||||
|
|
||||||
def format_report() -> str:
    """Build the daily report: overall status of all systems.

    The report contains the container counts (running / stopped / entries
    carrying an ``"error"`` key), the number of error log entries of the last
    24 hours, the hosts returned by the 35-minute silence check, and the
    alerts produced by ``check_all()``.

    Returns:
        The report as a single newline-joined string.
    """
    cfg = config.parse_config()
    lines = ["📋 Tagesbericht Homelab\n"]

    containers = proxmox_client.get_all_containers(
        _get_passwords(cfg), _get_tokens(cfg)
    )
    running = sum(1 for c in containers if c.get("status") == "running")
    stopped = sum(1 for c in containers if c.get("status") == "stopped")
    unreachable = sum(1 for c in containers if "error" in c)
    lines.append(
        f"Container: {running} running, {stopped} stopped, "
        f"{unreachable} nicht erreichbar"
    )

    errors = loki_client.get_errors(hours=24, limit=100)
    # Entries carrying an "error" key are not log lines and are not counted.
    error_count = sum(1 for e in errors if "error" not in e)
    lines.append(f"Fehler (24h): {error_count}")

    silent = loki_client.check_silence(minutes=35)
    if not silent:
        lines.append("Stille Hosts: keine")
    elif "error" in silent[0]:
        # A failed query must not be reported as "no silent hosts".
        lines.append("Stille Hosts: unbekannt (Abfrage fehlgeschlagen)")
    else:
        names = [s.get("host", "?") for s in silent]
        lines.append(f"Stille Hosts: {', '.join(names)}")

    alerts = check_all()
    if alerts:
        lines.append(f"\n⚠️ {len(alerts)} aktive Alarme:")
        lines.extend(alerts)
    else:
        lines.append("\n✅ Keine Alarme — alles läuft.")

    return "\n".join(lines)
|
|
||||||
|
|
||||||
|
|
||||||
def send_alert(token: str, chat_id: str, message: str):
    """Send a message to a Telegram chat via the Bot API ``sendMessage`` call.

    Args:
        token: Bot token, inserted into the request URL.
        chat_id: Target chat.
        message: Text to send; cut to 4096 characters, the maximum the
            Bot API accepts for a message text.

    Raises:
        Exceptions raised by ``requests.post`` (connection errors, timeouts)
        propagate unchanged. A non-2xx response does not raise; it is
        reported on stderr.
    """
    response = requests.post(
        f"https://api.telegram.org/bot{token}/sendMessage",
        data={"chat_id": chat_id, "text": message[:4096]},
        timeout=10,
    )
    if not response.ok:
        # Deliberately not raise_for_status(): the HTTPError text contains
        # the request URL and therefore the bot token.
        print(
            "Telegram sendMessage fehlgeschlagen: "
            f"HTTP {response.status_code} {response.text[:200]}",
            file=sys.stderr,
        )
|
|
||||||
|
|
||||||
|
|
||||||
def run_check_and_alert():
    """Cron entry point: run the checks and send alerts when there are any.

    Does nothing when ``TG_HAUSMEISTER_TOKEN`` or ``TG_CHAT_ID`` is missing
    from the raw configuration, or when ``check_all()`` returns no alerts.
    """
    raw = config.parse_config().raw
    token = raw.get("TG_HAUSMEISTER_TOKEN", "")
    chat_id = raw.get("TG_CHAT_ID", "")
    if token and chat_id:
        alerts = check_all()
        if alerts:
            body = "\n".join(alerts)
            send_alert(token, chat_id, "🔧 Hausmeister-Check\n\n" + body)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
    import sys

    # CLI: the argument "report" prints the daily report; every other
    # invocation runs the check and sends alerts.
    wants_report = sys.argv[1:2] == ["report"]
    if not wants_report:
        run_check_and_alert()
    else:
        print(format_report())
|
|
||||||
|
|
@ -1,2 +0,0 @@
|
||||||
python-telegram-bot>=21.0
|
|
||||||
requests>=2.31
|
|
||||||
|
|
@ -1,224 +0,0 @@
|
||||||
"""Orbitalo Hausmeister — Telegram Bot für Homelab-Management."""
|
|
||||||
|
|
||||||
import asyncio
|
|
||||||
import logging
|
|
||||||
import sys
|
|
||||||
import os
|
|
||||||
|
|
||||||
sys.path.insert(0, os.path.dirname(__file__))
|
|
||||||
|
|
||||||
from telegram import BotCommand, Update
|
|
||||||
from telegram.ext import (
|
|
||||||
Application, CommandHandler, MessageHandler, filters, ContextTypes,
|
|
||||||
)
|
|
||||||
|
|
||||||
# Entries of the bot's command menu as (command, description) pairs.
BOT_COMMANDS = [
    BotCommand(command, description)
    for command, description in (
        ("status", "Alle Container"),
        ("errors", "Aktuelle Fehler"),
        ("ct", "Container-Detail (/ct 109)"),
        ("health", "Health-Check (/health wordpress)"),
        ("logs", "Letzte Logs (/logs rss-manager)"),
        ("silence", "Stille Hosts"),
        ("report", "Tagesbericht"),
        ("check", "Monitoring-Check"),
        ("start", "Hilfe anzeigen"),
    )
]
|
|
||||||
|
|
||||||
import context
|
|
||||||
import llm
|
|
||||||
import monitor
|
|
||||||
from core import config
|
|
||||||
|
|
||||||
# Root logging: INFO level, each record prefixed with time, logger name
# and level.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(name)s] %(levelname)s: %(message)s",
)
log = logging.getLogger("hausmeister")

# Chat IDs allowed to use the bot; starts empty.
ALLOWED_CHAT_IDS: set[int] = set()
|
|
||||||
|
|
||||||
|
|
||||||
def _load_token_and_chat():
    """Load the bot token and register the configured chat as allowed.

    Reads ``TG_HAUSMEISTER_TOKEN`` and ``TG_CHAT_ID`` from the raw
    configuration. A non-empty chat ID is converted with ``int()`` and added
    to ``ALLOWED_CHAT_IDS``.

    Returns:
        The bot token, or "" when it is not configured.
    """
    raw = config.parse_config().raw
    chat_id = raw.get("TG_CHAT_ID", "")
    if chat_id:
        ALLOWED_CHAT_IDS.add(int(chat_id))
    return raw.get("TG_HAUSMEISTER_TOKEN", "")
|
|
||||||
|
|
||||||
|
|
||||||
def _authorized(update: Update) -> bool:
    """Tell whether the chat an update came from may use the bot.

    While ``ALLOWED_CHAT_IDS`` is empty every chat is accepted; otherwise the
    ID of the update's effective chat must be in that set.
    """
    if ALLOWED_CHAT_IDS:
        return update.effective_chat.id in ALLOWED_CHAT_IDS
    # No restriction configured: accept everyone.
    return True
|
|
||||||
|
|
||||||
|
|
||||||
async def cmd_start(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
    """Handle /start: reply with the help text listing all commands.

    Unauthorized chats get no reply.
    """
    if not _authorized(update):
        return
    # effective_message also covers edited messages, for which
    # update.message is None.
    message = update.effective_message
    if message is None:
        return
    await message.reply_text(
        "🔧 Orbitalo Hausmeister-Bot\n\n"
        "Befehle:\n"
        "/status — Alle Container\n"
        "/errors — Aktuelle Fehler\n"
        "/ct <nr> — Container-Detail\n"
        "/health <name> — Health-Check\n"
        "/logs <name> — Letzte Logs\n"
        "/silence — Stille Hosts\n"
        "/report — Tagesbericht\n"
        "/check — Monitoring-Check\n\n"
        "Oder einfach eine Frage stellen!"
    )
|
|
||||||
|
|
||||||
|
|
||||||
async def cmd_status(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
    """Handle /status: reply with the status of all containers.

    The reply is cut to 4000 characters (followed by "...") when longer.
    Any exception is reported back to the chat as "Fehler: ...".
    """
    if not _authorized(update):
        return
    # effective_message also covers edited messages, for which
    # update.message is None.
    message = update.effective_message
    if message is None:
        return
    await message.reply_text("⏳ Lade Container-Status...")
    try:
        # The gather call is synchronous; run it in a worker thread so the
        # event loop is not blocked meanwhile.
        text = await asyncio.to_thread(context.gather_status)
        if len(text) > 4000:
            text = text[:4000] + "\n..."
        await message.reply_text(text)
    except Exception as e:
        await message.reply_text(f"Fehler: {e}")
|
|
||||||
|
|
||||||
|
|
||||||
async def cmd_errors(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
    """Handle /errors: reply with the errors of the last two hours.

    The reply is cut to 4000 characters. Any exception is reported back to
    the chat as "Fehler: ...".
    """
    if not _authorized(update):
        return
    # effective_message also covers edited messages, for which
    # update.message is None.
    message = update.effective_message
    if message is None:
        return
    await message.reply_text("⏳ Suche Fehler...")
    try:
        # Synchronous call moved to a worker thread to keep the event loop
        # responsive.
        text = await asyncio.to_thread(context.gather_errors, hours=2)
        await message.reply_text(text[:4000])
    except Exception as e:
        await message.reply_text(f"Fehler: {e}")
|
|
||||||
|
|
||||||
|
|
||||||
async def cmd_ct(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
    """Handle /ct <nr>: reply with the status of a single container.

    Without an argument a usage hint is sent. The reply is cut to 4000
    characters, like the other handlers do. Any exception is reported back
    to the chat as "Fehler: ...".
    """
    if not _authorized(update):
        return
    # effective_message also covers edited messages, for which
    # update.message is None.
    message = update.effective_message
    if message is None:
        return
    args = ctx.args
    if not args:
        await message.reply_text("Bitte CT-Nummer angeben: /ct 109")
        return
    try:
        # Synchronous call moved to a worker thread to keep the event loop
        # responsive.
        text = await asyncio.to_thread(context.gather_container_status, args[0])
        await message.reply_text(text[:4000])
    except Exception as e:
        await message.reply_text(f"Fehler: {e}")
|
|
||||||
|
|
||||||
|
|
||||||
async def cmd_health(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
    """Handle /health <name>: reply with the health check for a host.

    Without an argument a usage hint is sent. The reply is cut to 4000
    characters, like the other handlers do. Any exception is reported back
    to the chat as "Fehler: ...".
    """
    if not _authorized(update):
        return
    # effective_message also covers edited messages, for which
    # update.message is None.
    message = update.effective_message
    if message is None:
        return
    args = ctx.args
    if not args:
        await message.reply_text("Bitte Hostname angeben: /health wordpress")
        return
    try:
        # Synchronous call moved to a worker thread to keep the event loop
        # responsive.
        text = await asyncio.to_thread(context.gather_health, args[0])
        await message.reply_text(text[:4000])
    except Exception as e:
        await message.reply_text(f"Fehler: {e}")
|
|
||||||
|
|
||||||
|
|
||||||
async def cmd_logs(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
    """Handle /logs <name>: reply with the latest logs of a host.

    Without an argument a usage hint is sent. The reply is cut to 4000
    characters. Any exception is reported back to the chat as "Fehler: ...".
    """
    if not _authorized(update):
        return
    # effective_message also covers edited messages, for which
    # update.message is None.
    message = update.effective_message
    if message is None:
        return
    args = ctx.args
    if not args:
        await message.reply_text("Bitte Hostname angeben: /logs rss-manager")
        return
    try:
        # Synchronous call moved to a worker thread to keep the event loop
        # responsive.
        text = await asyncio.to_thread(context.gather_logs, args[0])
        await message.reply_text(text[:4000])
    except Exception as e:
        await message.reply_text(f"Fehler: {e}")
|
|
||||||
|
|
||||||
|
|
||||||
async def cmd_silence(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
    """Handle /silence: reply with the result of the silence check.

    The reply is cut to 4000 characters, like the other handlers do. Any
    exception is reported back to the chat as "Fehler: ...".
    """
    if not _authorized(update):
        return
    # effective_message also covers edited messages, for which
    # update.message is None.
    message = update.effective_message
    if message is None:
        return
    try:
        # Synchronous call moved to a worker thread to keep the event loop
        # responsive.
        text = await asyncio.to_thread(context.gather_silence)
        await message.reply_text(text[:4000])
    except Exception as e:
        await message.reply_text(f"Fehler: {e}")
|
|
||||||
|
|
||||||
|
|
||||||
async def cmd_report(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
    """Handle /report: reply with the daily report.

    The reply is cut to 4000 characters. Any exception is reported back to
    the chat as "Fehler: ...".
    """
    if not _authorized(update):
        return
    # effective_message also covers edited messages, for which
    # update.message is None.
    message = update.effective_message
    if message is None:
        return
    await message.reply_text("⏳ Erstelle Tagesbericht...")
    try:
        # Synchronous call moved to a worker thread to keep the event loop
        # responsive.
        text = await asyncio.to_thread(monitor.format_report)
        await message.reply_text(text[:4000])
    except Exception as e:
        await message.reply_text(f"Fehler: {e}")
|
|
||||||
|
|
||||||
|
|
||||||
async def cmd_check(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
    """Handle /check: run the monitoring check and reply with its alerts.

    Replies with the number of alerts followed by the alerts themselves, or
    with an all-clear message when there are none. The reply is cut to 4000
    characters, like the other handlers do. Any exception is reported back
    to the chat as "Fehler: ...".
    """
    if not _authorized(update):
        return
    # effective_message also covers edited messages, for which
    # update.message is None.
    message = update.effective_message
    if message is None:
        return
    await message.reply_text("⏳ Prüfe Systeme...")
    try:
        # Synchronous call moved to a worker thread to keep the event loop
        # responsive.
        alerts = await asyncio.to_thread(monitor.check_all)
        if alerts:
            text = f"⚠️ {len(alerts)} Alarme:\n\n" + "\n".join(alerts)
        else:
            text = "✅ Keine Alarme — alles läuft."
        await message.reply_text(text[:4000])
    except Exception as e:
        await message.reply_text(f"Fehler: {e}")
|
|
||||||
|
|
||||||
|
|
||||||
async def handle_message(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
    """Handle free-text questions: gather context, ask the LLM, reply.

    Unauthorized chats and updates without message text are ignored. The
    answer is cut to 4000 characters. Any exception is logged with its
    traceback and reported back to the chat as "Fehler: ...".
    """
    if not _authorized(update):
        return
    # effective_message also covers edited messages, for which
    # update.message is None.
    message = update.effective_message
    if message is None:
        return
    question = message.text
    if not question:
        return

    await message.reply_text("🤔 Denke nach...")
    try:
        # Both calls are synchronous; run them in a worker thread so the
        # event loop stays responsive while they wait.
        data = await asyncio.to_thread(
            context.gather_context_for_question, question
        )
        answer = await asyncio.to_thread(llm.ask, question, data)
        await message.reply_text(answer[:4000])
    except Exception as e:
        log.exception("Fehler bei Freitext")
        await message.reply_text(f"Fehler: {e}")
|
|
||||||
|
|
||||||
|
|
||||||
def main():
    """Start the bot: load the token, register all handlers, begin polling.

    Exits with status 1 when no bot token is configured.
    """
    token = _load_token_and_chat()
    if not token:
        log.error("TG_HAUSMEISTER_TOKEN fehlt in homelab.conf!")
        sys.exit(1)

    log.info("Starte Orbitalo Hausmeister-Bot...")
    app = Application.builder().token(token).build()

    # Command name -> callback, registered in this order.
    command_callbacks = (
        ("start", cmd_start),
        ("status", cmd_status),
        ("errors", cmd_errors),
        ("ct", cmd_ct),
        ("health", cmd_health),
        ("logs", cmd_logs),
        ("silence", cmd_silence),
        ("report", cmd_report),
        ("check", cmd_check),
    )
    for command, callback in command_callbacks:
        app.add_handler(CommandHandler(command, callback))
    # Every text message that is not a command goes to the free-text handler.
    free_text = filters.TEXT & ~filters.COMMAND
    app.add_handler(MessageHandler(free_text, handle_message))

    async def register_command_menu(application):
        await application.bot.set_my_commands(BOT_COMMANDS)
        log.info("Kommandomenü registriert")

    app.post_init = register_command_menu
    log.info("Bot läuft — polling gestartet")
    app.run_polling(allowed_updates=Update.ALL_TYPES)


if __name__ == "__main__":
    main()
|
|
||||||
Loading…
Add table
Reference in a new issue