feat: Bot-Code + Core-Module persistent aufgenommen

This commit is contained in:
Auto-Sync 2026-03-08 11:57:26 +01:00
parent cf5192c5b0
commit 17d51e6924
10 changed files with 581 additions and 567 deletions

View file

@ -1,159 +0,0 @@
"""Intelligente Kontext-Sammlung für den Hausmeister-Bot.
Entscheidet anhand der Frage welche Datenquellen abgefragt werden."""
import sys
import os
import re
sys.path.insert(0, os.path.dirname(__file__))
from core import config, loki_client, proxmox_client
def _load_config():
    """Parse homelab.conf through ``core.config`` and return the result."""
    cfg = config.parse_config()
    return cfg
def _get_tokens(cfg):
tokens = {}
tn = cfg.raw.get("PVE_TOKEN_HETZNER_NAME", "")
tv = cfg.raw.get("PVE_TOKEN_HETZNER_VALUE", "")
if tn and tv:
tokens["pve-hetzner"] = {"name": tn, "value": tv}
return tokens
def _get_passwords(cfg):
return {
"pve-hetzner": cfg.passwords.get("hetzner", ""),
"pve1": cfg.passwords.get("default", ""),
"pve3": cfg.passwords.get("default", ""),
"default": cfg.passwords.get("default", ""),
}
def gather_status() -> str:
    """Return the formatted status of all containers (used by /status)."""
    cfg = _load_config()
    passwords = _get_passwords(cfg)
    tokens = _get_tokens(cfg)
    all_containers = proxmox_client.get_all_containers(passwords, tokens)
    return proxmox_client.format_containers(all_containers)
def gather_errors(hours: float = 2) -> str:
    """Return recent error log lines from Loki as text (used by /errors).

    Args:
        hours: How far back to search; at most 30 entries are requested.
    """
    recent_errors = loki_client.get_errors(hours=hours, limit=30)
    formatted = loki_client.format_logs(recent_errors)
    return formatted
def gather_container_status(query: str) -> str:
    """Return a status summary for a single container.

    ``query`` is either text containing a three-digit VMID (matched as a
    whole word) or a container name; the lookup itself is done by
    ``config.get_container``.

    Args:
        query: VMID or (partial) container name.

    Returns:
        A multi-line status text, or a one-line error message when the
        container is unknown, its host has no configured address, or the
        Proxmox request fails.
    """
    cfg = _load_config()
    vmid = None
    name = None
    m = re.search(r'\b(\d{3})\b', query)
    if m:
        vmid = int(m.group(1))
    else:
        name = query.strip()

    ct = config.get_container(cfg, vmid=vmid, name=name)
    if not ct:
        return f"Container nicht gefunden: {query}"

    host_ip = proxmox_client.PROXMOX_HOSTS.get(ct.host)
    if not host_ip:
        return f"Host nicht erreichbar: {ct.host}"

    token = _get_tokens(cfg).get(ct.host, {})
    pw = _get_passwords(cfg).get(ct.host, "")
    try:
        client = proxmox_client.ProxmoxClient(
            host_ip, password=pw,
            token_name=token.get("name", ""),
            token_value=token.get("value", ""),
        )
        status = client.get_container_status(ct.vmid)
    except Exception as e:
        return f"Proxmox-Fehler: {e}"

    # The client reports a missing node list as {"error": ...}; show that as
    # an error instead of rendering a status block full of placeholders.
    if not isinstance(status, dict):
        return "Proxmox-Fehler: unerwartete Antwort"
    if "error" in status:
        return f"Proxmox-Fehler: {status['error']}"

    mem_mb = status.get("mem", 0) // (1024 * 1024)
    maxmem_mb = status.get("maxmem", 0) // (1024 * 1024)
    uptime_h = status.get("uptime", 0) // 3600
    return (
        # Separator keeps VMID and name from running together ("CT 109wordpress").
        f"CT {ct.vmid} — {ct.name}\n"
        f"Host: {ct.host}\n"
        f"Status: {status.get('status', '?')}\n"
        f"RAM: {mem_mb}/{maxmem_mb} MB\n"
        f"CPU: {status.get('cpus', '?')} Kerne\n"
        f"Uptime: {uptime_h}h\n"
        f"Tailscale: {ct.tailscale_ip or '—'}\n"
        f"Dienste: {ct.services}"
    )
def gather_logs(container: str, hours: float = 1) -> str:
    """Return the most recent Loki log lines of one host as text.

    Args:
        container: Value of the ``host`` label to select.
        hours: How far back to look.

    Returns:
        Up to 20 log lines formatted by ``loki_client.format_logs``.
    """
    # The name may come straight from chat input: escape backslashes and
    # double quotes so it cannot end the quoted LogQL label value early.
    escaped = container.replace("\\", "\\\\").replace('"', '\\"')
    entries = loki_client.query_logs(
        f'{{host="{escaped}"}}', hours=hours, limit=20
    )
    return loki_client.format_logs(entries)
def gather_health(container: str) -> str:
    """Return a short health report for one container, based on 24h of logs.

    Args:
        container: Host name passed to ``loki_client.get_health``.

    Returns:
        Four lines: host with status emoji, status, error count, and whether
        the host is currently sending logs.
    """
    health = loki_client.get_health(container, hours=24)
    emoji_by_status = {"healthy": "", "warning": "⚠️", "critical": "🔴"}
    status_emoji = emoji_by_status.get(health.get("status", ""), "")
    # The key is a plain string: the "{hours}" part is literal text, not a
    # placeholder that gets filled in.
    error_total = health.get("errors_last_{hours}h", "?")
    sending = "ja" if health.get("sending_logs") else "nein"
    report_lines = [
        f"{status_emoji} {health.get('host', container)}",
        f"Status: {health.get('status', '?')}",
        f"Fehler (24h): {error_total}",
        f"Sendet Logs: {sending}",
    ]
    return "\n".join(report_lines)
def gather_silence() -> str:
    """Report which hosts have sent no logs during the last 35 minutes."""
    silent = loki_client.check_silence(minutes=35)
    if not silent:
        return "✅ Alle Hosts senden Logs."
    first = silent[0]
    if "error" in first:
        return f"Fehler: {first['error']}"
    header = "⚠️ Stille Hosts (keine Logs seit 35+ Min):\n"
    return "\n".join([header, *(f"{entry['host']}" for entry in silent)])
def gather_context_for_question(question: str) -> str:
    """Collect the context sections that match a free-text question.

    Keywords in the lower-cased question select the error, status and
    silence sections; a ``ct <nnn>`` reference or a known container name adds
    a per-container section. When nothing matches, the container status and
    the errors of the last hour are used.

    Returns:
        All selected sections, separated by blank lines.
    """
    q = question.lower()

    def mentions(*keywords):
        return any(keyword in q for keyword in keywords)

    sections = []
    if mentions("fehler", "error", "problem", "kaputt", "down"):
        sections.append("=== Aktuelle Fehler ===\n" + gather_errors(hours=2))
    if mentions("status", "läuft", "container", "übersicht", "alles"):
        sections.append("=== Container Status ===\n" + gather_status())
    if mentions("still", "silence", "stumm", "logs"):
        sections.append("=== Stille Hosts ===\n" + gather_silence())

    ct_match = re.search(r'\bct[- ]?(\d{3})\b', q)
    if ct_match:
        vmid_text = ct_match.group(1)
        sections.append(f"=== CT {vmid_text} ===\n" + gather_container_status(vmid_text))

    known_names = (
        "wordpress", "rss", "seafile", "forgejo", "portainer",
        "fuenfvoracht", "redax", "flugscanner", "edelmetall",
    )
    sections.extend(
        f"=== {name} ===\n" + gather_container_status(name)
        for name in known_names
        if name in q
    )

    if not sections:
        # Nothing matched: fall back to a general overview.
        sections = [
            "=== Container Status ===\n" + gather_status(),
            "=== Aktuelle Fehler ===\n" + gather_errors(hours=1),
        ]
    return "\n\n".join(sections)

View file

@ -0,0 +1 @@

View file

@ -0,0 +1,179 @@
"""Parses homelab.conf — the single source of truth for infrastructure facts."""
import os
import re
from pathlib import Path
from dataclasses import dataclass, field
# Candidate locations of homelab.conf, in lookup order; find_config() returns
# the first one that exists.
HOMELAB_CONF_PATHS = [
Path("/root/homelab-brain/homelab.conf"),
Path("/opt/homelab-brain/homelab.conf"),
]
@dataclass
class Container:
    """One container entry parsed from a ``CT_<vmid>`` line of homelab.conf."""
    vmid: int  # numeric ID taken from the variable name
    name: str  # first "|"-separated field of the value
    tailscale_ip: str  # second "|"-separated field of the value
    services: str  # third "|"-separated field of the value
    host: str # pve-hetzner, pve1, pve3
@dataclass
class Tunnel:
    """One tunnel entry parsed from a ``TUNNEL_<id>`` line of homelab.conf."""
    ct_id: int  # numeric ID taken from the variable name
    domain: str  # first "|"-separated field of the value
    target: str  # second "|"-separated field of the value
    status: str  # third "|"-separated field of the value
@dataclass
class HomelabConfig:
    """Parsed contents of homelab.conf, grouped by variable prefix."""
    raw: dict = field(default_factory=dict)  # every KEY=value pair as read
    domains: dict = field(default_factory=dict)  # DOMAIN_* values, keyed by the lower-cased remainder
    servers: dict = field(default_factory=dict)  # SRV_* values, keyed by the lower-cased remainder
    passwords: dict = field(default_factory=dict)  # PW_* values, keyed by the lower-cased remainder
    containers: list = field(default_factory=list)  # Container entries from CT_* variables
    telegram: dict = field(default_factory=dict)  # TG_* values, keyed by the full lower-cased name
    api_keys: dict = field(default_factory=dict)  # FORGEJO_*/GITHUB_*/OPENROUTER_* values, full lower-cased name
    tunnels: list = field(default_factory=list)  # Tunnel entries from TUNNEL_* variables
def _parse_container_sections(path: Path) -> dict:
"""Parse section comments to determine which host each CT_ variable belongs to."""
section_map = {
"pve-hetzner": re.compile(r"#.*CONTAINER.*pve-hetzner", re.IGNORECASE),
"pve1": re.compile(r"#.*CONTAINER.*pve1", re.IGNORECASE),
"pve3": re.compile(r"#.*CONTAINER.*pve3", re.IGNORECASE),
}
ct_var = re.compile(r"^(CT_\d+(?:_\w+)?)\s*=")
result = {}
current_host = "pve-hetzner"
with open(path) as f:
for line in f:
stripped = line.strip()
if stripped.startswith("#"):
for host, pattern in section_map.items():
if pattern.search(stripped):
current_host = host
break
else:
m = ct_var.match(stripped)
if m:
result[m.group(1)] = current_host
return result
def find_config() -> Path:
    """Return the first existing path from ``HOMELAB_CONF_PATHS``.

    Raises:
        FileNotFoundError: If none of the candidate paths exists.
    """
    existing = next(
        (candidate for candidate in HOMELAB_CONF_PATHS if candidate.exists()),
        None,
    )
    if existing is None:
        raise FileNotFoundError(f"homelab.conf not found in {HOMELAB_CONF_PATHS}")
    return existing
def parse_config(path: Path = None) -> HomelabConfig:
    """Parse homelab.conf into a ``HomelabConfig``.

    Every ``KEY=value`` line (value optionally double-quoted) is stored in
    ``raw``; blank lines and ``#`` comments are skipped. Known prefixes are
    additionally sorted into the typed fields:

    * ``DOMAIN_``, ``SRV_``, ``PW_``: keyed by the lower-cased name without
      the prefix.
    * ``TG_`` and ``FORGEJO_``/``GITHUB_``/``OPENROUTER_``: keyed by the full
      lower-cased name.
    * ``CT_<vmid>[_PVE<n>]`` and ``TUNNEL_<id>[_<suffix>]``: values are
      ``|``-separated and need at least three fields; shorter values are
      ignored.

    Args:
        path: Config file to read; defaults to the result of ``find_config()``.

    Returns:
        The populated configuration, containers sorted by host and VMID.
    """
    if path is None:
        path = find_config()

    assignment = re.compile(r'^([A-Za-z_][A-Za-z0-9_]*)="?(.*?)"?\s*$')
    raw = {}
    # Explicit UTF-8 keeps parsing independent of the process locale.
    with open(path, encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line or line.startswith("#"):
                continue
            m = assignment.match(line)
            if m:
                raw[m.group(1)] = m.group(2)

    cfg = HomelabConfig(raw=raw)
    for k, v in raw.items():
        # Slice off only the leading prefix; str.replace would also remove
        # later occurrences inside the name (e.g. "PW_FOO_PW_BAR").
        if k.startswith("DOMAIN_"):
            cfg.domains[k[len("DOMAIN_"):].lower()] = v
        elif k.startswith("SRV_"):
            cfg.servers[k[len("SRV_"):].lower()] = v
        elif k.startswith("PW_"):
            cfg.passwords[k[len("PW_"):].lower()] = v
        elif k.startswith("TG_"):
            cfg.telegram[k.lower()] = v
        elif k.startswith(("FORGEJO_", "GITHUB_", "OPENROUTER_")):
            cfg.api_keys[k.lower()] = v

    ct_pattern = re.compile(r"^CT_(\d+)(?:_(PVE\d+))?$")
    section_hosts = _parse_container_sections(path)
    for k, v in raw.items():
        m = ct_pattern.match(k)
        if not m:
            continue
        explicit_host = m.group(2)
        if explicit_host:
            # An explicit _PVE<n> suffix wins over the section comment.
            host = explicit_host.lower()
        else:
            host = section_hosts.get(k, "pve-hetzner")
        parts = v.split("|")
        if len(parts) >= 3:
            cfg.containers.append(Container(
                vmid=int(m.group(1)),
                name=parts[0],
                tailscale_ip=parts[1],
                services=parts[2],
                host=host,
            ))

    tunnel_pattern = re.compile(r"^TUNNEL_(\d+)(?:_\w+)?$")
    for k, v in raw.items():
        m = tunnel_pattern.match(k)
        if not m:
            continue
        parts = v.split("|")
        if len(parts) >= 3:
            cfg.tunnels.append(Tunnel(
                ct_id=int(m.group(1)),
                domain=parts[0],
                target=parts[1],
                status=parts[2],
            ))

    cfg.containers.sort(key=lambda c: (c.host, c.vmid))
    return cfg
def get_container(cfg: HomelabConfig, vmid: int = None, name: str = None) -> Container | None:
    """Return the first container matching the VMID or the name.

    Containers are checked in list order. One matches when its ``vmid``
    equals a truthy ``vmid`` argument, or when a non-empty ``name`` is a
    case-insensitive substring of its name.

    Returns:
        The first matching container, or ``None`` when nothing matches.
    """
    needle = name.lower() if name else None

    def matches(candidate):
        if vmid and candidate.vmid == vmid:
            return True
        return needle is not None and needle in candidate.name.lower()

    return next((c for c in cfg.containers if matches(c)), None)
def format_overview(cfg: HomelabConfig) -> str:
    """Render the configuration as a Markdown overview.

    The output lists domains, servers, one table per host for the containers
    (in the order of ``cfg.containers``) and, when present, the tunnels.

    Returns:
        The Markdown text.
    """
    lines = ["# Homelab Infrastructure (from homelab.conf)\n"]
    lines.append("## Domains")
    lines.extend(f"- {k}: {v}" for k, v in cfg.domains.items())
    lines.append("\n## Servers (Tailscale)")
    lines.extend(f"- {k}: {v}" for k, v in cfg.servers.items())

    current_host = None
    for c in cfg.containers:
        if c.host != current_host:
            # Start a new table whenever the host changes.
            current_host = c.host
            lines.append(f"\n## Containers on {current_host}")
            lines.append("| CT | Name | Tailscale | Services |")
            lines.append("|---|---|---|---|")
        ts = c.tailscale_ip or ""
        lines.append(f"| {c.vmid} | {c.name} | {ts} | {c.services} |")

    if cfg.tunnels:
        lines.append("\n## Cloudflare Tunnels")
        # The arrow keeps domain and target from running into each other.
        lines.extend(
            f"- CT {t.ct_id}: {t.domain} → {t.target} ({t.status})"
            for t in cfg.tunnels
        )
    return "\n".join(lines)

View file

@ -0,0 +1,130 @@
"""Loki API client for querying centralized logs."""
import requests
from datetime import datetime, timezone, timedelta
# Base URL of the Loki instance; _query() uses it when no base_url is passed.
LOKI_URL = "http://100.109.206.43:3100"
def _query(endpoint: str, params: dict, base_url: str = None) -> dict:
    """GET a Loki API endpoint and return the decoded JSON body.

    Args:
        endpoint: API path appended to the base URL.
        params: Query-string parameters.
        base_url: Alternative base URL; defaults to ``LOKI_URL``.

    Returns:
        The JSON response, or ``{"error": <message>}`` when the request
        raises a ``requests.RequestException``.
    """
    target = f"{base_url or LOKI_URL}{endpoint}"
    try:
        response = requests.get(target, params=params, timeout=10)
        response.raise_for_status()
        payload = response.json()
    except requests.RequestException as exc:
        return {"error": str(exc)}
    return payload
def _ns(dt: datetime) -> str:
return str(int(dt.timestamp() * 1e9))
def query_logs(query: str, hours: float = 1, limit: int = 100) -> list[dict]:
    """Run a LogQL range query and return the log entries, newest first.

    Args:
        query: LogQL expression.
        hours: Size of the time window ending now.
        limit: Maximum number of entries requested from Loki.

    Returns:
        A list of ``{"timestamp", "host", "line"}`` dicts. ``host`` falls
        back to the ``job`` label and then to ``"unknown"``. When the request
        fails, the list holds a single ``{"error": <message>}`` dict.
    """
    now = datetime.now(timezone.utc)
    start = now - timedelta(hours=hours)
    data = _query("/loki/api/v1/query_range", {
        "query": query,
        "start": _ns(start),
        "end": _ns(now),
        "limit": limit,
        "direction": "backward",
    })
    if "error" in data:
        return [{"error": data["error"]}]

    entries = []
    for stream in data.get("data", {}).get("result", []):
        labels = stream.get("stream", {})
        host = labels.get("host", labels.get("job", "unknown"))
        for ts, line in stream.get("values", []):
            entries.append({"timestamp": ts, "host": host, "line": line})

    def _timestamp_key(entry):
        # Entries with an unparsable timestamp sort last instead of failing.
        try:
            return int(entry["timestamp"])
        except (TypeError, ValueError):
            return 0

    # Results arrive grouped per stream; merge them into one newest-first
    # list so that truncated views show the latest lines across all streams.
    entries.sort(key=_timestamp_key, reverse=True)
    return entries
def get_errors(container: str = None, hours: float = 1, limit: int = 50) -> list[dict]:
    """Get error-level log lines, optionally restricted to one host.

    Lines match when they contain error/fatal/panic/traceback/exception
    (case-insensitive) and none of the excluded noise patterns.

    Args:
        container: ``host`` label to filter on; all jobs when omitted.
        hours: Size of the time window ending now.
        limit: Maximum number of entries.

    Returns:
        Entries as produced by ``query_logs``.
    """
    if container:
        # Escape backslashes and quotes so the name stays inside the quoted
        # label value of the selector.
        escaped = container.replace("\\", "\\\\").replace('"', '\\"')
        selector = f'{{host="{escaped}"}}'
    else:
        selector = '{job=~".+"}'
    q = (
        f'{selector} |~ "(?i)(error|fatal|panic|traceback|exception)"'
        ' !~ "caller=metrics|query_hash=|executing query|scheduler_processor'
        '|Aborted connection|systemd-networkd-wait-online|context canceled'
        '|AH01630: client denied"'
    )
    return query_logs(q, hours=hours, limit=limit)
def get_labels() -> list[str]:
    """Return all known values of the ``host`` label; empty on a request error."""
    response = _query("/loki/api/v1/label/host/values", {})
    return [] if "error" in response else response.get("data", [])
def check_silence(minutes: int = 35) -> list[dict]:
    """Find hosts that have not sent logs within the given timeframe.

    Args:
        minutes: Length of the window ending now.

    Returns:
        One ``{"host", "silent_minutes"}`` dict per silent host. When the
        host list or any per-host query cannot be fetched, the result is a
        single ``{"error": <message>}`` dict instead.
    """
    all_hosts = get_labels()
    if not all_hosts:
        return [{"error": "Could not fetch host labels from Loki"}]

    now = datetime.now(timezone.utc)
    start = now - timedelta(minutes=minutes)
    silent = []
    for host in all_hosts:
        data = _query("/loki/api/v1/query_range", {
            "query": f'count_over_time({{host="{host}"}}[{minutes}m])',
            "start": _ns(start),
            "end": _ns(now),
            "limit": 1,
        })
        if "error" in data:
            # A failed query says nothing about the host; report the failure
            # rather than flagging the host as silent.
            return [{"error": data["error"]}]
        results = data.get("data", {}).get("result", [])
        has_logs = any(
            int(v[1]) > 0
            for r in results
            for v in r.get("values", [])
        )
        if not has_logs:
            silent.append({"host": host, "silent_minutes": minutes})
    return silent
def get_health(container: str, hours: float = 24) -> dict:
    """Get a health summary for a specific container.

    Args:
        container: ``host`` label of the container.
        hours: Window used for counting error lines (at most 200 are counted).

    Returns:
        Dict with ``host``, ``error_count``, ``hours``, ``sending_logs``
        (any log line within the last 30 minutes) and ``status``:
        ``"healthy"`` below 5 errors while logs are arriving, otherwise
        ``"warning"`` below 20 errors, otherwise ``"critical"``.
        The key ``"errors_last_{hours}h"`` (literal text, never
        interpolated) carries the same count and is kept for existing
        callers.
    """
    errors = get_errors(container=container, hours=hours, limit=200)
    error_count = len([e for e in errors if "error" not in e])

    # Escape the name so it stays inside the quoted label value.
    escaped = container.replace("\\", "\\\\").replace('"', '\\"')
    recent = query_logs(f'{{host="{escaped}"}}', hours=0.5, limit=5)
    has_recent = any("error" not in e for e in recent)

    if error_count < 5 and has_recent:
        status = "healthy"
    elif error_count < 20:
        status = "warning"
    else:
        status = "critical"

    return {
        "host": container,
        # Legacy key: the f-prefix was never there, so the name is literal.
        "errors_last_{hours}h": error_count,
        "error_count": error_count,
        "hours": hours,
        "sending_logs": has_recent,
        "status": status,
    }
def format_logs(entries: list[dict], max_lines: int = 30) -> str:
    """Render log entries as ``[host] line`` text.

    Args:
        entries: Entries as returned by ``query_logs``.
        max_lines: Number of entries to show before summarising the rest.

    Returns:
        One line per entry (each cut to 200 characters plus ``...``), a
        trailing note when entries were left out, or a fixed message for an
        empty list or an error entry in first position.
    """
    if not entries:
        return "No log entries found."
    first = entries[0]
    if "error" in first:
        return f"Loki error: {first['error']}"

    def render(entry):
        text = entry.get("line", "").strip()
        if len(text) > 200:
            text = text[:200] + "..."
        return f"[{entry.get('host', '?')}] {text}"

    rendered = [render(entry) for entry in entries[:max_lines]]
    omitted = len(entries) - max_lines
    if omitted > 0:
        rendered.append(f"\n... and {omitted} more entries")
    return "\n".join(rendered)

View file

@ -0,0 +1,133 @@
"""Prometheus API client for querying system metrics."""
import requests
from datetime import datetime, timezone, timedelta
# Base URL of the Prometheus server; _query() uses it when no base_url is passed.
PROMETHEUS_URL = "http://100.88.230.59:9090"
def _query(endpoint: str, params: dict, base_url: str = None) -> dict:
    """GET a Prometheus API endpoint and return the decoded JSON body.

    Args:
        endpoint: API path appended to the base URL.
        params: Query-string parameters.
        base_url: Alternative base URL; defaults to ``PROMETHEUS_URL``.

    Returns:
        The JSON response, or ``{"error": <message>, "status":
        "unavailable"}`` when the request raises a
        ``requests.RequestException``.
    """
    target = f"{base_url or PROMETHEUS_URL}{endpoint}"
    try:
        response = requests.get(target, params=params, timeout=10)
        response.raise_for_status()
        payload = response.json()
    except requests.RequestException as exc:
        return {"error": str(exc), "status": "unavailable"}
    return payload
def instant_query(query: str) -> dict:
    """Evaluate a PromQL expression at the current time via ``/api/v1/query``."""
    params = {"query": query}
    return _query("/api/v1/query", params)
def range_query(query: str, hours: float = 1, step: str = "5m") -> dict:
    """Evaluate a PromQL expression over the last ``hours`` hours.

    Args:
        query: PromQL expression.
        hours: Size of the window ending now (UTC).
        step: Resolution passed through to Prometheus.
    """
    end = datetime.now(timezone.utc)
    begin = end - timedelta(hours=hours)
    params = {
        "query": query,
        "start": begin.isoformat(),
        "end": end.isoformat(),
        "step": step,
    }
    return _query("/api/v1/query_range", params)
def get_targets() -> list[dict]:
    """List the active scrape targets with job, instance, health and last scrape.

    Returns:
        One dict per active target, or a single ``{"error": <message>}``
        dict when the request failed.
    """
    data = _query("/api/v1/targets", {})
    if "error" in data:
        return [{"error": data["error"]}]

    def summarise(target):
        labels = target.get("labels", {})
        return {
            "job": labels.get("job", "unknown"),
            "instance": labels.get("instance", "unknown"),
            "health": target.get("health", "unknown"),
            "last_scrape": target.get("lastScrape", ""),
        }

    active = data.get("data", {}).get("activeTargets", [])
    return [summarise(target) for target in active]
def is_available() -> bool:
    """Return True unless the ``up`` probe query came back as a failure."""
    probe = _query("/api/v1/query", {"query": "up"})
    failed = "error" in probe and probe.get("status") != "success"
    return not failed
def get_cpu(instance: str = None, hours: float = 1) -> dict:
    """Return CPU usage in percent per instance (5-minute rate of non-idle time).

    Args:
        instance: Optional instance prefix; matched as ``<instance>.*``.
        hours: Accepted for interface compatibility; the query does not use it.
    """
    selector = 'mode="idle"'
    if instance:
        selector = f'mode="idle", instance=~"{instance}.*"'
    q = f'100 - (avg by (instance) (rate(node_cpu_seconds_total{{{selector}}}[5m])) * 100)'
    return _extract_metrics(instant_query(q), "cpu_percent")
def get_memory(instance: str = None) -> dict:
    """Return memory usage in percent per instance.

    Args:
        instance: Optional instance prefix; matched as ``<instance>.*``.
    """
    matcher = f'{{instance=~"{instance}.*"}}' if instance else ""
    q = f'(1 - node_memory_MemAvailable_bytes{matcher} / node_memory_MemTotal_bytes{matcher}) * 100'
    return _extract_metrics(instant_query(q), "memory_percent")
def get_disk(instance: str = None) -> dict:
    """Return usage of the root filesystem in percent per instance.

    Args:
        instance: Optional instance prefix; matched as ``<instance>.*``.
    """
    labels = 'mountpoint="/"'
    if instance:
        labels += f', instance=~"{instance}.*"'
    q = f'(1 - node_filesystem_avail_bytes{{{labels}}} / node_filesystem_size_bytes{{{labels}}}) * 100'
    return _extract_metrics(instant_query(q), "disk_percent")
def get_overview() -> str:
    """Get a formatted overview of all monitored instances.

    Returns:
        Markdown text with the scrape targets followed by CPU, memory and
        root-disk usage, or a one-line message when Prometheus is not
        reachable, returns an error, or has no targets.
    """
    if not is_available():
        return "Prometheus is not reachable at " + PROMETHEUS_URL
    targets = get_targets()
    if not targets:
        return "No Prometheus targets found."
    if "error" in targets[0]:
        # get_targets() reports failures as a single error entry that has no
        # "health"/"job" keys; do not try to render it as a target.
        return f"Prometheus error: {targets[0]['error']}"

    lines = ["## Prometheus Targets\n"]
    for t in targets:
        status = "UP" if t.get("health") == "up" else "DOWN"
        lines.append(f"- [{status}] {t.get('job', 'unknown')} ({t.get('instance', 'unknown')})")

    sections = (
        ("\n## CPU Usage", get_cpu),
        ("\n## Memory Usage", get_memory),
        ("\n## Disk Usage (/)", get_disk),
    )
    for heading, fetch in sections:
        metrics = fetch()
        # A section is left out entirely when its query failed.
        if "results" in metrics:
            lines.append(heading)
            lines.extend(
                f"- {r['instance']}: {r['value']:.1f}%" for r in metrics["results"]
            )
    return "\n".join(lines)
def _extract_metrics(data: dict, metric_name: str) -> dict:
if "error" in data and data.get("status") != "success":
return {"error": data.get("error", "unknown error")}
results = []
for r in data.get("data", {}).get("result", []):
instance = r.get("metric", {}).get("instance", "unknown")
value = float(r.get("value", [0, 0])[1])
results.append({"instance": instance, "value": value, "metric": metric_name})
return {"results": results}

View file

@ -0,0 +1,138 @@
"""Proxmox REST API client for querying infrastructure state."""
import requests
import urllib3
# Silence urllib3's InsecureRequestWarning; requests in this module are sent
# with verify=False.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
# Proxmox host name -> address used to reach that host's API.
PROXMOX_HOSTS = {
"pve-hetzner": "100.88.230.59",
"pve1": "100.122.56.60",
"pve3": "100.109.101.12",
}
class ProxmoxClient:
    """Minimal read-only client for the Proxmox REST API of one host.

    Authentication uses an API token when both token name and value are
    given; otherwise a ticket is requested with user name and password on
    first use and reused afterwards. TLS certificates are not verified.
    """

    def __init__(self, host_ip: str, user: str = "root@pam",
                 password: str = "", token_name: str = "", token_value: str = ""):
        """Store connection settings; no request is sent here."""
        self.base_url = f"https://{host_ip}:8006/api2/json"
        self.user = user
        self.password = password
        self.token_name = token_name
        self.token_value = token_value
        self._ticket = None
        self._csrf = None

    def _auth_header(self) -> dict:
        """Return the auth header, requesting a ticket first if needed.

        Returns:
            The ``Authorization`` header for token auth, or an empty dict for
            ticket auth (the ticket is sent as a cookie by ``_get``).

        Raises:
            ConnectionError: If the ticket request fails.
        """
        if self.token_name and self.token_value:
            return {"Authorization": f"PVEAPIToken={self.user}!{self.token_name}={self.token_value}"}
        if self._ticket:
            return {}
        try:
            r = requests.post(
                f"{self.base_url}/access/ticket",
                data={"username": self.user, "password": self.password},
                verify=False, timeout=10,
            )
            r.raise_for_status()
            data = r.json()["data"]
            self._ticket = data["ticket"]
            self._csrf = data["CSRFPreventionToken"]
        except requests.RequestException as e:
            # Chain the cause so the original network/HTTP error stays visible.
            raise ConnectionError(f"Proxmox auth failed for {self.base_url}: {e}") from e
        return {}

    def _get(self, path: str) -> dict:
        """GET ``path`` below the API root and return the ``data`` member."""
        headers = self._auth_header()
        cookies = {}
        if self._ticket:
            cookies["PVEAuthCookie"] = self._ticket
            headers["CSRFPreventionToken"] = self._csrf
        r = requests.get(
            f"{self.base_url}{path}",
            cookies=cookies, headers=headers,
            verify=False, timeout=10,
        )
        r.raise_for_status()
        return r.json().get("data", {})

    def _first_node_name(self):
        """Return the name of the first node, or None when no node is listed."""
        nodes = self._get("/nodes")
        if not isinstance(nodes, list) or not nodes:
            return None
        return nodes[0]["node"]

    def get_node_status(self) -> dict:
        """Return the first entry of ``/nodes`` (empty dict when the list is empty)."""
        nodes = self._get("/nodes")
        if isinstance(nodes, list):
            return nodes[0] if nodes else {}
        return nodes

    def get_containers(self) -> list[dict]:
        """Return the LXC containers of the first node; empty when there is no node."""
        node_name = self._first_node_name()
        if node_name is None:
            return []
        return self._get(f"/nodes/{node_name}/lxc")

    def get_container_status(self, vmid: int) -> dict:
        """Return the current status of container ``vmid`` on the first node.

        Returns:
            The status dict, or ``{"error": "no nodes"}`` when no node is listed.
        """
        node_name = self._first_node_name()
        if node_name is None:
            return {"error": "no nodes"}
        return self._get(f"/nodes/{node_name}/lxc/{vmid}/status/current")
def get_all_containers(passwords: dict = None, tokens: dict = None) -> list[dict]:
    """Query every host in ``PROXMOX_HOSTS`` and merge the container lists.

    Args:
        passwords: Host name -> password; ``"default"`` is the fallback entry.
        tokens: Host name -> ``{"name", "value"}`` API token.

    Returns:
        Container dicts tagged with ``_host`` and ``_host_ip``. A host that
        fails contributes one dict with ``_host``, ``_host_ip`` and ``error``
        instead.
    """
    passwords = {} if passwords is None else passwords
    tokens = {} if tokens is None else tokens

    collected = []
    for host_name, host_ip in PROXMOX_HOSTS.items():
        host_token = tokens.get(host_name, {})
        host_password = passwords.get(host_name, passwords.get("default", ""))
        try:
            client = ProxmoxClient(
                host_ip,
                password=host_password,
                token_name=host_token.get("name", ""),
                token_value=host_token.get("value", ""),
            )
            host_containers = client.get_containers()
            for entry in host_containers:
                entry["_host"] = host_name
                entry["_host_ip"] = host_ip
            collected.extend(host_containers)
        except Exception as exc:
            # One unreachable host must not hide the others.
            collected.append({"_host": host_name, "_host_ip": host_ip, "error": str(exc)})
    return collected
def format_containers(containers: list[dict]) -> str:
    """Render containers as one Markdown table per host.

    Entries are ordered by host and VMID. An entry carrying an ``error`` key
    becomes an ``ERROR`` row showing the first 60 characters of the message.

    Returns:
        The table text, or ``"No containers found."`` for an empty list.
    """
    if not containers:
        return "No containers found."

    mib = 1024 * 1024

    def to_mb(value):
        return value // mib if value else 0

    ordered = sorted(containers, key=lambda c: (c.get("_host", ""), c.get("vmid", 0)))
    out = []
    previous_host = None
    for entry in ordered:
        host = entry.get("_host", "unknown")
        if host != previous_host:
            previous_host = host
            out += [
                f"\n## {host}",
                "| CT | Name | Status | CPU | RAM (MB) |",
                "|---|---|---|---|---|",
            ]
        if "error" in entry:
            out.append(f"| — | ERROR | {entry['error'][:60]} | — | — |")
            continue
        ram = f"{to_mb(entry.get('mem'))}/{to_mb(entry.get('maxmem'))}"
        out.append(
            f"| {entry.get('vmid', '?')} | {entry.get('name', '?')} "
            f"| {entry.get('status', '?')} | {entry.get('cpus', '?')} | {ram} |"
        )
    return "\n".join(out)

View file

@ -1,44 +0,0 @@
"""OpenRouter LLM-Wrapper für natürliche Antworten."""
import requests
import os
import sys
sys.path.insert(0, os.path.dirname(__file__))
from core import config
# Model identifier sent to OpenRouter with every request.
MODEL = "openai/gpt-4o-mini"
# System message sent ahead of every question. It tells the model to answer
# briefly in German, based on the supplied live data, as plain text for Telegram.
SYSTEM_PROMPT = """Du bist der Hausmeister-Bot für ein Homelab mit mehreren Proxmox-Servern.
Du antwortest kurz, präzise und auf Deutsch.
Du bekommst Live-Daten aus Loki (Logs), Proxmox (Container-Status) und homelab.conf.
Wenn alles in Ordnung ist, sag das kurz. Bei Problemen erkläre was los ist und schlage Lösungen vor.
Nutze Emojis sparsam. Formatiere für Telegram (kein Markdown, nur einfacher Text)."""
def _get_api_key() -> str:
    """Read the OpenRouter key (``openrouter_key``) from homelab.conf; empty if unset."""
    api_keys = config.parse_config().api_keys
    return api_keys.get("openrouter_key", "")
def ask(question: str, context: str) -> str:
    """Send a question together with its context to OpenRouter.

    Args:
        question: The user's question.
        context: Live data placed in front of the question.

    Returns:
        The model's answer, or an error text when the API key is missing or
        the request fails.
    """
    api_key = _get_api_key()
    if not api_key:
        return "OpenRouter API Key fehlt in homelab.conf"

    user_content = f"Kontext (Live-Daten):\n{context}\n\nFrage: {question}"
    payload = {
        "model": MODEL,
        "messages": [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": user_content},
        ],
        "max_tokens": 500,
    }
    try:
        response = requests.post(
            "https://openrouter.ai/api/v1/chat/completions",
            headers={"Authorization": f"Bearer {api_key}"},
            json=payload,
            timeout=30,
        )
        response.raise_for_status()
        body = response.json()
        return body["choices"][0]["message"]["content"]
    except Exception as e:
        return f"LLM-Fehler: {e}"

View file

@ -1,138 +0,0 @@
"""Proaktives Monitoring — regelbasiert (Stufe 1) + KI (Stufe 2)."""
import sys
import os
import requests
sys.path.insert(0, os.path.dirname(__file__))
from core import config, loki_client, proxmox_client
def _get_tokens(cfg):
tokens = {}
tn = cfg.raw.get("PVE_TOKEN_HETZNER_NAME", "")
tv = cfg.raw.get("PVE_TOKEN_HETZNER_VALUE", "")
if tn and tv:
tokens["pve-hetzner"] = {"name": tn, "value": tv}
return tokens
def _get_passwords(cfg):
return {
"pve-hetzner": cfg.passwords.get("hetzner", ""),
"pve1": cfg.passwords.get("default", ""),
"pve3": cfg.passwords.get("default", ""),
"default": cfg.passwords.get("default", ""),
}
# VMIDs for which check_all() raises an alert whenever the status is not "running".
CRITICAL_CONTAINERS = [101, 109, 111, 112, 113, 115]
def check_all() -> list[str]:
    """Run the rule-based checks (stage 1) and return the alert messages.

    Checks, in order: critical containers that are not running, containers
    above 90% RAM usage, panic/fatal/OOM log lines of the last 30 minutes,
    and hosts without logs for 35 minutes. Hosts whose container list could
    not be fetched are skipped.
    """
    cfg = config.parse_config()
    alerts = []

    containers = proxmox_client.get_all_containers(
        _get_passwords(cfg), _get_tokens(cfg)
    )
    for ct in (c for c in containers if "error" not in c):
        vmid = ct.get("vmid", 0)
        name = ct.get("name", "?")
        status = ct.get("status", "unknown")
        if status != "running" and vmid in CRITICAL_CONTAINERS:
            alerts.append(f"🔴 CT {vmid} ({name}) ist {status}!")
        mem = ct.get("mem", 0)
        maxmem = ct.get("maxmem", 1)
        if maxmem > 0 and mem / maxmem > 0.90:
            alerts.append(f"⚠️ CT {vmid} ({name}) RAM bei {int(mem / maxmem * 100)}%")

    critical_words = ("panic", "fatal", "oom", "out of memory")

    def is_critical(text):
        if "query=" in text or "caller=metrics" in text:
            return False
        lowered = text.lower()
        return any(word in lowered for word in critical_words)

    errors = loki_client.get_errors(hours=0.5, limit=50)
    hosts = set(
        entry.get("host", "?")
        for entry in errors
        if "error" not in entry and is_critical(entry.get("line", ""))
    )
    # Ignore entries whose host label is the literal placeholder text.
    hosts.discard("${HOSTNAME}")
    if hosts:
        alerts.append(f"🔴 Kritische Fehler (panic/fatal/OOM) auf: {', '.join(hosts)}")

    silent = loki_client.check_silence(minutes=35)
    if silent and "error" not in silent[0]:
        silent_names = [entry["host"] for entry in silent]
        alerts.append(f"⚠️ Keine Logs seit 35+ Min: {', '.join(silent_names)}")
    return alerts
def format_report() -> str:
    """Build the daily report: overall state of all systems.

    The report lists container counts (running / stopped / unreachable), the
    number of error log lines in the last 24 hours (at most 100 are
    counted), silent hosts, and the alerts from ``check_all()``. When Loki
    cannot be queried, the affected lines say so instead of showing zero.
    """
    cfg = config.parse_config()
    lines = ["📋 Tagesbericht Homelab\n"]

    containers = proxmox_client.get_all_containers(
        _get_passwords(cfg), _get_tokens(cfg)
    )
    running = [c for c in containers if c.get("status") == "running"]
    stopped = [c for c in containers if c.get("status") == "stopped"]
    errors_ct = [c for c in containers if "error" in c]
    lines.append(
        f"Container: {len(running)} running, {len(stopped)} stopped, "
        f"{len(errors_ct)} nicht erreichbar"
    )

    errors = loki_client.get_errors(hours=24, limit=100)
    if errors and "error" in errors[0]:
        # A failed query must not be reported as "0 errors".
        lines.append(f"Fehler (24h): unbekannt (Loki: {errors[0]['error']})")
    else:
        error_count = len([e for e in errors if "error" not in e])
        lines.append(f"Fehler (24h): {error_count}")

    silent = loki_client.check_silence(minutes=35)
    if silent and "error" in silent[0]:
        # Same here: "none" would be wrong when the check itself failed.
        lines.append(f"Stille Hosts: unbekannt ({silent[0]['error']})")
    elif silent:
        names = [s["host"] for s in silent]
        lines.append(f"Stille Hosts: {', '.join(names)}")
    else:
        lines.append("Stille Hosts: keine")

    alerts = check_all()
    if alerts:
        lines.append(f"\n⚠️ {len(alerts)} aktive Alarme:")
        lines.extend(alerts)
    else:
        lines.append("\n✅ Keine Alarme — alles läuft.")
    return "\n".join(lines)
def send_alert(token: str, chat_id: str, message: str):
    """Send a text message through the Telegram Bot API.

    The text is cut to 4000 characters so that a long alert list stays below
    Telegram's message size limit and is still delivered.

    Args:
        token: Bot token.
        chat_id: Target chat.
        message: Text to send.

    Returns:
        True when Telegram answered with a success status code, else False.
    """
    response = requests.post(
        f"https://api.telegram.org/bot{token}/sendMessage",
        data={"chat_id": chat_id, "text": message[:4000]},
        timeout=10,
    )
    return response.ok
def run_check_and_alert():
    """Cron entry point: run the checks and send any alerts via Telegram.

    Does nothing when the bot token or the chat ID is missing from the
    configuration, or when no alert was raised.
    """
    raw = config.parse_config().raw
    token = raw.get("TG_HAUSMEISTER_TOKEN", "")
    chat_id = raw.get("TG_CHAT_ID", "")
    if token and chat_id:
        alerts = check_all()
        if alerts:
            send_alert(token, chat_id, "🔧 Hausmeister-Check\n\n" + "\n".join(alerts))
# Script entry: "report" as first argument prints the daily report; anything
# else runs the check and sends alerts.
if __name__ == "__main__":
    import sys
    wants_report = len(sys.argv) > 1 and sys.argv[1] == "report"
    if not wants_report:
        run_check_and_alert()
    else:
        print(format_report())

View file

@ -1,2 +0,0 @@
python-telegram-bot>=21.0
requests>=2.31

View file

@ -1,224 +0,0 @@
"""Orbitalo Hausmeister — Telegram Bot für Homelab-Management."""
import asyncio
import logging
import sys
import os
sys.path.insert(0, os.path.dirname(__file__))
from telegram import BotCommand, Update
from telegram.ext import (
Application, CommandHandler, MessageHandler, filters, ContextTypes,
)
# Command menu (command, description) registered with Telegram in main()'s post_init hook.
BOT_COMMANDS = [
BotCommand("status", "Alle Container"),
BotCommand("errors", "Aktuelle Fehler"),
BotCommand("ct", "Container-Detail (/ct 109)"),
BotCommand("health", "Health-Check (/health wordpress)"),
BotCommand("logs", "Letzte Logs (/logs rss-manager)"),
BotCommand("silence", "Stille Hosts"),
BotCommand("report", "Tagesbericht"),
BotCommand("check", "Monitoring-Check"),
BotCommand("start", "Hilfe anzeigen"),
]
import context
import llm
import monitor
from core import config
# Root logging setup: timestamp, logger name, level and message, at INFO level.
logging.basicConfig(
format="%(asctime)s [%(name)s] %(levelname)s: %(message)s",
level=logging.INFO,
)
log = logging.getLogger("hausmeister")
# Chat IDs allowed to use the bot; filled by _load_token_and_chat().
ALLOWED_CHAT_IDS: set[int] = set()
def _load_token_and_chat():
    """Read the bot token from homelab.conf and register the allowed chat.

    A configured ``TG_CHAT_ID`` is added to ``ALLOWED_CHAT_IDS`` as an int.

    Returns:
        The value of ``TG_HAUSMEISTER_TOKEN`` (empty string when unset).
    """
    raw = config.parse_config().raw
    chat_id = raw.get("TG_CHAT_ID", "")
    if chat_id:
        ALLOWED_CHAT_IDS.add(int(chat_id))
    return raw.get("TG_HAUSMEISTER_TOKEN", "")
def _authorized(update: Update) -> bool:
    """Return True when the update's chat may use the bot."""
    # NOTE(review): with an empty allow-list every chat is accepted; confirm
    # that this fail-open default is intended.
    return not ALLOWED_CHAT_IDS or update.effective_chat.id in ALLOWED_CHAT_IDS
async def cmd_start(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
    """Handle /start: reply with the list of available commands."""
    if _authorized(update):
        help_lines = [
            "🔧 Orbitalo Hausmeister-Bot",
            "",
            "Befehle:",
            "/status — Alle Container",
            "/errors — Aktuelle Fehler",
            "/ct <nr> — Container-Detail",
            "/health <name> — Health-Check",
            "/logs <name> — Letzte Logs",
            "/silence — Stille Hosts",
            "/report — Tagesbericht",
            "/check — Monitoring-Check",
            "",
            "Oder einfach eine Frage stellen!",
        ]
        await update.message.reply_text("\n".join(help_lines))
async def cmd_status(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
    """Handle /status: reply with the status table of all containers.

    The data collection does blocking HTTP requests, so it runs in a worker
    thread to keep the event loop responsive. The reply is cut to 4000
    characters.
    """
    if not _authorized(update):
        return
    await update.message.reply_text("⏳ Lade Container-Status...")
    try:
        text = await asyncio.to_thread(context.gather_status)
        if len(text) > 4000:
            text = text[:4000] + "\n..."
        await update.message.reply_text(text)
    except Exception as e:
        await update.message.reply_text(f"Fehler: {e}")
async def cmd_errors(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
    """Handle /errors: reply with the error log lines of the last two hours.

    The Loki query is blocking and therefore runs in a worker thread.
    """
    if not _authorized(update):
        return
    await update.message.reply_text("⏳ Suche Fehler...")
    try:
        text = await asyncio.to_thread(context.gather_errors, hours=2)
        await update.message.reply_text(text[:4000])
    except Exception as e:
        await update.message.reply_text(f"Fehler: {e}")
async def cmd_ct(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
    """Handle /ct <nr>: reply with the detail status of one container.

    Only the first argument is used. The blocking Proxmox lookup runs in a
    worker thread.
    """
    if not _authorized(update):
        return
    args = ctx.args
    if not args:
        await update.message.reply_text("Bitte CT-Nummer angeben: /ct 109")
        return
    try:
        text = await asyncio.to_thread(context.gather_container_status, args[0])
        await update.message.reply_text(text)
    except Exception as e:
        await update.message.reply_text(f"Fehler: {e}")
async def cmd_health(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
    """Handle /health <name>: reply with the health summary of one host.

    Only the first argument is used. The blocking Loki queries run in a
    worker thread.
    """
    if not _authorized(update):
        return
    args = ctx.args
    if not args:
        await update.message.reply_text("Bitte Hostname angeben: /health wordpress")
        return
    try:
        text = await asyncio.to_thread(context.gather_health, args[0])
        await update.message.reply_text(text)
    except Exception as e:
        await update.message.reply_text(f"Fehler: {e}")
async def cmd_logs(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
    """Handle /logs <name>: reply with the latest log lines of one host.

    Only the first argument is used. The blocking Loki query runs in a
    worker thread; the reply is cut to 4000 characters.
    """
    if not _authorized(update):
        return
    args = ctx.args
    if not args:
        await update.message.reply_text("Bitte Hostname angeben: /logs rss-manager")
        return
    try:
        text = await asyncio.to_thread(context.gather_logs, args[0])
        await update.message.reply_text(text[:4000])
    except Exception as e:
        await update.message.reply_text(f"Fehler: {e}")
async def cmd_silence(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
    """Handle /silence: reply with the hosts that currently send no logs.

    The check issues one blocking Loki query per host and therefore runs in
    a worker thread.
    """
    if not _authorized(update):
        return
    try:
        text = await asyncio.to_thread(context.gather_silence)
        await update.message.reply_text(text)
    except Exception as e:
        await update.message.reply_text(f"Fehler: {e}")
async def cmd_report(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
    """Handle /report: reply with the daily report.

    Building the report is blocking and runs in a worker thread; the reply
    is cut to 4000 characters.
    """
    if not _authorized(update):
        return
    await update.message.reply_text("⏳ Erstelle Tagesbericht...")
    try:
        text = await asyncio.to_thread(monitor.format_report)
        await update.message.reply_text(text[:4000])
    except Exception as e:
        await update.message.reply_text(f"Fehler: {e}")
async def cmd_check(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
    """Handle /check: run the monitoring checks and reply with the alerts.

    The checks are blocking and run in a worker thread. The reply is cut to
    4000 characters, like the other long replies of this bot.
    """
    if not _authorized(update):
        return
    await update.message.reply_text("⏳ Prüfe Systeme...")
    try:
        alerts = await asyncio.to_thread(monitor.check_all)
        if alerts:
            text = f"⚠️ {len(alerts)} Alarme:\n\n" + "\n".join(alerts)
        else:
            text = "✅ Keine Alarme — alles läuft."
        await update.message.reply_text(text[:4000])
    except Exception as e:
        await update.message.reply_text(f"Fehler: {e}")
async def handle_message(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
    """Answer a free-text question: gather context, ask the LLM, reply.

    Both steps do blocking HTTP requests (the LLM call alone may wait up to
    its 30-second timeout), so each runs in a worker thread instead of on
    the event loop. The reply is cut to 4000 characters.
    """
    if not _authorized(update):
        return
    question = update.message.text
    if not question:
        return
    await update.message.reply_text("🤔 Denke nach...")
    try:
        data = await asyncio.to_thread(context.gather_context_for_question, question)
        answer = await asyncio.to_thread(llm.ask, question, data)
        await update.message.reply_text(answer[:4000])
    except Exception as e:
        log.exception("Fehler bei Freitext")
        await update.message.reply_text(f"Fehler: {e}")
def main():
    """Configure the Telegram application and run it in polling mode.

    Exits with status 1 when no bot token is configured.
    """
    token = _load_token_and_chat()
    if not token:
        log.error("TG_HAUSMEISTER_TOKEN fehlt in homelab.conf!")
        sys.exit(1)

    log.info("Starte Orbitalo Hausmeister-Bot...")
    app = Application.builder().token(token).build()

    command_table = (
        ("start", cmd_start),
        ("status", cmd_status),
        ("errors", cmd_errors),
        ("ct", cmd_ct),
        ("health", cmd_health),
        ("logs", cmd_logs),
        ("silence", cmd_silence),
        ("report", cmd_report),
        ("check", cmd_check),
    )
    for command, callback in command_table:
        app.add_handler(CommandHandler(command, callback))
    # Any text that is not a command is treated as a free-text question.
    app.add_handler(MessageHandler(filters.TEXT & ~filters.COMMAND, handle_message))

    async def post_init(application):
        await application.bot.set_my_commands(BOT_COMMANDS)
        log.info("Kommandomenü registriert")

    app.post_init = post_init
    log.info("Bot läuft — polling gestartet")
    app.run_polling(allowed_updates=Update.ALL_TYPES)
# Start the bot only when this file is executed as a script.
if __name__ == "__main__":
    main()