# Definitions of homelab-ai-bot/context.py.
# Relies on the module preamble for: re, and core.config / core.loki_client /
# core.proxmox_client.

# Minutes without log lines after which a host is reported as silent.
_SILENCE_MINUTES = 35

# Status -> emoji used by the health summary; unknown statuses fall back to "❓".
_HEALTH_EMOJI = {"healthy": "✅", "warning": "⚠️", "critical": "🔴"}

# Keyword groups that select a data source in gather_context_for_question().
# Matching is by substring on purpose, so compounds such as "Fehlermeldung"
# still trigger the "fehler" section.
_ERROR_WORDS = ("fehler", "error", "problem", "kaputt", "down")
_STATUS_WORDS = ("status", "läuft", "container", "übersicht", "alles")
_SILENCE_WORDS = ("still", "silence", "stumm", "logs")
_KNOWN_SERVICES = (
    "wordpress", "rss", "seafile", "forgejo", "portainer",
    "fuenfvoracht", "redax", "flugscanner", "edelmetall",
)


def _load_config():
    """Return the configuration object produced by ``config.parse_config()``."""
    return config.parse_config()


def _get_tokens(cfg):
    """Build the per-host Proxmox API token map from ``cfg.raw``.

    Only ``pve-hetzner`` is supported; it is included only when both the
    token name and the token value are configured.

    Returns:
        ``{"pve-hetzner": {"name": ..., "value": ...}}`` or ``{}``.
    """
    token_name = cfg.raw.get("PVE_TOKEN_HETZNER_NAME", "")
    token_value = cfg.raw.get("PVE_TOKEN_HETZNER_VALUE", "")
    if token_name and token_value:
        return {"pve-hetzner": {"name": token_name, "value": token_value}}
    return {}


def _get_passwords(cfg):
    """Build the per-host password map from ``cfg.passwords``.

    Returns:
        A dict keyed by Proxmox host name plus a ``"default"`` entry.
        Missing passwords are represented by ``""``.
    """
    default_pw = cfg.passwords.get("default", "")
    return {
        "pve-hetzner": cfg.passwords.get("hetzner", ""),
        "pve1": default_pw,
        "pve3": default_pw,
        "default": default_pw,
    }


def _logql_quote(value: str) -> str:
    """Escape *value* for use inside a double-quoted LogQL string literal."""
    # Backslash first, otherwise the backslashes added for quotes get doubled.
    return value.replace("\\", "\\\\").replace('"', '\\"')


def _format_health(health: dict, container: str, hours: float) -> str:
    """Render the health dict of one container as a short text block.

    Args:
        health: Result of ``loki_client.get_health``.
        container: Name shown when *health* carries no ``host`` entry.
        hours: Look-back window; selects the ``errors_last_<hours>h`` key.
    """
    window = f"{hours:g}"  # 24 and 24.0 both render as "24"
    emoji = _HEALTH_EMOJI.get(health.get("status", ""), "❓")
    # NOTE(review): key name assumed to follow "errors_last_<hours>h", as the
    # original placeholder suggests — confirm against loki_client.get_health.
    error_count = health.get(f"errors_last_{window}h", "?")
    sending = "ja" if health.get("sending_logs") else "nein"
    return (
        f"{emoji} {health.get('host', container)}\n"
        f"Status: {health.get('status', '?')}\n"
        f"Fehler ({window}h): {error_count}\n"
        f"Sendet Logs: {sending}"
    )


def gather_status() -> str:
    """Return the formatted status of all containers (used by /status)."""
    cfg = _load_config()
    containers = proxmox_client.get_all_containers(
        _get_passwords(cfg), _get_tokens(cfg)
    )
    return proxmox_client.format_containers(containers)


def gather_errors(hours: float = 2) -> str:
    """Return formatted recent error logs from Loki (used by /errors).

    Args:
        hours: Look-back window passed to ``loki_client.get_errors``.
    """
    entries = loki_client.get_errors(hours=hours, limit=30)
    return loki_client.format_logs(entries)


def gather_container_status(query: str) -> str:
    """Return a status summary for a single container.

    Args:
        query: Free text. A standalone three-digit number is taken as the
            VMID; otherwise the stripped text is used as container name.

    Returns:
        A multi-line summary, or a one-line German error message when the
        container or its host is unknown or the Proxmox call fails.
    """
    cfg = _load_config()

    vmid_match = re.search(r"\b(\d{3})\b", query)
    if vmid_match:
        vmid, name = int(vmid_match.group(1)), None
    else:
        vmid, name = None, query.strip()

    ct = config.get_container(cfg, vmid=vmid, name=name)
    if not ct:
        return f"Container nicht gefunden: {query}"

    host_ip = proxmox_client.PROXMOX_HOSTS.get(ct.host)
    if not host_ip:
        return f"Host nicht erreichbar: {ct.host}"

    token = _get_tokens(cfg).get(ct.host, {})
    passwords = _get_passwords(cfg)
    # Hosts without an explicit entry use the configured default password
    # instead of an empty one (that is what the "default" key is for).
    password = passwords.get(ct.host, passwords["default"])

    try:
        client = proxmox_client.ProxmoxClient(
            host_ip,
            password=password,
            token_name=token.get("name", ""),
            token_value=token.get("value", ""),
        )
        status = client.get_container_status(ct.vmid)
    except Exception as e:  # boundary to the Proxmox API: report, don't crash
        return f"Proxmox-Fehler: {e}"

    mib = 1024 * 1024
    mem_mb = status.get("mem", 0) // mib
    maxmem_mb = status.get("maxmem", 0) // mib
    uptime_h = status.get("uptime", 0) // 3600

    return (
        f"CT {ct.vmid} — {ct.name}\n"
        f"Host: {ct.host}\n"
        f"Status: {status.get('status', '?')}\n"
        f"RAM: {mem_mb}/{maxmem_mb} MB\n"
        f"CPU: {status.get('cpus', '?')} Kerne\n"
        f"Uptime: {uptime_h}h\n"
        f"Tailscale: {ct.tailscale_ip or '—'}\n"
        f"Dienste: {ct.services}"
    )


def gather_logs(container: str, hours: float = 1) -> str:
    """Return the latest formatted Loki log lines of one host.

    Args:
        container: Value of the ``host`` label; comes straight from chat
            input, so it is escaped before being placed in the selector.
        hours: Look-back window passed to ``loki_client.query_logs``.
    """
    selector = f'{{host="{_logql_quote(container)}"}}'
    entries = loki_client.query_logs(selector, hours=hours, limit=20)
    return loki_client.format_logs(entries)


def gather_health(container: str, hours: float = 24) -> str:
    """Return a short health summary for one container.

    Args:
        container: Host name passed to ``loki_client.get_health``.
        hours: Look-back window (defaults to the previous fixed 24 hours).
    """
    health = loki_client.get_health(container, hours=hours)
    return _format_health(health, container, hours)


def gather_silence() -> str:
    """Return which hosts have not sent logs within the silence window."""
    silent = loki_client.check_silence(minutes=_SILENCE_MINUTES)
    if not silent:
        return "✅ Alle Hosts senden Logs."

    # A failed check is reported as a single entry carrying an "error" key.
    if "error" in silent[0]:
        return f"Fehler: {silent[0]['error']}"

    lines = [f"⚠️ Stille Hosts (keine Logs seit {_SILENCE_MINUTES}+ Min):\n"]
    lines.extend(f" • {entry['host']}" for entry in silent)
    return "\n".join(lines)


def gather_context_for_question(question: str) -> str:
    """Collect the data sections relevant to a free-text question.

    Sections are chosen by keyword; when nothing matches, the container
    status and the errors of the last hour are used as a default.

    Returns:
        The selected sections joined by blank lines, each introduced by a
        ``=== title ===`` header.
    """
    q = question.lower()
    parts = []

    def mentions(words):
        return any(word in q for word in words)

    if mentions(_ERROR_WORDS):
        parts.append("=== Aktuelle Fehler ===\n" + gather_errors(hours=2))

    if mentions(_STATUS_WORDS):
        parts.append("=== Container Status ===\n" + gather_status())

    if mentions(_SILENCE_WORDS):
        parts.append("=== Stille Hosts ===\n" + gather_silence())

    ct_match = re.search(r"\bct[- ]?(\d{3})\b", q)
    if ct_match:
        vmid = ct_match.group(1)
        parts.append(f"=== CT {vmid} ===\n" + gather_container_status(vmid))

    for name in _KNOWN_SERVICES:
        if name in q:
            parts.append(f"=== {name} ===\n" + gather_container_status(name))

    if not parts:
        parts.append("=== Container Status ===\n" + gather_status())
        parts.append("=== Aktuelle Fehler ===\n" + gather_errors(hours=1))

    return "\n\n".join(parts)
# Definitions of homelab-ai-bot/llm.py.
# Relies on the module preamble for: requests and core.config.

MODEL = "openai/gpt-4o-mini"
SYSTEM_PROMPT = """Du bist der Hausmeister-Bot für ein Homelab mit mehreren Proxmox-Servern.
Du antwortest kurz, präzise und auf Deutsch.
Du bekommst Live-Daten aus Loki (Logs), Proxmox (Container-Status) und homelab.conf.
Wenn alles in Ordnung ist, sag das kurz.
Bei Problemen erkläre was los ist und schlage Lösungen vor.
Nutze Emojis sparsam. Formatiere für Telegram (kein Markdown, nur einfacher Text)."""

_API_URL = "https://openrouter.ai/api/v1/chat/completions"


def _get_api_key() -> str:
    """Return the ``openrouter_key`` entry of the config's ``api_keys`` ('' if unset)."""
    cfg = config.parse_config()
    return cfg.api_keys.get("openrouter_key", "")


def _extract_answer(payload) -> str:
    """Pull the assistant text out of a chat-completions response body.

    Always returns a non-empty string: either the model's answer or a
    message starting with ``LLM-Fehler:`` that says what was wrong with the
    response (API error object, unexpected shape, empty content).
    """
    if isinstance(payload, dict) and payload.get("error"):
        error = payload["error"]
        detail = error.get("message", error) if isinstance(error, dict) else error
        return f"LLM-Fehler: {detail}"

    try:
        content = payload["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError):
        return "LLM-Fehler: Unerwartete Antwort von OpenRouter"

    if not content:
        # The API may send ``content: null``; callers slice the result, so
        # never hand back None.
        return "LLM-Fehler: Leere Antwort von OpenRouter"
    return content


def ask(question: str, context: str) -> str:
    """Send *question* together with the collected *context* to OpenRouter.

    Args:
        question: The user's free-text question.
        context: Live data that is put in front of the question.

    Returns:
        The model's answer, or a German error message when the API key is
        missing, the HTTP request fails, or the response is unusable.
        Request and response-parsing failures are reported as text rather
        than raised.
    """
    api_key = _get_api_key()
    if not api_key:
        return "OpenRouter API Key fehlt in homelab.conf"

    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": f"Kontext (Live-Daten):\n{context}\n\nFrage: {question}"},
    ]

    try:
        response = requests.post(
            _API_URL,
            headers={"Authorization": f"Bearer {api_key}"},
            json={"model": MODEL, "messages": messages, "max_tokens": 500},
            timeout=30,
        )
        response.raise_for_status()
        payload = response.json()
    except (requests.RequestException, ValueError) as e:
        # Network/HTTP failures and non-JSON bodies.
        return f"LLM-Fehler: {e}"

    return _extract_answer(payload)
# Definitions of homelab-ai-bot/monitor.py.
# Relies on the module preamble for: sys, requests, and core.config /
# core.loki_client / core.proxmox_client.

# VMIDs whose containers must always be running.
CRITICAL_CONTAINERS = [101, 109, 111, 112, 113, 115]

_RAM_ALERT_RATIO = 0.90       # alert when mem / maxmem exceeds this
_SILENCE_MINUTES = 35         # host counts as silent after this many minutes
_ALERT_WINDOW_HOURS = 0.5     # error look-back used for alerting
_ALERT_ERROR_LIMIT = 50
_PANIC_WORDS = ("panic", "fatal", "oom", "out of memory")
# Lines containing these markers are skipped even when they hold a panic word.
_NOISE_MARKERS = ("query=", "caller=metrics")
_TELEGRAM_MAX_CHARS = 4096    # sendMessage rejects longer texts


def _get_tokens(cfg):
    """Build the per-host Proxmox API token map from ``cfg.raw``.

    Same logic as the helper of the same name in context.py. Only
    ``pve-hetzner`` is supported, and only when name and value are both set.
    """
    token_name = cfg.raw.get("PVE_TOKEN_HETZNER_NAME", "")
    token_value = cfg.raw.get("PVE_TOKEN_HETZNER_VALUE", "")
    if token_name and token_value:
        return {"pve-hetzner": {"name": token_name, "value": token_value}}
    return {}


def _get_passwords(cfg):
    """Build the per-host password map (plus ``"default"``) from ``cfg.passwords``."""
    default_pw = cfg.passwords.get("default", "")
    return {
        "pve-hetzner": cfg.passwords.get("hetzner", ""),
        "pve1": default_pw,
        "pve3": default_pw,
        "default": default_pw,
    }


def _container_alerts(containers) -> list[str]:
    """Return alerts for stopped critical containers and high RAM usage."""
    alerts = []
    for ct in containers:
        if "error" in ct:  # placeholder entry, not a real container record
            continue
        vmid = ct.get("vmid", 0)
        name = ct.get("name", "?")
        status = ct.get("status", "unknown")
        if vmid in CRITICAL_CONTAINERS and status != "running":
            alerts.append(f"🔴 CT {vmid} ({name}) ist {status}!")

        mem = ct.get("mem", 0)
        maxmem = ct.get("maxmem", 1)
        if maxmem > 0 and mem / maxmem > _RAM_ALERT_RATIO:
            pct = int(mem / maxmem * 100)
            alerts.append(f"⚠️ CT {vmid} ({name}) RAM bei {pct}%")
    return alerts


def _panic_hosts(errors) -> list[str]:
    """Return the sorted host names that logged a panic/fatal/OOM line."""
    hosts = set()
    for entry in errors:
        if "error" in entry:  # query-failure marker, not a log line
            continue
        line = entry.get("line", "")
        if any(marker in line for marker in _NOISE_MARKERS):
            continue
        lowered = line.lower()
        if any(word in lowered for word in _PANIC_WORDS):
            hosts.add(entry.get("host", "?"))
    hosts.discard("${HOSTNAME}")  # unexpanded placeholder, not a real host
    # Sorted so that the alert text is stable between runs.
    return sorted(hosts)


def _silent_hosts(silent):
    """Split a ``check_silence`` result into ``(failed, host_names)``.

    ``failed`` is True when the first entry carries an ``"error"`` key,
    i.e. the check itself did not work and nothing is known about silence.
    """
    if silent and "error" in silent[0]:
        return True, []
    return False, [entry["host"] for entry in silent or []]


def _build_alerts(containers, errors, silent) -> list[str]:
    """Apply all alert rules to already-fetched data (no I/O)."""
    alerts = _container_alerts(containers)

    hosts = _panic_hosts(errors)
    if hosts:
        alerts.append(f"🔴 Kritische Fehler (panic/fatal/OOM) auf: {', '.join(hosts)}")

    _, names = _silent_hosts(silent)
    if names:
        alerts.append(f"⚠️ Keine Logs seit {_SILENCE_MINUTES}+ Min: {', '.join(names)}")

    return alerts


def _render_report(containers, errors_24h, silent, alerts) -> str:
    """Format the daily report from already-fetched data (no I/O)."""
    running = sum(1 for c in containers if c.get("status") == "running")
    stopped = sum(1 for c in containers if c.get("status") == "stopped")
    unreachable = sum(1 for c in containers if "error" in c)
    error_count = sum(1 for e in errors_24h if "error" not in e)

    lines = [
        "📋 Tagesbericht Homelab\n",
        f"Container: {running} running, {stopped} stopped, {unreachable} nicht erreichbar",
        f"Fehler (24h): {error_count}",
    ]

    failed, names = _silent_hosts(silent)
    if failed:
        # A failed check must not be reported as "no silent hosts".
        lines.append("Stille Hosts: unbekannt (Abfrage fehlgeschlagen)")
    elif names:
        lines.append(f"Stille Hosts: {', '.join(names)}")
    else:
        lines.append("Stille Hosts: keine")

    if alerts:
        lines.append(f"\n⚠️ {len(alerts)} aktive Alarme:")
        lines.extend(alerts)
    else:
        lines.append("\n✅ Keine Alarme — alles läuft.")

    return "\n".join(lines)


def check_all() -> list[str]:
    """Run the rule-based check (stage 1) and return the list of alerts."""
    cfg = config.parse_config()
    containers = proxmox_client.get_all_containers(
        _get_passwords(cfg), _get_tokens(cfg)
    )
    errors = loki_client.get_errors(
        hours=_ALERT_WINDOW_HOURS, limit=_ALERT_ERROR_LIMIT
    )
    silent = loki_client.check_silence(minutes=_SILENCE_MINUTES)
    return _build_alerts(containers, errors, silent)


def format_report() -> str:
    """Return the daily report: overall state of all systems plus alerts."""
    cfg = config.parse_config()
    containers = proxmox_client.get_all_containers(
        _get_passwords(cfg), _get_tokens(cfg)
    )
    errors_24h = loki_client.get_errors(hours=24, limit=100)
    silent = loki_client.check_silence(minutes=_SILENCE_MINUTES)

    # Container and silence data are reused for the alert rules instead of
    # being fetched a second time; only the short error window is extra.
    recent_errors = loki_client.get_errors(
        hours=_ALERT_WINDOW_HOURS, limit=_ALERT_ERROR_LIMIT
    )
    alerts = _build_alerts(containers, recent_errors, silent)

    return _render_report(containers, errors_24h, silent, alerts)


def send_alert(token: str, chat_id: str, message: str):
    """Send *message* to a Telegram chat via the Bot API.

    Args:
        token: Bot token.
        chat_id: Target chat.
        message: Text to send; cut to the Bot API's 4096-character limit so
            that an over-long alert is shortened instead of rejected.

    Returns:
        True when Telegram answered with a success status, else False.
        Connection errors and timeouts from ``requests`` still propagate.
    """
    response = requests.post(
        f"https://api.telegram.org/bot{token}/sendMessage",
        data={"chat_id": chat_id, "text": message[:_TELEGRAM_MAX_CHARS]},
        timeout=10,
    )
    return response.ok


def run_check_and_alert():
    """Cron entry point: run the check and send alerts if there are any.

    Does nothing when the bot token or the chat id is not configured.
    """
    cfg = config.parse_config()
    token = cfg.raw.get("TG_HAUSMEISTER_TOKEN", "")
    chat_id = cfg.raw.get("TG_CHAT_ID", "")
    if not token or not chat_id:
        return

    alerts = check_all()
    if not alerts:
        return

    msg = "🔧 Hausmeister-Check\n\n" + "\n".join(alerts)
    if not send_alert(token, chat_id, msg):
        # Make a rejected alert visible in the cron output.
        print("Telegram-Versand fehlgeschlagen", file=sys.stderr)


if __name__ == "__main__":
    import sys
    if len(sys.argv) > 1 and sys.argv[1] == "report":
        print(format_report())
    else:
        run_check_and_alert()
# Definitions of homelab-ai-bot/telegram_bot.py.
# Relies on the module preamble for: asyncio, logging, sys, the telegram /
# telegram.ext names, BOT_COMMANDS, and the local modules context, llm,
# monitor and core.config.

logging.basicConfig(
    format="%(asctime)s [%(name)s] %(levelname)s: %(message)s",
    level=logging.INFO,
)
log = logging.getLogger("hausmeister")

# Chats that may talk to the bot; filled by _load_token_and_chat().
ALLOWED_CHAT_IDS: set[int] = set()

# Replies are cut a little below Telegram's 4096-character message limit.
_MAX_REPLY_CHARS = 4000


def _parse_chat_ids(raw) -> set[int]:
    """Parse one chat id or a comma-separated list of ids.

    Raises:
        ValueError: If an entry is not an integer.
    """
    return {int(part) for part in str(raw).split(",") if part.strip()}


def _load_token_and_chat():
    """Read bot token and allowed chat ids from the config.

    Adds the ids from ``TG_CHAT_ID`` to ``ALLOWED_CHAT_IDS`` and returns the
    value of ``TG_HAUSMEISTER_TOKEN`` ('' if unset).

    Raises:
        ValueError: If ``TG_CHAT_ID`` contains a non-integer entry.
    """
    cfg = config.parse_config()
    token = cfg.raw.get("TG_HAUSMEISTER_TOKEN", "")
    ALLOWED_CHAT_IDS.update(_parse_chat_ids(cfg.raw.get("TG_CHAT_ID", "")))
    if not ALLOWED_CHAT_IDS:
        # _authorized() lets everybody in when no id is configured.
        log.warning("TG_CHAT_ID fehlt — der Bot antwortet jedem Chat!")
    return token


def _authorized(update: "Update") -> bool:
    """Return True when the update's chat may use the bot.

    With no configured chat ids every chat is accepted (unchanged
    behaviour); otherwise the chat id must be in ``ALLOWED_CHAT_IDS``.
    Updates without a chat are rejected.
    """
    if not ALLOWED_CHAT_IDS:
        return True
    chat = update.effective_chat
    return chat is not None and chat.id in ALLOWED_CHAT_IDS


def _truncate(text, limit: int = _MAX_REPLY_CHARS) -> str:
    """Make *text* safe to send: never empty, never longer than the limit."""
    if not text:
        return "(keine Ausgabe)"  # Telegram rejects empty messages
    if len(text) > limit:
        return text[:limit] + "\n..."
    return text


async def _reply_with(update: "Update", producer, *args, progress=None, **kwargs):
    """Run a blocking *producer* off the event loop and reply with its text.

    Args:
        update: Incoming update; the reply goes to its effective message
            (which also covers edited messages).
        producer: Synchronous callable returning the reply text.
        *args, **kwargs: Passed through to *producer*.
        progress: Optional "working on it" text sent before the call.

    An exception raised by *producer* is logged and reported to the chat as
    ``Fehler: <message>``.
    """
    message = update.effective_message
    if message is None:
        return
    if progress:
        await message.reply_text(progress)
    try:
        # The producers do network I/O with requests; running them in a
        # worker thread keeps the bot responsive meanwhile.
        text = await asyncio.to_thread(producer, *args, **kwargs)
    except Exception as e:  # handler boundary: tell the user, keep the bot up
        log.exception("Fehler in %s", getattr(producer, "__name__", producer))
        text = f"Fehler: {e}"
    await message.reply_text(_truncate(text))


async def _reply_text(update: "Update", text: str):
    """Send a fixed text as reply, if the update has a message."""
    message = update.effective_message
    if message is not None:
        await message.reply_text(text)


async def cmd_start(update: "Update", ctx: "ContextTypes.DEFAULT_TYPE"):
    """/start — show the help text."""
    if not _authorized(update):
        return
    await _reply_text(
        update,
        "🔧 Orbitalo Hausmeister-Bot\n\n"
        "Befehle:\n"
        "/status — Alle Container\n"
        "/errors — Aktuelle Fehler\n"
        "/ct — Container-Detail\n"
        "/health — Health-Check\n"
        "/logs — Letzte Logs\n"
        "/silence — Stille Hosts\n"
        "/report — Tagesbericht\n"
        "/check — Monitoring-Check\n\n"
        "Oder einfach eine Frage stellen!",
    )


async def cmd_status(update: "Update", ctx: "ContextTypes.DEFAULT_TYPE"):
    """/status — status of all containers."""
    if not _authorized(update):
        return
    await _reply_with(
        update, context.gather_status, progress="⏳ Lade Container-Status..."
    )


async def cmd_errors(update: "Update", ctx: "ContextTypes.DEFAULT_TYPE"):
    """/errors — errors of the last two hours."""
    if not _authorized(update):
        return
    await _reply_with(
        update, context.gather_errors, hours=2, progress="⏳ Suche Fehler..."
    )


async def cmd_ct(update: "Update", ctx: "ContextTypes.DEFAULT_TYPE"):
    """/ct <vmid|name> — details of one container."""
    if not _authorized(update):
        return
    if not ctx.args:
        await _reply_text(update, "Bitte CT-Nummer angeben: /ct 109")
        return
    await _reply_with(update, context.gather_container_status, ctx.args[0])


async def cmd_health(update: "Update", ctx: "ContextTypes.DEFAULT_TYPE"):
    """/health <host> — health check of one host."""
    if not _authorized(update):
        return
    if not ctx.args:
        await _reply_text(update, "Bitte Hostname angeben: /health wordpress")
        return
    await _reply_with(update, context.gather_health, ctx.args[0])


async def cmd_logs(update: "Update", ctx: "ContextTypes.DEFAULT_TYPE"):
    """/logs <host> — latest log lines of one host."""
    if not _authorized(update):
        return
    if not ctx.args:
        await _reply_text(update, "Bitte Hostname angeben: /logs rss-manager")
        return
    await _reply_with(update, context.gather_logs, ctx.args[0])


async def cmd_silence(update: "Update", ctx: "ContextTypes.DEFAULT_TYPE"):
    """/silence — hosts that stopped sending logs."""
    if not _authorized(update):
        return
    await _reply_with(update, context.gather_silence)


async def cmd_report(update: "Update", ctx: "ContextTypes.DEFAULT_TYPE"):
    """/report — daily report."""
    if not _authorized(update):
        return
    await _reply_with(
        update, monitor.format_report, progress="⏳ Erstelle Tagesbericht..."
    )


def _check_text() -> str:
    """Run the monitoring check and format its alerts for the chat."""
    alerts = monitor.check_all()
    if alerts:
        return f"⚠️ {len(alerts)} Alarme:\n\n" + "\n".join(alerts)
    return "✅ Keine Alarme — alles läuft."


async def cmd_check(update: "Update", ctx: "ContextTypes.DEFAULT_TYPE"):
    """/check — run the monitoring check now."""
    if not _authorized(update):
        return
    await _reply_with(update, _check_text, progress="⏳ Prüfe Systeme...")


def _answer_question(question: str) -> str:
    """Collect context for *question* and let the LLM answer it."""
    data = context.gather_context_for_question(question)
    return llm.ask(question, data)


async def handle_message(update: "Update", ctx: "ContextTypes.DEFAULT_TYPE"):
    """Free-text questions: collect context, ask the LLM, reply."""
    if not _authorized(update):
        return
    message = update.effective_message
    question = message.text if message is not None else None
    if not question:
        return
    await _reply_with(
        update, _answer_question, question, progress="🤔 Denke nach..."
    )


async def _post_init(application):
    """Register the command menu once the application is initialised."""
    await application.bot.set_my_commands(BOT_COMMANDS)
    log.info("Kommandomenü registriert")


def main():
    """Configure the bot, register all handlers and start polling.

    Exits with status 1 when no bot token is configured.
    """
    token = _load_token_and_chat()
    if not token:
        log.error("TG_HAUSMEISTER_TOKEN fehlt in homelab.conf!")
        sys.exit(1)

    log.info("Starte Orbitalo Hausmeister-Bot...")
    app = Application.builder().token(token).post_init(_post_init).build()

    commands = {
        "start": cmd_start,
        "status": cmd_status,
        "errors": cmd_errors,
        "ct": cmd_ct,
        "health": cmd_health,
        "logs": cmd_logs,
        "silence": cmd_silence,
        "report": cmd_report,
        "check": cmd_check,
    }
    for name, handler in commands.items():
        app.add_handler(CommandHandler(name, handler))
    app.add_handler(MessageHandler(filters.TEXT & ~filters.COMMAND, handle_message))

    log.info("Bot läuft — polling gestartet")
    app.run_polling(allowed_updates=Update.ALL_TYPES)


if __name__ == "__main__":
    main()