Bot-Code (CT 116) ins Repo: telegram_bot, context, llm, monitor

This commit is contained in:
Auto-Sync 2026-03-08 08:52:37 +01:00
parent 8eea050af1
commit b0facadb16
5 changed files with 567 additions and 0 deletions

159
homelab-ai-bot/context.py Normal file
View file

@ -0,0 +1,159 @@
"""Intelligente Kontext-Sammlung für den Hausmeister-Bot.
Entscheidet anhand der Frage welche Datenquellen abgefragt werden."""
import sys
import os
import re
sys.path.insert(0, os.path.dirname(__file__))
from core import config, loki_client, proxmox_client
def _load_config():
    """Parse the homelab configuration and return the resulting object."""
    parsed = config.parse_config()
    return parsed
def _get_tokens(cfg):
    """Build the Proxmox API-token mapping from the raw configuration.

    Reads ``PVE_TOKEN_HETZNER_NAME`` and ``PVE_TOKEN_HETZNER_VALUE`` from
    ``cfg.raw``. Only when both are non-empty does the result contain an
    entry for the ``"pve-hetzner"`` host; otherwise it is an empty dict.
    """
    token_name = cfg.raw.get("PVE_TOKEN_HETZNER_NAME", "")
    token_value = cfg.raw.get("PVE_TOKEN_HETZNER_VALUE", "")
    if not (token_name and token_value):
        return {}
    return {"pve-hetzner": {"name": token_name, "value": token_value}}
def _get_passwords(cfg):
    """Map each Proxmox host key to its password from ``cfg.passwords``.

    ``"pve-hetzner"`` uses the ``"hetzner"`` entry; ``"pve1"``, ``"pve3"``
    and ``"default"`` all use the ``"default"`` entry. Missing entries
    become empty strings.
    """
    passwords = cfg.passwords
    fallback = passwords.get("default", "")
    mapping = {"pve-hetzner": passwords.get("hetzner", "")}
    for host_key in ("pve1", "pve3", "default"):
        mapping[host_key] = fallback
    return mapping
def gather_status() -> str:
    """Return the formatted status of all containers (used by /status)."""
    cfg = _load_config()
    passwords = _get_passwords(cfg)
    tokens = _get_tokens(cfg)
    all_containers = proxmox_client.get_all_containers(passwords, tokens)
    return proxmox_client.format_containers(all_containers)
def gather_errors(hours: float = 2) -> str:
    """Return formatted recent error log entries from Loki (used by /errors).

    Args:
        hours: Look-back window passed on to ``loki_client.get_errors``.
    """
    recent_errors = loki_client.get_errors(hours=hours, limit=30)
    formatted = loki_client.format_logs(recent_errors)
    return formatted
def gather_container_status(query: str) -> str:
    """Return a multi-line status summary for a single container.

    Args:
        query: Free text identifying the container. If it contains a
            standalone three-digit number, that number is used as the VMID;
            otherwise the stripped text is used as the container name.

    Returns:
        The formatted status text, or a short German error message when the
        container or its host is unknown or the Proxmox call fails.
    """
    cfg = _load_config()

    vmid = None
    name = None
    vmid_match = re.search(r'\b(\d{3})\b', query)
    if vmid_match:
        vmid = int(vmid_match.group(1))
    else:
        name = query.strip()

    ct = config.get_container(cfg, vmid=vmid, name=name)
    if not ct:
        return f"Container nicht gefunden: {query}"

    host_ip = proxmox_client.PROXMOX_HOSTS.get(ct.host)
    if not host_ip:
        return f"Host nicht erreichbar: {ct.host}"

    token = _get_tokens(cfg).get(ct.host, {})
    pw = _get_passwords(cfg).get(ct.host, "")
    try:
        client = proxmox_client.ProxmoxClient(
            host_ip, password=pw,
            token_name=token.get("name", ""),
            token_value=token.get("value", ""),
        )
        status = client.get_container_status(ct.vmid)
    except Exception as e:
        # Boundary towards the Proxmox API: report instead of crashing the bot.
        return f"Proxmox-Fehler: {e}"

    bytes_per_mb = 1024 * 1024
    mem_mb = status.get("mem", 0) // bytes_per_mb
    maxmem_mb = status.get("maxmem", 0) // bytes_per_mb
    uptime_h = status.get("uptime", 0) // 3600
    return (
        # VMID and name are separated so they do not run together
        # ("CT 109wordpress"); same "CT <id> (<name>)" form as the alerts.
        f"CT {ct.vmid} ({ct.name})\n"
        f"Host: {ct.host}\n"
        f"Status: {status.get('status', '?')}\n"
        f"RAM: {mem_mb}/{maxmem_mb} MB\n"
        f"CPU: {status.get('cpus', '?')} Kerne\n"
        f"Uptime: {uptime_h}h\n"
        f"Tailscale: {ct.tailscale_ip or ''}\n"
        f"Dienste: {ct.services}"
    )
def gather_logs(container: str, hours: float = 1) -> str:
    """Return formatted recent log lines of one host from Loki.

    Args:
        container: Host label value to select (``{host="..."}``). It comes
            from chat input, so it is escaped before being embedded in the
            LogQL selector.
        hours: Look-back window passed on to ``loki_client.query_logs``.
    """
    # Escape backslashes first, then double quotes, so the value cannot
    # terminate the quoted LogQL string and inject additional query syntax.
    safe_host = container.replace("\\", "\\\\").replace('"', '\\"')
    selector = f'{{host="{safe_host}"}}'
    entries = loki_client.query_logs(selector, hours=hours, limit=20)
    return loki_client.format_logs(entries)
def gather_health(container: str) -> str:
    """Return a short health summary for one host over the last 24 hours.

    Args:
        container: Host name passed to ``loki_client.get_health``.
    """
    hours = 24
    health = loki_client.get_health(container, hours=hours)
    status_emoji = {"healthy": "", "warning": "⚠️", "critical": "🔴"}.get(
        health.get("status", ""), ""
    )
    # The key must be formatted with the actual window. The previous literal
    # 'errors_last_{hours}h' was never interpolated and always yielded "?".
    # NOTE(review): assumes loki_client names the key "errors_last_<hours>h"
    # — confirm against loki_client.get_health.
    error_count = health.get(f"errors_last_{hours}h", "?")
    return (
        f"{status_emoji} {health.get('host', container)}\n"
        f"Status: {health.get('status', '?')}\n"
        f"Fehler ({hours}h): {error_count}\n"
        f"Sendet Logs: {'ja' if health.get('sending_logs') else 'nein'}"
    )
def gather_silence() -> str:
    """Report which hosts have sent no logs within the last 35 minutes.

    Returns a success line when the silence check yields nothing, an error
    line when its first entry carries an ``"error"`` key, and otherwise a
    header followed by one host per line.
    """
    silent_hosts = loki_client.check_silence(minutes=35)
    if not silent_hosts:
        return "✅ Alle Hosts senden Logs."

    first_entry = silent_hosts[0]
    if "error" in first_entry:
        return f"Fehler: {first_entry['error']}"

    header = "⚠️ Stille Hosts (keine Logs seit 35+ Min):\n"
    host_lines = [f"{entry['host']}" for entry in silent_hosts]
    return "\n".join([header, *host_lines])
def gather_context_for_question(question: str) -> str:
    """Collect the context sections relevant to a free-text question.

    The lower-cased question is scanned for keyword groups, a ``ct NNN``
    reference and known service names; each hit adds one section. When
    nothing matches, the container status and the errors of the last hour
    are used as a fallback. Sections are joined by blank lines.
    """
    q = question.lower()
    sections = []

    # (keywords, section header, collector), evaluated in this order.
    keyword_sources = (
        (("fehler", "error", "problem", "kaputt", "down"),
         "=== Aktuelle Fehler ===\n", lambda: gather_errors(hours=2)),
        (("status", "läuft", "container", "übersicht", "alles"),
         "=== Container Status ===\n", gather_status),
        (("still", "silence", "stumm", "logs"),
         "=== Stille Hosts ===\n", gather_silence),
    )
    for keywords, header, collect in keyword_sources:
        if any(word in q for word in keywords):
            sections.append(header + collect())

    ct_match = re.search(r'\bct[- ]?(\d{3})\b', q)
    if ct_match:
        vmid_text = ct_match.group(1)
        sections.append(f"=== CT {vmid_text} ===\n" + gather_container_status(vmid_text))

    known_services = (
        "wordpress", "rss", "seafile", "forgejo", "portainer",
        "fuenfvoracht", "redax", "flugscanner", "edelmetall",
    )
    for service in known_services:
        if service in q:
            sections.append(f"=== {service} ===\n" + gather_container_status(service))

    if not sections:
        sections.append("=== Container Status ===\n" + gather_status())
        sections.append("=== Aktuelle Fehler ===\n" + gather_errors(hours=1))

    return "\n\n".join(sections)

44
homelab-ai-bot/llm.py Normal file
View file

@ -0,0 +1,44 @@
"""OpenRouter LLM-Wrapper für natürliche Antworten."""
import requests
import os
import sys
sys.path.insert(0, os.path.dirname(__file__))
from core import config
# OpenRouter model identifier sent as "model" in every chat-completion request.
MODEL = "openai/gpt-4o-mini"
# System message (German) placed first in every request built by ask().
SYSTEM_PROMPT = """Du bist der Hausmeister-Bot für ein Homelab mit mehreren Proxmox-Servern.
Du antwortest kurz, präzise und auf Deutsch.
Du bekommst Live-Daten aus Loki (Logs), Proxmox (Container-Status) und homelab.conf.
Wenn alles in Ordnung ist, sag das kurz. Bei Problemen erkläre was los ist und schlage Lösungen vor.
Nutze Emojis sparsam. Formatiere für Telegram (kein Markdown, nur einfacher Text)."""
def _get_api_key() -> str:
    """Return the ``openrouter_key`` API key from the configuration, or ``""``."""
    api_keys = config.parse_config().api_keys
    return api_keys.get("openrouter_key", "")
def ask(question: str, context: str) -> str:
    """Send a question plus live-data context to OpenRouter and return the answer.

    Args:
        question: The user's free-text question.
        context: Pre-collected live data, embedded in the user message.

    Returns:
        The model's answer text. Failures never raise: a missing API key,
        any request/parsing error, or an empty answer yields a short German
        error string instead.
    """
    api_key = _get_api_key()
    if not api_key:
        return "OpenRouter API Key fehlt in homelab.conf"

    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": f"Kontext (Live-Daten):\n{context}\n\nFrage: {question}"},
    ]
    try:
        r = requests.post(
            "https://openrouter.ai/api/v1/chat/completions",
            headers={"Authorization": f"Bearer {api_key}"},
            json={"model": MODEL, "messages": messages, "max_tokens": 500},
            timeout=30,
        )
        r.raise_for_status()
        content = r.json()["choices"][0]["message"]["content"]
    except Exception as e:
        # Boundary towards the external API: report instead of raising.
        return f"LLM-Fehler: {e}"

    # The API may deliver a null/empty content; honour the ``-> str``
    # contract so callers can safely slice the result.
    if not content:
        return "LLM-Fehler: leere Antwort"
    return content

138
homelab-ai-bot/monitor.py Normal file
View file

@ -0,0 +1,138 @@
"""Proaktives Monitoring — regelbasiert (Stufe 1) + KI (Stufe 2)."""
import sys
import os
import requests
sys.path.insert(0, os.path.dirname(__file__))
from core import config, loki_client, proxmox_client
def _get_tokens(cfg):
    """Return the Proxmox API tokens configured in ``cfg.raw``.

    The ``"pve-hetzner"`` entry is present only when both
    ``PVE_TOKEN_HETZNER_NAME`` and ``PVE_TOKEN_HETZNER_VALUE`` are
    non-empty; otherwise the mapping is empty.
    """
    hetzner_name = cfg.raw.get("PVE_TOKEN_HETZNER_NAME", "")
    hetzner_value = cfg.raw.get("PVE_TOKEN_HETZNER_VALUE", "")
    result = {}
    if hetzner_name and hetzner_value:
        result["pve-hetzner"] = dict(name=hetzner_name, value=hetzner_value)
    return result
def _get_passwords(cfg):
    """Return the password per Proxmox host key, taken from ``cfg.passwords``.

    ``"pve-hetzner"`` reads the ``"hetzner"`` entry; every other key reads
    the ``"default"`` entry. Missing entries become empty strings.
    """
    source_key_by_host = {
        "pve-hetzner": "hetzner",
        "pve1": "default",
        "pve3": "default",
        "default": "default",
    }
    return {
        host: cfg.passwords.get(source_key, "")
        for host, source_key in source_key_by_host.items()
    }
# VMIDs for which check_all() raises an alert whenever their status is not "running".
CRITICAL_CONTAINERS = [101, 109, 111, 112, 113, 115]
def check_all() -> list[str]:
    """Run the rule-based checks (stage 1) and return the list of alerts.

    Checks performed:
      * critical containers that are not running,
      * containers using more than 90% of their RAM,
      * panic/fatal/OOM log lines within the last 30 minutes,
      * hosts that have sent no logs for 35+ minutes.

    Returns:
        Alert messages (German, ready to send); empty when nothing fired.
    """
    cfg = config.parse_config()
    alerts = []

    containers = proxmox_client.get_all_containers(
        _get_passwords(cfg), _get_tokens(cfg)
    )
    for ct in containers:
        if "error" in ct:
            # Entries carrying an "error" key hold no container data.
            continue
        vmid = ct.get("vmid", 0)
        name = ct.get("name", "?")
        status = ct.get("status", "unknown")
        if vmid in CRITICAL_CONTAINERS and status != "running":
            alerts.append(f"🔴 CT {vmid} ({name}) ist {status}!")
        mem = ct.get("mem", 0)
        maxmem = ct.get("maxmem", 1)
        if maxmem > 0 and mem / maxmem > 0.90:
            pct = int(mem / maxmem * 100)
            alerts.append(f"⚠️ CT {vmid} ({name}) RAM bei {pct}%")

    errors = loki_client.get_errors(hours=0.5, limit=50)
    error_lines = [e for e in errors if "error" not in e]
    panic_words = ("panic", "fatal", "oom", "out of memory")
    panic_lines = [
        e for e in error_lines
        if any(w in e.get("line", "").lower() for w in panic_words)
        # Lines containing these markers are excluded from the panic match.
        and "query=" not in e.get("line", "")
        and "caller=metrics" not in e.get("line", "")
    ]
    if panic_lines:
        hosts = {e.get("host", "?") for e in panic_lines}
        hosts.discard("${HOSTNAME}")
        if hosts:
            # Sorted so the same set of hosts always yields the same text;
            # raw set order can differ between runs.
            alerts.append(
                f"🔴 Kritische Fehler (panic/fatal/OOM) auf: {', '.join(sorted(hosts))}"
            )

    silent = loki_client.check_silence(minutes=35)
    if silent and "error" not in silent[0]:
        names = [s["host"] for s in silent]
        alerts.append(f"⚠️ Keine Logs seit 35+ Min: {', '.join(names)}")

    return alerts
def format_report() -> str:
    """Build the daily report: container counts, errors, silent hosts, alerts.

    Returns:
        The multi-line report text (German).
    """
    cfg = config.parse_config()
    lines = ["📋 Tagesbericht Homelab\n"]

    containers = proxmox_client.get_all_containers(
        _get_passwords(cfg), _get_tokens(cfg)
    )
    running = [c for c in containers if c.get("status") == "running"]
    stopped = [c for c in containers if c.get("status") == "stopped"]
    errors_ct = [c for c in containers if "error" in c]
    lines.append(f"Container: {len(running)} running, {len(stopped)} stopped, {len(errors_ct)} nicht erreichbar")

    errors = loki_client.get_errors(hours=24, limit=100)
    error_count = sum(1 for e in errors if "error" not in e)
    lines.append(f"Fehler (24h): {error_count}")

    silent = loki_client.check_silence(minutes=35)
    if silent and "error" in silent[0]:
        # A failed silence check must not be reported as "no silent hosts".
        lines.append(f"Stille Hosts: Prüfung fehlgeschlagen ({silent[0]['error']})")
    elif silent:
        names = [s["host"] for s in silent]
        lines.append(f"Stille Hosts: {', '.join(names)}")
    else:
        lines.append("Stille Hosts: keine")

    alerts = check_all()
    if alerts:
        lines.append(f"\n⚠️ {len(alerts)} aktive Alarme:")
        lines.extend(alerts)
    else:
        lines.append("\n✅ Keine Alarme — alles läuft.")

    return "\n".join(lines)
def send_alert(token: str, chat_id: str, message: str):
    """Send a plain-text message to a Telegram chat via the Bot API.

    Args:
        token: Bot token used in the API URL.
        chat_id: Target chat.
        message: Text to send; shortened to 4000 characters (plus an
            ellipsis line) so it stays within Telegram's message size limit.

    Raises:
        requests.RequestException: On connection problems, timeouts, or when
            Telegram answers with an HTTP error status.
    """
    max_len = 4000  # same safety margin the bot handlers use
    if len(message) > max_len:
        message = message[:max_len] + "\n..."

    response = requests.post(
        f"https://api.telegram.org/bot{token}/sendMessage",
        data={"chat_id": chat_id, "text": message},
        timeout=10,
    )
    # Surface rejected requests instead of silently dropping the alert.
    response.raise_for_status()
def run_check_and_alert():
    """Cron entry point: run the checks and send the alerts, if any.

    Does nothing when the bot token or chat ID is missing from the
    configuration, or when no alert fired.
    """
    cfg = config.parse_config()
    bot_token = cfg.raw.get("TG_HAUSMEISTER_TOKEN", "")
    target_chat = cfg.raw.get("TG_CHAT_ID", "")
    if not (bot_token and target_chat):
        return

    alerts = check_all()
    if not alerts:
        return

    body = "\n".join(alerts)
    send_alert(bot_token, target_chat, "🔧 Hausmeister-Check\n\n" + body)
if __name__ == "__main__":
    import sys

    # "report" as first CLI argument prints the daily report; any other
    # invocation runs the check and sends alerts.
    wants_report = len(sys.argv) > 1 and sys.argv[1] == "report"
    if not wants_report:
        run_check_and_alert()
    else:
        print(format_report())

View file

@ -0,0 +1,2 @@
python-telegram-bot>=21.0
requests>=2.31

View file

@ -0,0 +1,224 @@
"""Orbitalo Hausmeister — Telegram Bot für Homelab-Management."""
import asyncio
import logging
import sys
import os
sys.path.insert(0, os.path.dirname(__file__))
from telegram import BotCommand, Update
from telegram.ext import (
Application, CommandHandler, MessageHandler, filters, ContextTypes,
)
# Command menu entries (command, German description) that main() registers
# with Telegram through set_my_commands in its post-init hook.
BOT_COMMANDS = [
BotCommand("status", "Alle Container"),
BotCommand("errors", "Aktuelle Fehler"),
BotCommand("ct", "Container-Detail (/ct 109)"),
BotCommand("health", "Health-Check (/health wordpress)"),
BotCommand("logs", "Letzte Logs (/logs rss-manager)"),
BotCommand("silence", "Stille Hosts"),
BotCommand("report", "Tagesbericht"),
BotCommand("check", "Monitoring-Check"),
BotCommand("start", "Hilfe anzeigen"),
]
import context
import llm
import monitor
from core import config
# Root logging setup: timestamp, logger name, level and message at INFO level.
logging.basicConfig(
format="%(asctime)s [%(name)s] %(levelname)s: %(message)s",
level=logging.INFO,
)
# Logger used throughout this module.
log = logging.getLogger("hausmeister")
# Chat IDs allowed to use the bot; filled by _load_token_and_chat() from TG_CHAT_ID.
ALLOWED_CHAT_IDS: set[int] = set()
def _load_token_and_chat():
    """Load the bot token and register the allowed chat ID.

    Reads ``TG_HAUSMEISTER_TOKEN`` and ``TG_CHAT_ID`` from the raw
    configuration. A non-empty chat ID is converted to ``int`` and added to
    ``ALLOWED_CHAT_IDS``.

    Returns:
        The bot token, or ``""`` when it is not configured.
    """
    raw = config.parse_config().raw
    bot_token = raw.get("TG_HAUSMEISTER_TOKEN", "")
    configured_chat = raw.get("TG_CHAT_ID", "")
    if configured_chat:
        ALLOWED_CHAT_IDS.add(int(configured_chat))
    return bot_token
def _authorized(update: Update) -> bool:
    """Return whether the update's chat may use the bot.

    With an empty ``ALLOWED_CHAT_IDS`` every chat is accepted; otherwise the
    chat ID of the update must be in the set.
    """
    # NOTE(review): an empty allow-list grants access to everyone (fail-open)
    # — confirm this is intended when TG_CHAT_ID is not configured.
    return (not ALLOWED_CHAT_IDS) or (update.effective_chat.id in ALLOWED_CHAT_IDS)
async def cmd_start(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
    """Handle /start: reply with the help text listing all commands."""
    if not _authorized(update):
        return

    help_lines = (
        "🔧 Orbitalo Hausmeister-Bot",
        "",
        "Befehle:",
        "/status — Alle Container",
        "/errors — Aktuelle Fehler",
        "/ct <nr> — Container-Detail",
        "/health <name> — Health-Check",
        "/logs <name> — Letzte Logs",
        "/silence — Stille Hosts",
        "/report — Tagesbericht",
        "/check — Monitoring-Check",
        "",
        "Oder einfach eine Frage stellen!",
    )
    await update.message.reply_text("\n".join(help_lines))
async def cmd_status(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
    """Handle /status: reply with the status of all containers.

    The synchronous ``context.gather_status`` call runs in a worker thread
    so the event loop is not stalled while it waits. Output longer than
    4000 characters is cut and marked with an ellipsis line.
    """
    if not _authorized(update):
        return
    await update.message.reply_text("⏳ Lade Container-Status...")
    try:
        text = await asyncio.to_thread(context.gather_status)
        if len(text) > 4000:
            text = text[:4000] + "\n..."
        await update.message.reply_text(text)
    except Exception as e:
        log.exception("Fehler bei /status")
        await update.message.reply_text(f"Fehler: {e}")
async def cmd_errors(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
    """Handle /errors: reply with the errors of the last two hours.

    The synchronous ``context.gather_errors`` call runs in a worker thread
    so the event loop is not stalled. The reply is cut at 4000 characters.
    """
    if not _authorized(update):
        return
    await update.message.reply_text("⏳ Suche Fehler...")
    try:
        text = await asyncio.to_thread(context.gather_errors, hours=2)
        await update.message.reply_text(text[:4000])
    except Exception as e:
        log.exception("Fehler bei /errors")
        await update.message.reply_text(f"Fehler: {e}")
async def cmd_ct(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
    """Handle /ct <nr>: reply with the detail status of one container.

    Asks for the argument when it is missing. The synchronous
    ``context.gather_container_status`` call runs in a worker thread so the
    event loop is not stalled. The reply is cut at 4000 characters.
    """
    if not _authorized(update):
        return
    args = ctx.args
    if not args:
        await update.message.reply_text("Bitte CT-Nummer angeben: /ct 109")
        return
    try:
        text = await asyncio.to_thread(context.gather_container_status, args[0])
        await update.message.reply_text(text[:4000])
    except Exception as e:
        log.exception("Fehler bei /ct")
        await update.message.reply_text(f"Fehler: {e}")
async def cmd_health(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
    """Handle /health <name>: reply with the health summary of one host.

    Asks for the argument when it is missing. The synchronous
    ``context.gather_health`` call runs in a worker thread so the event
    loop is not stalled. The reply is cut at 4000 characters.
    """
    if not _authorized(update):
        return
    args = ctx.args
    if not args:
        await update.message.reply_text("Bitte Hostname angeben: /health wordpress")
        return
    try:
        text = await asyncio.to_thread(context.gather_health, args[0])
        await update.message.reply_text(text[:4000])
    except Exception as e:
        log.exception("Fehler bei /health")
        await update.message.reply_text(f"Fehler: {e}")
async def cmd_logs(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
    """Handle /logs <name>: reply with the latest log lines of one host.

    Asks for the argument when it is missing. The synchronous
    ``context.gather_logs`` call runs in a worker thread so the event loop
    is not stalled. The reply is cut at 4000 characters.
    """
    if not _authorized(update):
        return
    args = ctx.args
    if not args:
        await update.message.reply_text("Bitte Hostname angeben: /logs rss-manager")
        return
    try:
        text = await asyncio.to_thread(context.gather_logs, args[0])
        await update.message.reply_text(text[:4000])
    except Exception as e:
        log.exception("Fehler bei /logs")
        await update.message.reply_text(f"Fehler: {e}")
async def cmd_silence(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
    """Handle /silence: reply with the hosts that stopped sending logs.

    The synchronous ``context.gather_silence`` call runs in a worker thread
    so the event loop is not stalled. The reply is cut at 4000 characters.
    """
    if not _authorized(update):
        return
    try:
        text = await asyncio.to_thread(context.gather_silence)
        await update.message.reply_text(text[:4000])
    except Exception as e:
        log.exception("Fehler bei /silence")
        await update.message.reply_text(f"Fehler: {e}")
async def cmd_report(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
    """Handle /report: reply with the daily report.

    The synchronous ``monitor.format_report`` call runs in a worker thread
    so the event loop is not stalled. The reply is cut at 4000 characters.
    """
    if not _authorized(update):
        return
    await update.message.reply_text("⏳ Erstelle Tagesbericht...")
    try:
        text = await asyncio.to_thread(monitor.format_report)
        await update.message.reply_text(text[:4000])
    except Exception as e:
        log.exception("Fehler bei /report")
        await update.message.reply_text(f"Fehler: {e}")
async def cmd_check(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
    """Handle /check: run the monitoring checks and reply with the alerts.

    The synchronous ``monitor.check_all`` call runs in a worker thread so
    the event loop is not stalled. The reply is cut at 4000 characters.
    """
    if not _authorized(update):
        return
    await update.message.reply_text("⏳ Prüfe Systeme...")
    try:
        alerts = await asyncio.to_thread(monitor.check_all)
        if alerts:
            text = f"⚠️ {len(alerts)} Alarme:\n\n" + "\n".join(alerts)
        else:
            text = "✅ Keine Alarme — alles läuft."
        await update.message.reply_text(text[:4000])
    except Exception as e:
        log.exception("Fehler bei /check")
        await update.message.reply_text(f"Fehler: {e}")
async def handle_message(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
    """Answer a free-text question: collect context, ask the LLM, reply.

    Context collection and the LLM request are synchronous calls (the LLM
    request alone waits up to its 30 s timeout), so both run in a worker
    thread to keep the event loop responsive. The reply is cut at 4000
    characters.
    """
    if not _authorized(update):
        return
    question = update.message.text
    if not question:
        return
    await update.message.reply_text("🤔 Denke nach...")
    try:
        data = await asyncio.to_thread(context.gather_context_for_question, question)
        answer = await asyncio.to_thread(llm.ask, question, data)
        await update.message.reply_text(answer[:4000])
    except Exception as e:
        log.exception("Fehler bei Freitext")
        await update.message.reply_text(f"Fehler: {e}")
def main():
    """Start the bot: load the token, register all handlers, run polling.

    Exits with status 1 when no bot token is configured.
    """
    token = _load_token_and_chat()
    if not token:
        log.error("TG_HAUSMEISTER_TOKEN fehlt in homelab.conf!")
        sys.exit(1)

    log.info("Starte Orbitalo Hausmeister-Bot...")
    app = Application.builder().token(token).build()

    # Command name -> handler coroutine, registered in this order.
    command_table = (
        ("start", cmd_start),
        ("status", cmd_status),
        ("errors", cmd_errors),
        ("ct", cmd_ct),
        ("health", cmd_health),
        ("logs", cmd_logs),
        ("silence", cmd_silence),
        ("report", cmd_report),
        ("check", cmd_check),
    )
    for command, callback in command_table:
        app.add_handler(CommandHandler(command, callback))
    # Any non-command text goes to the free-text (LLM) handler.
    app.add_handler(MessageHandler(filters.TEXT & ~filters.COMMAND, handle_message))

    async def post_init(application):
        await application.bot.set_my_commands(BOT_COMMANDS)
        log.info("Kommandomenü registriert")

    app.post_init = post_init

    log.info("Bot läuft — polling gestartet")
    app.run_polling(allowed_updates=Update.ALL_TYPES)
# Script entry point: start the bot only when this file is executed directly.
if "__main__" == __name__:
    main()