feat: KI-Systemvorhersage (tools/predict.py) + taegl. 08:00 Job
- tools/predict.py: sammelt Disk-Trends (Prometheus), Fehler-Logs (Loki), Container-Status (Proxmox) und laesst ein lokales LLM eine Prognose erstellen
- telegram_bot.py: daily_forecast-Job taegl. 08:00 Uhr, sendet Prognose via Telegram
- llm.py: Forecast-Trigger (vorhersage, prognose, was bahnt sich an etc.) -> lokal
This commit is contained in:
parent
5a93736a60
commit
c4553b46d7
3 changed files with 249 additions and 0 deletions
|
|
@ -35,6 +35,8 @@ _LOCAL_OVERRIDES = [
|
||||||
"savetv", "save.tv", "filmtipp", "aufnahme",
|
"savetv", "save.tv", "filmtipp", "aufnahme",
|
||||||
"wordpress", "matomo", "tailscale",
|
"wordpress", "matomo", "tailscale",
|
||||||
"unsere api", "meine api", "die api",
|
"unsere api", "meine api", "die api",
|
||||||
|
"vorhersage", "prognose", "health forecast", "was bahnt", "systemstatus",
|
||||||
|
"system check", "system-check", "wie gehts dem system", "wie geht es dem system",
|
||||||
]
|
]
|
||||||
_WEB_TRIGGERS = [
|
_WEB_TRIGGERS = [
|
||||||
"recherche", "recherchiere", "suche im internet", "web search",
|
"recherche", "recherchiere", "suche im internet", "web search",
|
||||||
|
|
|
||||||
|
|
@ -820,6 +820,21 @@ async def handle_callback(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
async def _send_daily_forecast(context):
    """Send the daily (08:00) AI system forecast to the configured chat.

    Intended as a JobQueue callback. ``context`` is either a callback
    context exposing ``.bot`` or a bot object itself; both are accepted.
    Nothing is sent when ``CHAT_ID`` is not configured. Every failure is
    logged and swallowed so a broken forecast never takes the scheduler down.
    """
    if not CHAT_ID:
        return
    bot = getattr(context, "bot", None) or context
    try:
        from telegram.error import BadRequest
        from tools.predict import handle_get_health_forecast

        # The forecast does blocking HTTP calls, so it runs in the default
        # executor. get_running_loop() is the supported way to obtain the
        # loop from inside a coroutine.
        loop = asyncio.get_running_loop()
        result = await loop.run_in_executor(None, handle_get_health_forecast)
        try:
            await bot.send_message(chat_id=CHAT_ID, text=result, parse_mode="Markdown")
        except BadRequest:
            # The body is free-form LLM text; unbalanced '*', '_' or '`'
            # makes Telegram reject the Markdown. Deliver it as plain text
            # rather than dropping the forecast.
            log.warning("Markdown-Parsing der Systemvorhersage fehlgeschlagen, sende als Klartext")
            await bot.send_message(chat_id=CHAT_ID, text=result)
        log.info("Taegl. Systemvorhersage gesendet")
    except Exception:
        log.exception("Fehler beim Senden der Systemvorhersage")
|
||||||
|
|
||||||
|
|
||||||
async def _send_daily_filmtipps(context):
|
async def _send_daily_filmtipps(context):
|
||||||
"""Täglicher Cronjob: EPG scannen, Top-Filme auto-aufnehmen, Rest vorschlagen.
|
"""Täglicher Cronjob: EPG scannen, Top-Filme auto-aufnehmen, Rest vorschlagen.
|
||||||
|
|
||||||
|
|
@ -933,6 +948,12 @@ def main():
|
||||||
name="daily_filmtipps",
|
name="daily_filmtipps",
|
||||||
)
|
)
|
||||||
log.info("Täglicher Filmtipp-Job registriert (14:00 Uhr)")
|
log.info("Täglicher Filmtipp-Job registriert (14:00 Uhr)")
|
||||||
|
app.job_queue.run_daily(
|
||||||
|
_send_daily_forecast,
|
||||||
|
time=dtime(hour=8, minute=0),
|
||||||
|
name="daily_forecast",
|
||||||
|
)
|
||||||
|
log.info("Täglicher Forecast-Job registriert (08:00 Uhr)")
|
||||||
else:
|
else:
|
||||||
log.warning("JobQueue nicht verfügbar — Filmtipps werden per asyncio-Loop gesendet")
|
log.warning("JobQueue nicht verfügbar — Filmtipps werden per asyncio-Loop gesendet")
|
||||||
|
|
||||||
|
|
|
||||||
226
homelab-ai-bot/tools/predict.py
Normal file
226
homelab-ai-bot/tools/predict.py
Normal file
|
|
@ -0,0 +1,226 @@
|
||||||
|
"""KI-gestützte Systemvorhersage — analysiert Logs, Metriken und Container-Status."""
|
||||||
|
|
||||||
|
import json
import math
import re
from datetime import datetime, timezone, timedelta

import requests

from core import prometheus_client, loki_client, config
from core import proxmox_client
|
||||||
|
|
||||||
|
# Base URL of the Ollama server that runs the local analysis model.
OLLAMA_URL = "http://100.84.255.83:11434"
# Model tag requested from Ollama for the forecast.
FORECAST_MODEL = "qwen3:30b-a3b"
|
||||||
|
|
||||||
|
# Function-calling schema advertised to the LLM router. The tool takes no
# parameters; the description lists the (German) trigger phrases.
TOOLS = [
    {
        "type": "function",
        "function": {
            "name": "get_health_forecast",
            "description": (
                "KI-gestützte Systemvorhersage für das Homelab. Analysiert Fehler-Logs, "
                "Disk-Trends, CPU/RAM-Auslastung und Container-Status. Gibt eine Prognose "
                "aus, ob sich Probleme anbahnen — z.B. voller Speicher, häufige Abstürze, "
                "steigende Fehlerquoten. Trigger: 'vorhersage', 'was bahnt sich an', "
                "'prognose', 'health forecast', 'system check', 'systemstatus'."
            ),
            "parameters": {"type": "object", "properties": {}, "required": []},
        },
    },
]
|
||||||
|
|
||||||
|
|
||||||
|
def _gather_prometheus() -> dict:
|
||||||
|
result = {}
|
||||||
|
try:
|
||||||
|
result["warnings"] = prometheus_client.get_warnings()
|
||||||
|
|
||||||
|
disk = prometheus_client.get_disk()
|
||||||
|
result["disk_current"] = [
|
||||||
|
{"host": r["host"], "used_pct": round(r["value"], 1)} for r in disk
|
||||||
|
]
|
||||||
|
|
||||||
|
trend = prometheus_client.range_query(
|
||||||
|
'max by (host) ((1 - node_filesystem_avail_bytes{mountpoint="/"} '
|
||||||
|
'/ node_filesystem_size_bytes{mountpoint="/"}) * 100)',
|
||||||
|
hours=24,
|
||||||
|
step="2h",
|
||||||
|
)
|
||||||
|
trends = {}
|
||||||
|
if trend.get("status") == "success":
|
||||||
|
for r in trend.get("data", {}).get("result", []):
|
||||||
|
h = r.get("metric", {}).get("host", "?")
|
||||||
|
vals = [float(v[1]) for v in r.get("values", []) if v[1] != "NaN"]
|
||||||
|
if len(vals) >= 2:
|
||||||
|
delta = vals[-1] - vals[0]
|
||||||
|
trends[h] = {
|
||||||
|
"start_pct": round(vals[0], 1),
|
||||||
|
"end_pct": round(vals[-1], 1),
|
||||||
|
"delta_24h": round(delta, 2),
|
||||||
|
}
|
||||||
|
result["disk_trend_24h"] = trends
|
||||||
|
|
||||||
|
mem = prometheus_client.get_memory()
|
||||||
|
result["memory"] = [
|
||||||
|
{"host": r["host"], "used_pct": round(r["value"], 1)} for r in mem
|
||||||
|
]
|
||||||
|
|
||||||
|
load = prometheus_client.get_load()
|
||||||
|
result["load5"] = [
|
||||||
|
{"host": r["host"], "load5": round(r["value"], 2)} for r in load
|
||||||
|
]
|
||||||
|
except Exception as e:
|
||||||
|
result["prometheus_error"] = str(e)
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
def _gather_loki() -> dict:
|
||||||
|
result = {}
|
||||||
|
try:
|
||||||
|
hosts = loki_client.get_labels()
|
||||||
|
error_counts = {}
|
||||||
|
for host in hosts[:20]:
|
||||||
|
errors = loki_client.get_errors(container=host, hours=24, limit=300)
|
||||||
|
count = 0 if (len(errors) == 1 and "error" in errors[0]) else len(errors)
|
||||||
|
if count > 0:
|
||||||
|
error_counts[host] = count
|
||||||
|
result["errors_24h"] = error_counts
|
||||||
|
|
||||||
|
silent = loki_client.check_silence(minutes=60)
|
||||||
|
result["silent_hosts"] = [s["host"] for s in silent if "host" in s]
|
||||||
|
except Exception as e:
|
||||||
|
result["loki_error"] = str(e)
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
def _gather_proxmox() -> dict:
|
||||||
|
result = {}
|
||||||
|
try:
|
||||||
|
cfg = config.parse_config()
|
||||||
|
passwords = {}
|
||||||
|
tokens = {}
|
||||||
|
for pve_host in cfg.proxmox_hosts:
|
||||||
|
name = pve_host.get("name", "")
|
||||||
|
pw = pve_host.get("password", "")
|
||||||
|
tok_name = pve_host.get("token_name", "")
|
||||||
|
tok_val = pve_host.get("token_value", "")
|
||||||
|
if pw:
|
||||||
|
passwords[name] = pw
|
||||||
|
if tok_name and tok_val:
|
||||||
|
tokens[name] = {"name": tok_name, "value": tok_val}
|
||||||
|
containers = proxmox_client.get_all_containers(passwords=passwords, tokens=tokens)
|
||||||
|
stopped = [
|
||||||
|
{"id": c.get("vmid"), "name": c.get("name", "?")}
|
||||||
|
for c in containers
|
||||||
|
if c.get("status") == "stopped" and "error" not in c
|
||||||
|
]
|
||||||
|
running = len([c for c in containers if c.get("status") == "running"])
|
||||||
|
result["total"] = len(containers)
|
||||||
|
result["running"] = running
|
||||||
|
result["stopped"] = stopped
|
||||||
|
except Exception as e:
|
||||||
|
result["proxmox_error"] = str(e)
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
def _call_analysis_llm(data_summary: str) -> str:
|
||||||
|
now_str = datetime.now().strftime("%d.%m.%Y %H:%M")
|
||||||
|
prompt = (
|
||||||
|
f"Du bist ein Homelab-Monitoring-Experte. Heute ist der {now_str}.\n"
|
||||||
|
"Analysiere die folgenden System-Rohdaten und erstelle eine kompakte Prognose.\n\n"
|
||||||
|
"REGELN:\n"
|
||||||
|
"- Nur echte Auffälligkeiten nennen (nicht jede normale Metrik)\n"
|
||||||
|
"- Disk-Delta > 2% in 24h = Warnung\n"
|
||||||
|
"- Disk > 80% = kritisch\n"
|
||||||
|
"- RAM > 85% = Warnung\n"
|
||||||
|
"- Fehler > 50 in 24h für einen Host = Warnung\n"
|
||||||
|
"- Gestoppte Container = prüfen ob OK\n"
|
||||||
|
"- Wenn alles normal: kurze Entwarnung genügt\n"
|
||||||
|
"- Max 12 Zeilen, Emojis erlaubt, auf Deutsch\n"
|
||||||
|
"- Klare Handlungsempfehlung wenn nötig\n\n"
|
||||||
|
f"System-Daten:\n{data_summary}\n\n"
|
||||||
|
"Prognose:"
|
||||||
|
)
|
||||||
|
try:
|
||||||
|
r = requests.post(
|
||||||
|
f"{OLLAMA_URL}/api/chat",
|
||||||
|
json={
|
||||||
|
"model": FORECAST_MODEL,
|
||||||
|
"messages": [{"role": "user", "content": prompt + " /no_think"}],
|
||||||
|
"stream": False,
|
||||||
|
"options": {"num_predict": 700, "temperature": 0.3},
|
||||||
|
},
|
||||||
|
timeout=180,
|
||||||
|
)
|
||||||
|
r.raise_for_status()
|
||||||
|
content = r.json().get("message", {}).get("content", "")
|
||||||
|
# Strip <think> tags if present
|
||||||
|
import re
|
||||||
|
content = re.sub(r"<think>.*?</think>", "", content, flags=re.DOTALL).strip()
|
||||||
|
return content or "LLM-Analyse ergab kein Ergebnis."
|
||||||
|
except Exception as e:
|
||||||
|
return f"⚠️ LLM-Analyse nicht verfügbar: {e}"
|
||||||
|
|
||||||
|
|
||||||
|
def _build_forecast_summary(prom: dict, loki: dict, pve: dict) -> str:
    """Render the gathered monitoring data as plain text for the LLM prompt.

    Failed data sources are stated explicitly, and "keine" is only claimed
    for data that was actually retrieved, so an outage of a monitoring
    backend cannot be mistaken for an all-clear.
    """
    parts = []

    if prom.get("prometheus_error"):
        parts.append(
            f"Prometheus-Daten unvollständig (Abfragefehler): {prom['prometheus_error']}"
        )

    if prom.get("warnings"):
        parts.append("AKTIVE WARNUNGEN: " + ", ".join(str(w) for w in prom["warnings"]))
    elif "warnings" in prom:
        parts.append("Prometheus-Warnungen: keine")

    if prom.get("disk_current"):
        lines = [f"{d['host']}: {d['used_pct']}%" for d in prom["disk_current"]]
        parts.append("Disk-Nutzung aktuell:\n " + "\n ".join(lines))

    if prom.get("disk_trend_24h"):
        # Only report hosts whose usage moved by more than half a point.
        trend_lines = [
            f" {h}: {t['start_pct']}% → {t['end_pct']}% (Δ {t['delta_24h']:+.1f}% in 24h)"
            for h, t in prom["disk_trend_24h"].items()
            if abs(t["delta_24h"]) > 0.5
        ]
        if trend_lines:
            parts.append("Disk-Trends (letzte 24h):\n" + "\n".join(trend_lines))

    if prom.get("memory"):
        high_mem = [m for m in prom["memory"] if m["used_pct"] > 70]
        if high_mem:
            mem_lines = [f"{m['host']}: {m['used_pct']}%" for m in high_mem]
            parts.append("RAM > 70%:\n " + "\n ".join(mem_lines))

    if prom.get("load5"):
        high_load = [entry for entry in prom["load5"] if entry["load5"] > 2.0]
        if high_load:
            load_lines = [f"{entry['host']}: load5={entry['load5']}" for entry in high_load]
            parts.append("Hohe Last:\n " + "\n ".join(load_lines))

    if loki.get("loki_error"):
        parts.append(f"Loki-Daten unvollständig (Abfragefehler): {loki['loki_error']}")

    errors = loki.get("errors_24h")
    if errors:
        top = sorted(errors.items(), key=lambda item: -item[1])[:10]
        err_lines = [f"{h}: {c} Fehler" for h, c in top]
        parts.append("Log-Fehler letzte 24h:\n " + "\n ".join(err_lines))
    elif errors is not None and "loki_error" not in loki:
        parts.append("Log-Fehler letzte 24h: keine")

    if loki.get("silent_hosts"):
        parts.append("Stille Hosts (>60 min kein Log): " + ", ".join(loki["silent_hosts"]))

    if pve.get("proxmox_error"):
        parts.append(f"Proxmox-Abfrage fehlgeschlagen: {pve['proxmox_error']}")
    if pve.get("stopped"):
        stopped_names = [f"CT{c['id']} {c['name']}" for c in pve["stopped"]]
        parts.append("Gestoppte Container: " + ", ".join(stopped_names))
    elif "proxmox_error" not in pve:
        parts.append(f"Proxmox: {pve.get('running', 0)}/{pve.get('total', 0)} Container laufen")
    if pve.get("unreachable"):
        parts.append(f"Proxmox: {pve['unreachable']} Abfrage(n) mit Fehler")

    return "\n\n".join(parts)


def handle_get_health_forecast(**kw) -> str:
    """Tool handler: gather monitoring data and return the LLM forecast.

    Args:
        **kw: Ignored. The tool schema declares no parameters; extra keyword
            arguments are accepted and unused.

    Returns:
        A Markdown-flavoured message: a timestamped header followed by the
        LLM analysis, or its fallback text.
    """
    prom = _gather_prometheus()
    loki = _gather_loki()
    pve = _gather_proxmox()

    data_summary = _build_forecast_summary(prom, loki, pve)
    analysis = _call_analysis_llm(data_summary)

    header = f"🔭 *Systemvorhersage* ({datetime.now().strftime('%d.%m.%Y %H:%M')})\n\n"
    return header + analysis
|
||||||
|
|
||||||
|
|
||||||
|
# Maps each tool name declared in TOOLS to the function implementing it.
HANDLERS = {
    "get_health_forecast": handle_get_health_forecast,
}
|
||||||
Loading…
Add table
Reference in a new issue