#!/usr/bin/env python3
"""sync_state.py — replaces sync-state.sh using the shared core modules.

Runs every 15 minutes on pve-hetzner via cron.
- Generates the STATE.md files from homelab.conf plus live data
- Service watchdog with Telegram alerts
- Git commit & push to Forgejo

Uses the same core modules as the MCP server and the Telegram bot.
"""

import os
import sys
import time
import subprocess
import json
import shlex
from datetime import datetime
from pathlib import Path

# Only takes effect when PATH is not set at all (setdefault); presumably so
# that tools such as `pct` resolve under a bare cron environment.
os.environ.setdefault("PATH", "/usr/sbin:/usr/local/sbin:/usr/local/bin:/usr/bin:/sbin:/bin")

# The parent of the core package must be importable before `core` is imported.
CORE_PATH = Path("/root/homelab-mcp/core")
sys.path.insert(0, str(CORE_PATH.parent))

from core import config  # noqa: E402  (needs the sys.path entry above)

REPO = Path("/opt/homelab-brain")
DEBOUNCE_DIR = Path("/tmp/homelab_watchdog")
DEBOUNCE_DIR.mkdir(exist_ok=True)

# Minimum number of seconds between two alerts for the same key.
DEBOUNCE_SECONDS = 600

NOW = datetime.now()
DATE = NOW.strftime("%Y-%m-%d %H:%M")
CHANGED = False


def log(msg: str):
    """Print *msg* to stdout, prefixed with the current wall-clock time."""
    print(f"[{datetime.now().strftime('%H:%M:%S')}] {msg}", flush=True)


def _redact(text: str, secret: str) -> str:
    """Return *text* with every occurrence of *secret* masked (no-op if empty)."""
    return text.replace(secret, "***") if secret else text


# ── Telegram ──────────────────────────────────────────

def _tg_credentials(cfg: config.HomelabConfig):
    """Return the ``(token, chat_id)`` pair used for watchdog messages."""
    return cfg.telegram.get("tg_mutter_token", ""), cfg.raw.get("TG_CHAT_ID", "")


def _tg_reply_ok(raw) -> bool:
    """Return True if *raw* is a Telegram Bot API reply whose ``ok`` is true."""
    try:
        reply = json.loads(raw)
    except ValueError:  # empty / non-JSON / undecodable output
        return False
    return isinstance(reply, dict) and reply.get("ok") is True


def _tg_send(token: str, chat_id: str, text: str) -> bool:
    """Send *text* via the Bot API using curl; return True once delivered.

    The message is first sent with ``parse_mode=Markdown``. If the API does
    not acknowledge it (e.g. unbalanced ``_``/``*`` in an error text), it is
    retried once as plain text. Network errors and timeouts are logged and
    reported as ``False`` instead of aborting the whole sync run.
    """
    url = f"https://api.telegram.org/bot{token}/sendMessage"
    for parse_mode in ("Markdown", None):
        cmd = [
            "curl", "-s", "-X", "POST", url,
            # --data-urlencode so that '&', '%', '+', newlines etc. in the
            # text survive as data instead of corrupting the form body.
            "--data-urlencode", f"chat_id={chat_id}",
            "--data-urlencode", f"text={text}",
        ]
        if parse_mode:
            cmd += ["-d", f"parse_mode={parse_mode}"]
        try:
            r = subprocess.run(cmd, capture_output=True, timeout=10)
        except (subprocess.SubprocessError, OSError) as exc:
            # Log only the exception type: its message contains the command
            # line and therefore the bot token.
            log(f"Telegram nicht erreichbar: {type(exc).__name__}")
            return False
        if _tg_reply_ok(r.stdout):
            return True
    return False


def tg_alert(cfg: config.HomelabConfig, key: str, msg: str):
    """Send a debounced watchdog alert.

    Args:
        cfg: parsed homelab configuration (provides bot token and chat id).
        key: debounce key; at most one alert per key every DEBOUNCE_SECONDS.
        msg: message body. Use real newlines; a literal ``%0A`` is still
            accepted and converted for backward compatibility.

    Does nothing when credentials are missing. The debounce lock is written
    only after a confirmed delivery, so a failed send is retried next run.
    """
    token, chat_id = _tg_credentials(cfg)
    if not token or not chat_id:
        return

    lockfile = DEBOUNCE_DIR / f"{key}.lock"
    now = int(time.time())
    if lockfile.exists():
        try:
            last = int(lockfile.read_text().strip())
        except ValueError:
            last = None  # corrupt lock file: treat as "never alerted"
        if last is not None and now - last < DEBOUNCE_SECONDS:
            return

    text = "Homelab Watchdog\n\n" + msg.replace("%0A", "\n")
    if not _tg_send(token, chat_id, text):
        log(f"Alert NICHT gesendet: {key}")
        return
    lockfile.write_text(str(now))
    log(f"Alert gesendet: {key}")


def tg_recovery(cfg: config.HomelabConfig, key: str, name: str):
    """Announce that *name* is back online if an alert lock for *key* exists.

    The lock is removed after the attempt regardless of delivery, so a
    recovery is announced at most once. Without credentials nothing happens
    and the lock is left in place.
    """
    lockfile = DEBOUNCE_DIR / f"{key}.lock"
    if not lockfile.exists():
        return
    token, chat_id = _tg_credentials(cfg)
    if not token or not chat_id:
        return
    _tg_send(token, chat_id, f"*{name}* wieder online")
    lockfile.unlink(missing_ok=True)
    log(f"Recovery: {name} wieder online")


# ── Service checks (pct exec) ─────────────────────────

def pct_exec(ct: int, cmd: str, timeout: int = 15) -> str:
    """Run *cmd* through ``bash -c`` inside container *ct* via ``pct exec``.

    Returns the stripped stdout, or ``""`` on any failure (timeout, missing
    ``pct`` binary, undecodable output). Deliberately best-effort: callers
    substitute their own fallback text for an empty result.
    """
    try:
        r = subprocess.run(
            ["pct", "exec", str(ct), "--", "bash", "-c", cmd],
            capture_output=True, text=True, timeout=timeout,
        )
    except Exception:  # includes subprocess.TimeoutExpired
        return ""
    return r.stdout.strip()


def _python_cmd(script: str) -> str:
    """Build a ``python3 -c`` shell command with *script* safely quoted.

    Single-quoting via shlex.quote keeps bash from expanding ``$``, backticks
    or ``"`` inside the script.
    """
    return "python3 -c " + shlex.quote(script)


def _poll_status(ct: int, cmd: str, expected: str, attempts: int = 3, delay: int = 2) -> str:
    """Run *cmd* in *ct* up to *attempts* times until it prints *expected*.

    Returns *expected* on success, otherwise the last output (or "unknown"
    when the command printed nothing).
    """
    status = "unknown"
    for attempt in range(attempts):
        status = pct_exec(ct, cmd) or "unknown"
        if status == expected:
            break
        if attempt < attempts - 1:
            time.sleep(delay)
    return status


def check_service(cfg: config.HomelabConfig, ct: int, service: str, name: str) -> str:
    """Check a systemd unit in container *ct*; alert or announce recovery.

    Returns "active" when the unit is active, otherwise "DOWN".
    """
    status = _poll_status(ct, f"systemctl is-active {service}", "active")
    if status != "active":
        tg_alert(
            cfg, f"service_{service}",
            f"*{name}* ist DOWN\nService: {service}\nCT: {ct}\nStatus: {status}",
        )
        return "DOWN"
    tg_recovery(cfg, f"service_{service}", name)
    return "active"


def check_docker(cfg: config.HomelabConfig, ct: int, container: str, name: str) -> str:
    """Check a Docker container inside *ct*; alert or announce recovery.

    Returns the container state as printed by ``docker inspect`` ("running"
    when healthy, "unknown" if nothing could be read).
    """
    status = _poll_status(
        ct, "docker inspect --format='{{.State.Status}}' " + container, "running",
    )
    key = container.replace("-", "_")
    if status != "running":
        tg_alert(cfg, key, f"*{name}* ist DOWN\nStatus: {status}\nCT: {ct}")
    else:
        tg_recovery(cfg, key, name)
    return status


# ── STATE.md generators ───────────────────────────────

def generate_arakava_state(cfg: config.HomelabConfig) -> str:
    """Collect live data for Arakava News and render it as Markdown.

    Queries containers 109 (rss-manager) and 101 (WordPress) through
    pct_exec; every value falls back to a placeholder when it cannot be read.
    NOTE(review): the rendered document includes passwords from
    ``cfg.passwords`` and is pushed to git — confirm this is intended.
    """
    log("Sammle Arakava News Status...")

    rss_status = pct_exec(109, "systemctl is-active rss-manager") or "unknown"
    wp_status = pct_exec(101, "docker inspect --format='{{.State.Status}}' wordpress-app") or "unknown"

    # chr(110)+chr(105)+chr(101) spells "nie" without needing nested quotes.
    feed_cmd = _python_cmd(
        "import sqlite3;"
        " db = sqlite3.connect('/opt/rss-manager/rss_manager.db');"
        " rows = db.execute('SELECT name, last_run FROM feeds WHERE enabled=1 ORDER BY last_run DESC LIMIT 5').fetchall();"
        " [print(f' {r[0]}: {r[1] or chr(110)+chr(105)+chr(101)}') for r in rows]"
    )
    feed_activity = pct_exec(109, feed_cmd) or " (nicht abrufbar)"

    or_key = cfg.api_keys.get("openrouter_key", "")
    # repr() yields a valid Python string literal even if the key contains quotes.
    # NOTE(review): the key is part of the command line and thus visible in
    # the process list while the check runs.
    auth_value = repr("Bearer " + or_key)
    # The strings below are plain (not f-) strings: the braces belong to the
    # script that runs inside the container.
    or_cmd = _python_cmd(
        "import requests\n"
        "try:\n"
        "    r = requests.get('https://openrouter.ai/api/v1/auth/key',\n"
        "        headers={'Authorization': " + auth_value + "}, timeout=5)\n"
        "    d = r.json().get('data', {})\n"
        "    remaining = float(d.get('limit', 20)) - float(d.get('usage', 0))\n"
        "    print(f'${remaining:.2f} verbleibend')\n"
        "except Exception as e:\n"
        "    print(f'(nicht abrufbar: {e})')\n"
    )
    or_balance = pct_exec(109, or_cmd) or "(nicht abrufbar)"

    # `grep -c` already prints 0 when nothing matches (while exiting 1), so an
    # extra `|| echo 0` would print the zero twice; a missing file yields
    # empty output and is covered by the Python-side fallback.
    errors = pct_exec(109, "grep -c 'ERROR' /opt/rss-manager/logs/service.log 2>/dev/null") or "0"
    last_error = pct_exec(109, "grep 'ERROR' /opt/rss-manager/logs/service.log 2>/dev/null | tail -1") or "keine"

    ct_101 = config.get_container(cfg, vmid=101)
    ct_109 = config.get_container(cfg, vmid=109)
    ct_600 = config.get_container(cfg, vmid=600)
    ct_601 = config.get_container(cfg, vmid=601)

    rss_url = f"http://{ct_109.tailscale_ip}:8080" if ct_109 and ct_109.tailscale_ip else "—"
    matomo_url = f"https://{cfg.domains.get('matomo', '')}"
    blog_url = f"https://{cfg.domains.get('primary', '')}"
    admin_url = f"{blog_url}/wp-admin"

    pw_admin = cfg.passwords.get("wp_admin", "?")
    pw_default = cfg.passwords.get("default", "?")

    feed_table_cmd = _python_cmd(
        "import sqlite3;"
        " db = sqlite3.connect('/opt/rss-manager/rss_manager.db');"
        " rows = db.execute('SELECT id, name, schedule FROM feeds WHERE enabled=1 ORDER BY id').fetchall();"
        " [print(f'| {r[0]} | {r[1]} | {r[2]} |') for r in rows]"
    )
    feed_table = pct_exec(109, feed_table_cmd) or "| — | (nicht abrufbar) | — |"

    def ct_row(ct, extra=""):
        """Render one row of the three-column container table."""
        if not ct:
            return "| ? | ? | ? |"
        s = extra or ct.services
        return f"| {ct.vmid} | {s} | {ct.tailscale_ip or '—'} |"

    return f"""# Arakava News — Live State
> Auto-generiert: {DATE}

## Service Status
| Service | CT | Status |
|---|---|---|
| rss-manager | 109 | {rss_status} |
| WordPress Docker | 101 | {wp_status} |

## Letzte Feed-Aktivität (Top 5)
{feed_activity}

## Fehler (letzte 24h)
- Fehler gesamt: {errors}
- Letzter Fehler: {last_error}

## OpenRouter Guthaben
{or_balance}

## URLs
- Blog: {blog_url}
- Admin: {admin_url} (admin / {pw_admin})
- RSS Manager: {rss_url} (admin / {pw_default})
- Matomo: {matomo_url} (admin / {pw_default})

## Container (Primary — pve-hetzner)
| CT | Dienst | Tailscale |
|---|---|---|
{ct_row(ct_101)}
{ct_row(ct_109)}

## Container (Mirror — pve3 Muldenstein)
| CT | Dienst | Tailscale |
|---|---|---|
{ct_row(ct_600)}
{ct_row(ct_601)}

## Aktive Feeds
| ID | Name | Schedule |
|---|---|---|
{feed_table}

## Code (CT 109: /opt/rss-manager/)
poster.py, scheduler.py, app.py, db.py

## Änderungshistorie
- 08.03.2026: Domain arakavanews.com live, Mirror CT 600/601 auf pve3
- 08.03.2026: homelab.conf als zentrale Quelle der Wahrheit
- 24.02.2026: Scheduler Lock gegen Doppelstarts
- 24.02.2026: Telegram auf HTML-Modus (Sonderzeichen-Fix)
- 24.02.2026: Werbeartikel-Blacklist (Anzeige:, Sponsored, etc.)
- 23.02.2026: Matomo von CT 113 → CT 109 migriert
"""


def _df_summary(mount: str) -> str:
    """Return fields 4-5 of the last ``df -h`` line for *mount*, or "n/a".

    NOTE(review): with the usual ``df -h`` column layout these fields are
    "Avail" and "Use%", while the table that shows them is headed "Belegt" —
    confirm which value is wanted.
    """
    r = subprocess.run(["df", "-h", mount], capture_output=True, text=True)
    if r.returncode != 0:
        return "n/a"
    lines = r.stdout.strip().split("\n")
    if len(lines) < 2:
        return "n/a"
    return " ".join(lines[-1].split()[3:5])


def generate_infra_state(cfg: config.HomelabConfig) -> str:
    """Render the infrastructure overview (disks, containers, tunnels).

    NOTE(review): the rendered document includes passwords from
    ``cfg.passwords`` and is pushed to git — confirm this is intended.
    """
    log("Sammle Infrastruktur Status...")

    disk_root_info = _df_summary("/")
    disk_data = _df_summary("/var/lib/vz")

    hetzner_cts = [c for c in cfg.containers if c.host == "pve-hetzner"]
    pve1_cts = [c for c in cfg.containers if c.host == "pve1"]
    pve3_cts = [c for c in cfg.containers if c.host == "pve3"]

    def ct_table(cts, cols=("CT", "Name", "Tailscale IP", "Dienste")):
        """Render *cts* as a Markdown table sorted by VMID, or "(keine)"."""
        if not cts:
            return "(keine)"
        header = "| " + " | ".join(cols) + " |"
        sep = "|" + "|".join(["---"] * len(cols)) + "|"
        rows = [
            f"| {c.vmid} | {c.name} | {c.tailscale_ip or '—'} | {c.services} |"
            for c in sorted(cts, key=lambda x: x.vmid)
        ]
        return f"{header}\n{sep}\n" + "\n".join(rows)

    tunnel_lines = []
    for t in cfg.tunnels:
        status_label = "Standby" if t.status == "standby" else "aktiv"
        tunnel_lines.append(f"- CT {t.ct_id}: {t.domain} → {t.target} ({status_label})")
    tunnel_text = "\n".join(tunnel_lines) if tunnel_lines else "- keine"

    pw_hetzner = cfg.passwords.get("hetzner", "?")
    pw_default = cfg.passwords.get("default", "?")

    return f"""# Infrastruktur — Live State
> Auto-generiert: {DATE}

## pve-hetzner Disk
| Mount | Belegt |
|---|---|
| / (root) | {disk_root_info} |
| /var/lib/vz (VMs/CTs) | {disk_data} |

## Aktive Container auf pve-hetzner
{ct_table(hetzner_cts)}

## Container auf pve1 (Kambodscha)
{ct_table(pve1_cts)}

## Container auf pve3 (Muldenstein)
{ct_table(pve3_cts)}

## Routing (Cloudflare Tunnels)
{tunnel_text}

## Zugangsdaten
- pve-hetzner: root / {pw_hetzner}
- pve1: root / {pw_default}
- Alle CTs: root / {pw_default}

## Telegram Bots
| Bot | Zweck |
|---|---|
| @MutterbotAI_bot | Watchdog-Alerts |
| @Orbitalo_Hausmeister_bot | Homelab AI-Bot |
"""


def generate_smarthome_state(cfg: config.HomelabConfig) -> str:
    """Render the smart-home overview, including the newest backup archive."""
    log("Sammle Smart Home Status...")

    backup_dir = Path("/home/backup-muldenstein/backups")
    if backup_dir.exists():
        # Newest archive first.
        backups = sorted(backup_dir.glob("*.tar.gz"), key=lambda p: p.stat().st_mtime, reverse=True)
        if backups:
            stat = backups[0].stat()
            size_mb = stat.st_size // (1024 * 1024)
            mtime = datetime.fromtimestamp(stat.st_mtime).strftime("%Y-%m-%d %H:%M")
            last_backup = f"{size_mb}MB, {mtime}"
        else:
            last_backup = "keine Backups gefunden"
        backup_count = str(len(backups))
    else:
        last_backup = "Verzeichnis nicht vorhanden"
        backup_count = "0"

    grafana_url = f"https://{cfg.domains.get('grafana', 'grafana.orbitalo.net')}"

    return f"""# Smart Home Muldenstein — Live State
> Auto-generiert: {DATE}

## Backup-Status
- Letztes Backup: {last_backup}
- Backups gesamt: {backup_count}
- Ziel: /home/backup-muldenstein/backups/ (CT 144)

## Services (CT 143)
| Dienst | URL |
|---|---|
| Grafana | {grafana_url} |
| ioBroker | http://192.168.178.36:8081 |
| InfluxDB | http://192.168.178.36:8086 |

## Grafana Alerts → Telegram {cfg.raw.get('TG_CHAT_ID', '?')}
- Promtail DOWN (> 5 Min keine Daten)
- CPU > 70%
- Memory > 80%
- Disk > 90%

## Backup-Zeitplan
- täglich 04:00 → /root/backup-to-hetzner.sh (auf pve3)
- Retention: 30d tägl, 90d wöchl, unbegrenzt monatl
"""


# ── Git operations ────────────────────────────────────

def git_sync(cfg: config.HomelabConfig):
    """Fetch ``main`` from Forgejo and hard-reset the local repo to it.

    The reset is skipped when the fetch fails, so the working tree is never
    reset to a stale FETCH_HEAD. Returns the authenticated remote URL for
    the later push. A git timeout propagates as subprocess.TimeoutExpired.
    """
    forgejo_token = cfg.api_keys.get("forgejo_sync_token", "")
    ct_111 = config.get_container(cfg, vmid=111)
    # Hard-coded address is the fallback when CT 111 is not in the config.
    forgejo_ip = ct_111.tailscale_ip if ct_111 else "100.89.246.60"
    forgejo_url = f"http://orbitalo:{forgejo_token}@{forgejo_ip}:3000/orbitalo/homelab-brain.git"

    fetch = subprocess.run(
        ["git", "-C", str(REPO), "fetch", forgejo_url, "main", "--quiet"],
        capture_output=True, text=True, timeout=30,
    )
    if fetch.returncode != 0:
        err = fetch.stderr.split("\n")[0] if fetch.stderr else "unbekannt"
        log(f"Fetch FEHLER: {_redact(err, forgejo_token)}")
        return forgejo_url

    subprocess.run(
        ["git", "-C", str(REPO), "reset", "--hard", "FETCH_HEAD"],
        capture_output=True, timeout=15,
    )
    return forgejo_url


def git_commit_and_push(cfg: config.HomelabConfig, forgejo_url: str):
    """Stage everything, commit as "Auto-Sync" and push ``main`` to Forgejo.

    A failed push is logged and reported via Telegram (debounced under the
    key "git_push"); a successful push clears that alert lock.
    """
    subprocess.run(["git", "-C", str(REPO), "add", "-A"], capture_output=True, timeout=15)
    # The commit's exit status is ignored: "nothing to commit" is not an error here.
    subprocess.run(
        ["git", "-C", str(REPO), "-c", "user.email=sync@homelab", "-c", "user.name=Auto-Sync",
         "commit", "-m", f"Auto-Sync: {DATE}", "--quiet"],
        capture_output=True, timeout=15,
    )
    r = subprocess.run(
        ["git", "-C", str(REPO), "push", forgejo_url, "main", "--quiet"],
        capture_output=True, text=True, timeout=30,
    )
    if r.returncode == 0:
        log("Push erfolgreich")
        (DEBOUNCE_DIR / "git_push.lock").unlink(missing_ok=True)
        return

    err = r.stderr.split("\n")[0] if r.stderr else "unbekannt"
    # git may echo the remote URL, which embeds the access token.
    err = _redact(err, cfg.api_keys.get("forgejo_sync_token", ""))
    log(f"Push FEHLER: {err}")
    tg_alert(cfg, "git_push", f"*Homelab Git-Sync fehlgeschlagen*\n\nFehler: {err}\nZeit: {DATE}")


# ── Main ──────────────────────────────────────────────

def _write_if_changed(path: Path, content: str) -> bool:
    """Write *content* to *path* as UTF-8 unless the file already holds it.

    Returns True when the file was (re)written.
    """
    try:
        if path.read_text(encoding="utf-8") == content:
            return False
    except (OSError, UnicodeDecodeError):
        pass  # missing or unreadable file: fall through and rewrite it
    path.write_text(content, encoding="utf-8")
    return True


def main():
    """Run one sync cycle: git sync, watchdog, regenerate STATE.md, push."""
    global CHANGED
    log("Sync startet...")

    cfg = config.parse_config(REPO / "homelab.conf")
    forgejo_url = git_sync(cfg)
    # Parse again: the reset in git_sync may have replaced homelab.conf.
    cfg = config.parse_config(REPO / "homelab.conf")

    # Watchdog
    log("Watchdog läuft...")
    check_service(cfg, 109, "rss-manager", "RSS Manager")
    check_docker(cfg, 101, "wordpress-app", "WordPress Docker")

    # Generate the STATE.md files
    states = {
        "arakava-news/STATE.md": generate_arakava_state(cfg),
        "infrastructure/STATE.md": generate_infra_state(cfg),
        "smart-home/STATE.md": generate_smarthome_state(cfg),
    }

    for path, content in states.items():
        full_path = REPO / path
        full_path.parent.mkdir(parents=True, exist_ok=True)
        if _write_if_changed(full_path, content):
            CHANGED = True
            log(f"{path} aktualisiert")

    if CHANGED:
        git_commit_and_push(cfg, forgejo_url)
    else:
        log("Keine Änderungen")

    log("Sync abgeschlossen")


if __name__ == "__main__":
    main()