460 lines
15 KiB
Python
Executable file
460 lines
15 KiB
Python
Executable file
#!/usr/bin/env python3
|
|
"""sync_state.py — Ersetzt sync-state.sh mit Core-Modules.
|
|
|
|
Läuft alle 15 Min auf pve-hetzner via Cron.
|
|
- Generiert STATE.md Dateien aus homelab.conf + Live-Daten
|
|
- Service-Watchdog mit Telegram-Alerts
|
|
- Git Commit & Push nach Forgejo
|
|
|
|
Nutzt dieselben Core-Module wie MCP-Server und Telegram-Bot.
|
|
"""
|
|
|
|
import os
|
|
import sys
|
|
import time
|
|
import subprocess
|
|
import json
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
|
|
# Provide a search path for child processes when the caller supplies none;
# an already present PATH is left untouched.
os.environ.setdefault(
    "PATH",
    "/usr/sbin:/usr/local/sbin:/usr/local/bin:/usr/bin:/sbin:/bin",
)

# The shared "core" package is imported from the directory above CORE_PATH,
# so that directory has to be on sys.path before the import below runs.
CORE_PATH = Path("/root/homelab-mcp/core")
sys.path.insert(0, os.fspath(CORE_PATH.parent))

from core import config

# Git working copy that receives the generated STATE.md files.
REPO = Path("/opt/homelab-brain")

# Directory holding one "<key>.lock" file per debounced alert.
DEBOUNCE_DIR = Path("/tmp/homelab_watchdog")
DEBOUNCE_DIR.mkdir(exist_ok=True)

# Timestamp taken once at import time and reused by every generated file.
NOW = datetime.now()
DATE = f"{NOW:%Y-%m-%d %H:%M}"

# Set to True by main() once a STATE.md file has been written.
CHANGED = False
|
|
|
|
|
|
def log(msg: str):
    """Print *msg* to stdout as ``[HH:MM:SS] msg`` and flush immediately."""
    stamp = datetime.now().strftime('%H:%M:%S')
    line = f"[{stamp}] {msg}"
    print(line, flush=True)
|
|
|
|
|
|
# ── Telegram ──────────────────────────────────────────
|
|
|
|
def tg_alert(cfg: config.HomelabConfig, key: str, msg: str):
    """Send a debounced watchdog alert to Telegram via ``curl``.

    Args:
        cfg: Parsed homelab configuration. The bot token is read from
            ``cfg.telegram["tg_mutter_token"]`` and the chat id from
            ``cfg.raw["TG_CHAT_ID"]``; if either is empty nothing happens.
        key: Debounce key. The lock file is ``DEBOUNCE_DIR/<key>.lock`` and
            stores the Unix time of the last delivered alert.
        msg: Alert text. Callers in this file pass line breaks pre-encoded
            as ``%0A``; those escapes are decoded here and the whole text is
            then URL-encoded by curl.

    An alert is suppressed when the lock file holds a timestamp younger than
    600 seconds. The lock is only (re)written after curl reported success,
    so an undelivered alert is retried on the next run instead of being
    silently debounced.
    """
    from urllib.parse import unquote

    token = cfg.telegram.get("tg_mutter_token", "")
    chat_id = cfg.raw.get("TG_CHAT_ID", "")
    if not token or not chat_id:
        return

    lockfile = DEBOUNCE_DIR / f"{key}.lock"
    now = int(time.time())
    try:
        last = int(lockfile.read_text().strip())
    except (OSError, ValueError):
        # No lock yet, or unreadable/garbled content: treat as "never sent".
        last = None
    if last is not None and now - last < 600:
        return

    # Decode the callers' "%0A" escapes first, then let curl encode the whole
    # text. Passing it raw via "-d" breaks as soon as the message contains
    # "&", "+" or a stray "%" (e.g. in a git error line).
    text = "Homelab Watchdog\n\n" + unquote(msg)
    try:
        result = subprocess.run([
            "curl", "-s", "--fail", "-X", "POST",
            f"https://api.telegram.org/bot{token}/sendMessage",
            "--data-urlencode", f"chat_id={chat_id}",
            "--data-urlencode", f"text={text}",
            "-d", "parse_mode=Markdown",
        ], capture_output=True, timeout=10)
    except (subprocess.TimeoutExpired, OSError) as exc:
        # Only the exception type is logged: str(exc) would contain the full
        # command line including the bot token.
        log(f"Alert fehlgeschlagen: {key} ({type(exc).__name__})")
        return

    if result.returncode != 0:
        # --fail turns HTTP errors (>= 400) into a non-zero exit status.
        log(f"Alert fehlgeschlagen: {key} (curl exit {result.returncode})")
        return

    lockfile.write_text(str(now))
    log(f"Alert gesendet: {key}")
|
|
|
|
|
|
def tg_recovery(cfg: config.HomelabConfig, key: str, name: str):
    """Announce that a previously alerted service is back and clear its lock.

    Args:
        cfg: Parsed homelab configuration (same token / chat id lookup as
            the alert path: ``cfg.telegram["tg_mutter_token"]`` and
            ``cfg.raw["TG_CHAT_ID"]``).
        key: Debounce key; the lock file is ``DEBOUNCE_DIR/<key>.lock``.
        name: Human-readable service name used in the message.

    Nothing happens unless the lock file exists and both credentials are
    set. The lock is removed even if the notification could not be sent, so
    a later outage is alerted without debounce delay.
    """
    token = cfg.telegram.get("tg_mutter_token", "")
    chat_id = cfg.raw.get("TG_CHAT_ID", "")
    lockfile = DEBOUNCE_DIR / f"{key}.lock"

    if not (lockfile.exists() and token and chat_id):
        return

    try:
        subprocess.run([
            "curl", "-s", "-X", "POST",
            f"https://api.telegram.org/bot{token}/sendMessage",
            "--data-urlencode", f"chat_id={chat_id}",
            # URL-encode the text so spaces and special characters in *name*
            # cannot corrupt the form body.
            "--data-urlencode", f"text=*{name}* wieder online",
            "-d", "parse_mode=Markdown",
        ], capture_output=True, timeout=10)
    except (subprocess.TimeoutExpired, OSError) as exc:
        # A failed notification must not abort the sync run. Only the type
        # is logged because str(exc) would expose the bot token.
        log(f"Recovery-Nachricht fehlgeschlagen: {key} ({type(exc).__name__})")

    lockfile.unlink(missing_ok=True)
    log(f"Recovery: {name} wieder online")
|
|
|
|
|
|
# ── Service Checks (pct exec) ────────────────────────
|
|
|
|
def pct_exec(ct: int, cmd: str, timeout: int = 15) -> str:
    """Run *cmd* through ``bash -c`` inside container *ct* using ``pct exec``.

    Args:
        ct: Container id handed to ``pct exec``.
        cmd: Shell command line executed by bash inside the container.
        timeout: Seconds to wait before giving up.

    Returns:
        The command's stdout with surrounding whitespace stripped, or an
        empty string if running it raised anything (timeout, missing
        ``pct`` binary, ...). Exit status and stderr are not inspected.
    """
    try:
        argv = ["pct", "exec", str(ct), "--", "bash", "-c", cmd]
        completed = subprocess.run(
            argv,
            capture_output=True,
            text=True,
            timeout=timeout,
        )
        return completed.stdout.strip()
    except Exception:  # also covers subprocess.TimeoutExpired
        return ""
|
|
|
|
|
|
def check_service(cfg: config.HomelabConfig, ct: int, service: str, name: str) -> str:
    """Check a systemd unit inside a container and alert on failure.

    ``systemctl is-active <service>`` is queried up to three times with a
    two second pause between attempts. If none of them reports ``active`` a
    Telegram alert is raised under the key ``service_<service>`` and
    ``"DOWN"`` is returned; otherwise a pending alert for that key is
    resolved and ``"active"`` is returned.
    """
    probe = f"systemctl is-active {service}"
    answer = ""
    for retries_left in range(2, -1, -1):
        answer = pct_exec(ct, probe)
        if answer == "active":
            break
        if retries_left:
            time.sleep(2)

    lock_key = f"service_{service}"
    if answer == "active":
        tg_recovery(cfg, lock_key, name)
        return "active"

    # Empty output (e.g. pct failed) is reported as "unknown".
    status = answer or "unknown"
    tg_alert(cfg, lock_key,
             f"*{name}* ist DOWN%0AService: {service}%0ACT: {ct}%0AStatus: {status}")
    return "DOWN"
|
|
|
|
|
|
def check_docker(cfg: config.HomelabConfig, ct: int, container: str, name: str) -> str:
    """Check a Docker container's state inside an LXC container.

    ``docker inspect`` is queried up to three times with a two second pause
    between attempts. The debounce key is the container name with ``-``
    replaced by ``_``. Returns ``"running"`` after resolving any pending
    alert, otherwise the last reported state (``"unknown"`` when the probe
    produced no output) after raising an alert.
    """
    probe = f"docker inspect --format='{{{{.State.Status}}}}' {container}"
    answer = ""
    for retries_left in range(2, -1, -1):
        answer = pct_exec(ct, probe)
        if answer == "running":
            break
        if retries_left:
            time.sleep(2)

    lock_key = container.replace("-", "_")
    if answer == "running":
        tg_recovery(cfg, lock_key, name)
        return "running"

    status = answer or "unknown"
    tg_alert(cfg, lock_key,
             f"*{name}* ist DOWN%0AStatus: {status}%0ACT: {ct}")
    return status
|
|
|
|
|
|
# ── STATE.md Generatoren ─────────────────────────────
|
|
|
|
def generate_arakava_state(cfg: config.HomelabConfig) -> str:
    """Build the Markdown "live state" document for Arakava News.

    Live data is collected with ``pct_exec`` from container 109
    (rss-manager status, feed tables from its SQLite database, OpenRouter
    balance, error log) and container 101 (WordPress Docker status); URLs,
    container rows and passwords come from *cfg*.

    Args:
        cfg: Parsed homelab configuration.

    Returns:
        The complete Markdown text.

    NOTE(review): the generated text embeds passwords from ``cfg.passwords``
    and the OpenRouter key is interpolated into the command line handed to
    ``pct exec`` — confirm both are acceptable for this repository.
    """
    log("Sammle Arakava News Status...")

    rss_status = pct_exec(109, "systemctl is-active rss-manager") or "unknown"
    wp_status = pct_exec(101, "docker inspect --format='{{.State.Status}}' wordpress-app") or "unknown"

    # chr(110)+chr(105)+chr(101) spells "nie" without needing another level
    # of quotes inside the shell-quoted one-liner.
    feed_cmd = (
        "python3 -c \""
        "import sqlite3;"
        " db = sqlite3.connect('/opt/rss-manager/rss_manager.db');"
        " rows = db.execute('SELECT name, last_run FROM feeds WHERE enabled=1 ORDER BY last_run DESC LIMIT 5').fetchall();"
        " [print(f' {r[0]}: {r[1] or chr(110)+chr(105)+chr(101)}') for r in rows]"
        "\""
    )
    feed_activity = pct_exec(109, feed_cmd) or " (nicht abrufbar)"

    or_key = cfg.api_keys.get("openrouter_key", "")
    or_cmd = (
        "python3 -c \""
        "import requests\n"
        "try:\n"
        " r = requests.get('https://openrouter.ai/api/v1/auth/key',"
        " headers={'Authorization': 'Bearer " + or_key + "'}, timeout=5)\n"
        " d = r.json().get('data', {})\n"
        " remaining = float(d.get('limit', 20)) - float(d.get('usage', 0))\n"
        # The script sits inside a double-quoted bash argument, so the dollar
        # sign must be escaped; otherwise bash tries to expand
        # ${remaining:.2f} itself before python ever sees it.
        " print(f'\\${remaining:.2f} verbleibend')\n"
        "except Exception as e:\n"
        " print(f'(nicht abrufbar: {e})')\n"
        "\""
    )
    or_balance = pct_exec(109, or_cmd) or "(nicht abrufbar)"

    # "grep -c" already prints 0 (and exits 1) when nothing matches, so an
    # "|| echo 0" fallback would yield two lines. A missing log file gives
    # empty output, which the "or" below turns into "0".
    errors = pct_exec(109, "grep -c 'ERROR' /opt/rss-manager/logs/service.log 2>/dev/null") or "0"
    last_error = pct_exec(109, "grep 'ERROR' /opt/rss-manager/logs/service.log 2>/dev/null | tail -1 || echo 'keine'") or "keine"

    ct_101 = config.get_container(cfg, vmid=101)
    ct_109 = config.get_container(cfg, vmid=109)
    ct_600 = config.get_container(cfg, vmid=600)
    ct_601 = config.get_container(cfg, vmid=601)

    rss_url = f"http://{ct_109.tailscale_ip}:8080" if ct_109 and ct_109.tailscale_ip else "—"
    matomo_url = f"https://{cfg.domains.get('matomo', '')}"
    blog_url = f"https://{cfg.domains.get('primary', '')}"
    admin_url = f"{blog_url}/wp-admin"
    pw_admin = cfg.passwords.get("wp_admin", "?")
    pw_default = cfg.passwords.get("default", "?")

    feed_table_cmd = (
        "python3 -c \""
        "import sqlite3;"
        " db = sqlite3.connect('/opt/rss-manager/rss_manager.db');"
        " rows = db.execute('SELECT id, name, schedule FROM feeds WHERE enabled=1 ORDER BY id').fetchall();"
        " [print(f'| {r[0]} | {r[1]} | {r[2]} |') for r in rows]"
        "\""
    )
    feed_table = pct_exec(109, feed_table_cmd) or "| — | (nicht abrufbar) | — |"

    def ct_row(ct, extra=""):
        """Render one row of the three-column container tables."""
        if not ct:
            # Placeholder must have the same three cells as a real row.
            return "| ? | ? | ? |"
        s = extra or ct.services
        return f"| {ct.vmid} | {s} | {ct.tailscale_ip or '—'} |"

    return f"""# Arakava News — Live State
> Auto-generiert: {DATE}

## Service Status
| Service | CT | Status |
|---|---|---|
| rss-manager | 109 | {rss_status} |
| WordPress Docker | 101 | {wp_status} |

## Letzte Feed-Aktivität (Top 5)
{feed_activity}

## Fehler (letzte 24h)
- Fehler gesamt: {errors}
- Letzter Fehler: {last_error}

## OpenRouter Guthaben
{or_balance}

## URLs
- Blog: {blog_url}
- Admin: {admin_url} (admin / {pw_admin})
- RSS Manager: {rss_url} (admin / {pw_default})
- Matomo: {matomo_url} (admin / {pw_default})

## Container (Primary — pve-hetzner)
| CT | Dienst | Tailscale |
|---|---|---|
{ct_row(ct_101)}
{ct_row(ct_109)}

## Container (Mirror — pve3 Muldenstein)
| CT | Dienst | Tailscale |
|---|---|---|
{ct_row(ct_600)}
{ct_row(ct_601)}

## Aktive Feeds
| ID | Name | Schedule |
|---|---|---|
{feed_table}

## Code (CT 109: /opt/rss-manager/)
poster.py, scheduler.py, app.py, db.py

## Änderungshistorie
- 08.03.2026: Domain arakavanews.com live, Mirror CT 600/601 auf pve3
- 08.03.2026: homelab.conf als zentrale Quelle der Wahrheit
- 24.02.2026: Scheduler Lock gegen Doppelstarts
- 24.02.2026: Telegram auf HTML-Modus (Sonderzeichen-Fix)
- 24.02.2026: Werbeartikel-Blacklist (Anzeige:, Sponsored, etc.)
- 23.02.2026: Matomo von CT 113 → CT 109 migriert
"""
|
|
|
|
|
|
def generate_infra_state(cfg: config.HomelabConfig) -> str:
    """Build the Markdown "live state" document for the infrastructure.

    Combines local ``df -h`` output for ``/`` and ``/var/lib/vz`` with the
    container, tunnel and password entries found in *cfg*.

    Args:
        cfg: Parsed homelab configuration.

    Returns:
        The complete Markdown text.
    """
    log("Sammle Infrastruktur Status...")

    def _df_fields(stdout):
        # Fields 4 and 5 of the last output line (Avail / Use% in the usual
        # df layout); "n/a" when df printed no data line.
        lines = stdout.strip().split("\n")
        if len(lines) <= 1:
            return "n/a"
        return " ".join(lines[-1].split()[3:5])

    root_df = subprocess.run(["df", "-h", "/"], capture_output=True, text=True)
    disk_root_info = _df_fields(root_df.stdout)

    # Only this mount is additionally guarded by df's exit status.
    vz_df = subprocess.run(["df", "-h", "/var/lib/vz"], capture_output=True, text=True)
    disk_data = _df_fields(vz_df.stdout) if vz_df.returncode == 0 else "n/a"

    def _on_host(host):
        return [c for c in cfg.containers if c.host == host]

    hetzner_cts = _on_host("pve-hetzner")
    pve1_cts = _on_host("pve1")
    pve3_cts = _on_host("pve3")

    def ct_table(cts, cols=("CT", "Name", "Tailscale IP", "Dienste")):
        """Render containers as a Markdown table sorted by vmid."""
        body = [
            f"| {c.vmid} | {c.name} | {c.tailscale_ip or '—'} | {c.services} |"
            for c in sorted(cts, key=lambda x: x.vmid)
        ]
        if not body:
            return "(keine)"
        header = "| " + " | ".join(cols) + " |"
        sep = "|" + "|".join(["---"] * len(cols)) + "|"
        return "\n".join([header, sep, *body])

    def _tunnel_line(t):
        status_label = "Standby" if t.status == "standby" else "aktiv"
        return f"- CT {t.ct_id}: {t.domain} → {t.target} ({status_label})"

    tunnel_lines = [_tunnel_line(t) for t in cfg.tunnels]
    tunnel_text = "\n".join(tunnel_lines) if tunnel_lines else "- keine"

    pw_hetzner = cfg.passwords.get("hetzner", "?")
    pw_default = cfg.passwords.get("default", "?")

    hetzner_table = ct_table(hetzner_cts)
    pve1_table = ct_table(pve1_cts)
    pve3_table = ct_table(pve3_cts)

    return f"""# Infrastruktur — Live State
> Auto-generiert: {DATE}

## pve-hetzner Disk
| Mount | Belegt |
|---|---|
| / (root) | {disk_root_info} |
| /var/lib/vz (VMs/CTs) | {disk_data} |

## Aktive Container auf pve-hetzner
{hetzner_table}

## Container auf pve1 (Kambodscha)
{pve1_table}

## Container auf pve3 (Muldenstein)
{pve3_table}

## Routing (Cloudflare Tunnels)
{tunnel_text}

## Zugangsdaten
- pve-hetzner: root / {pw_hetzner}
- pve1: root / {pw_default}
- Alle CTs: root / {pw_default}

## Telegram Bots
| Bot | Zweck |
|---|---|
| @MutterbotAI_bot | Watchdog-Alerts |
| @Orbitalo_Hausmeister_bot | Homelab AI-Bot |
"""
|
|
|
|
|
|
def generate_smarthome_state(cfg: config.HomelabConfig) -> str:
    """Build the Markdown "live state" document for the smart home.

    Reports the newest ``*.tar.gz`` archive (size in whole MB and
    modification time) and the archive count found in
    ``/home/backup-muldenstein/backups``, plus the Grafana URL and the
    Telegram chat id taken from *cfg*.

    Args:
        cfg: Parsed homelab configuration.

    Returns:
        The complete Markdown text.
    """
    log("Sammle Smart Home Status...")

    # Values used when the backup directory does not exist at all.
    last_backup = "Verzeichnis nicht vorhanden"
    backup_count = "0"

    backup_dir = Path("/home/backup-muldenstein/backups")
    if backup_dir.exists():
        archives = list(backup_dir.glob("*.tar.gz"))
        archives.sort(key=lambda p: p.stat().st_mtime, reverse=True)
        backup_count = str(len(archives))
        if not archives:
            last_backup = "keine Backups gefunden"
        else:
            newest = archives[0].stat()
            size_mb = newest.st_size // (1024 * 1024)
            mtime = datetime.fromtimestamp(newest.st_mtime).strftime("%Y-%m-%d %H:%M")
            last_backup = f"{size_mb}MB, {mtime}"

    grafana_host = cfg.domains.get('grafana', 'grafana.orbitalo.net')
    grafana_url = f"https://{grafana_host}"

    return f"""# Smart Home Muldenstein — Live State
> Auto-generiert: {DATE}

## Backup-Status
- Letztes Backup: {last_backup}
- Backups gesamt: {backup_count}
- Ziel: /home/backup-muldenstein/backups/ (CT 144)

## Services (CT 143)
| Dienst | URL |
|---|---|
| Grafana | {grafana_url} |
| ioBroker | http://192.168.178.36:8081 |
| InfluxDB | http://192.168.178.36:8086 |

## Grafana Alerts → Telegram {cfg.raw.get('TG_CHAT_ID', '?')}
- Promtail DOWN (> 5 Min keine Daten)
- CPU > 70%
- Memory > 80%
- Disk > 90%

## Backup-Zeitplan
- täglich 04:00 → /root/backup-to-hetzner.sh (auf pve3)
- Retention: 30d tägl, 90d wöchl, unbegrenzt monatl
"""
|
|
|
|
|
|
# ── Git Operations ────────────────────────────────────
|
|
|
|
def git_sync(cfg: config.HomelabConfig):
    """Bring the local repository in line with ``main`` on Forgejo.

    Fetches ``main`` and hard-resets the working copy in ``REPO`` to
    ``FETCH_HEAD``. The reset is skipped when the fetch failed or timed
    out, so the working copy is never reset to a stale ``FETCH_HEAD``.

    Args:
        cfg: Parsed homelab configuration; supplies the sync token
            (``cfg.api_keys["forgejo_sync_token"]``) and container 111,
            whose Tailscale IP is used as the Forgejo host.

    Returns:
        The authenticated remote URL, for reuse by the push step. It is
        returned even when fetching or resetting failed.
    """
    forgejo_token = cfg.api_keys.get("forgejo_sync_token", "")
    ct_111 = config.get_container(cfg, vmid=111)
    # Fall back to the fixed address when the container is unknown or has
    # no Tailscale IP configured.
    forgejo_ip = (ct_111.tailscale_ip if ct_111 else "") or "100.89.246.60"
    forgejo_url = f"http://orbitalo:{forgejo_token}@{forgejo_ip}:3000/orbitalo/homelab-brain.git"

    # git output is deliberately not logged: it may repeat the remote URL,
    # which carries the token.
    try:
        fetch = subprocess.run(
            ["git", "-C", str(REPO), "fetch", forgejo_url, "main", "--quiet"],
            capture_output=True, timeout=30,
        )
    except subprocess.TimeoutExpired:
        log("Fetch FEHLER: Timeout")
        return forgejo_url
    if fetch.returncode != 0:
        log(f"Fetch FEHLER: git exit {fetch.returncode}")
        return forgejo_url

    try:
        reset = subprocess.run(
            ["git", "-C", str(REPO), "reset", "--hard", "FETCH_HEAD"],
            capture_output=True, timeout=15,
        )
    except subprocess.TimeoutExpired:
        log("Reset FEHLER: Timeout")
        return forgejo_url
    if reset.returncode != 0:
        log(f"Reset FEHLER: git exit {reset.returncode}")

    return forgejo_url
|
|
|
|
|
|
def git_commit_and_push(cfg: config.HomelabConfig, forgejo_url: str):
    """Stage everything in ``REPO``, commit it and push ``main`` to Forgejo.

    Args:
        cfg: Parsed homelab configuration, forwarded to the alert helper.
        forgejo_url: Authenticated remote URL to push to.

    On success the ``git_push`` debounce lock is removed. A failed or timed
    out push is logged and reported through ``tg_alert`` under the key
    ``git_push``. The results of ``git add`` / ``git commit`` are not
    inspected (a commit with nothing staged simply fails).
    """
    from urllib.parse import urlsplit

    git = ["git", "-C", str(REPO)]
    push = None
    try:
        subprocess.run(git + ["add", "-A"], capture_output=True, timeout=15)
        subprocess.run(
            git + ["-c", "user.email=sync@homelab", "-c", "user.name=Auto-Sync",
                   "commit", "-m", f"Auto-Sync: {DATE}", "--quiet"],
            capture_output=True, timeout=15,
        )
        push = subprocess.run(
            git + ["push", forgejo_url, "main", "--quiet"],
            capture_output=True, text=True, timeout=30,
        )
    except subprocess.TimeoutExpired as exc:
        # A hanging git call is a sync failure and must raise the alert too,
        # rather than aborting the script with a traceback.
        err = f"Timeout nach {exc.timeout}s"
    else:
        if push.returncode == 0:
            log("Push erfolgreich")
            (DEBOUNCE_DIR / "git_push.lock").unlink(missing_ok=True)
            return
        err = push.stderr.split("\n")[0] if push.stderr else "unbekannt"

    # git may repeat the remote URL in its error text; keep the token out of
    # the log and the Telegram message.
    secret = urlsplit(forgejo_url).password
    if secret:
        err = err.replace(secret, "***")

    log(f"Push FEHLER: {err}")
    tg_alert(cfg, "git_push",
             f"*Homelab Git-Sync fehlgeschlagen*%0A%0AFehler: {err}%0AZeit: {DATE}")
|
|
|
|
|
|
# ── Main ──────────────────────────────────────────────
|
|
|
|
def main():
    """Run one sync cycle: git sync, watchdog checks, STATE.md generation, push.

    Sets the module-level ``CHANGED`` flag when at least one STATE.md file
    was (re)written; commit and push only happen in that case.
    """
    global CHANGED
    log("Sync startet...")

    # First parse supplies what git_sync needs; the second parse picks up a
    # homelab.conf that the hard reset inside git_sync may have replaced.
    cfg = config.parse_config(REPO / "homelab.conf")
    forgejo_url = git_sync(cfg)

    cfg = config.parse_config(REPO / "homelab.conf")

    # Watchdog
    log("Watchdog läuft...")
    check_service(cfg, 109, "rss-manager", "RSS Manager")
    check_docker(cfg, 101, "wordpress-app", "WordPress Docker")

    # Generate the STATE.md files
    states = {
        "arakava-news/STATE.md": generate_arakava_state(cfg),
        "infrastructure/STATE.md": generate_infra_state(cfg),
        "smart-home/STATE.md": generate_smarthome_state(cfg),
    }

    for path, content in states.items():
        full_path = REPO / path
        try:
            previous = full_path.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError):
            # Missing or unreadable file: always (re)write it.
            previous = None
        if previous == content:
            # Identical content: nothing to write, so CHANGED stays as is and
            # the "Keine Änderungen" branch below becomes reachable.
            continue

        full_path.parent.mkdir(parents=True, exist_ok=True)
        # The generated text is non-ASCII ("—", "→", umlauts); pin the
        # encoding instead of relying on the locale of the calling process.
        full_path.write_text(content, encoding="utf-8")
        CHANGED = True
        log(f"{path} aktualisiert")

    if CHANGED:
        git_commit_and_push(cfg, forgejo_url)
    else:
        log("Keine Änderungen")

    log("Sync abgeschlossen")
|
|
|
|
|
|
# Run a sync cycle only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|