fix: sync-state.sh → thin Python wrapper mit Retry-Logik
This commit is contained in:
parent
c63f0c7a04
commit
c1140eb6bc
1 changed files with 7 additions and 334 deletions
|
|
@ -1,344 +1,17 @@
|
|||
#!/bin/bash
|
||||
# ============================================================
|
||||
# homelab-brain Auto-Sync Script
|
||||
# homelab-brain Auto-Sync — Shell-Wrapper
|
||||
# Läuft alle 15 Min auf pve-hetzner via Cron
|
||||
# Aktualisiert STATE.md Dateien, pushed nach Forgejo
|
||||
# Telegram-Alerts bei Service-Ausfällen und Push-Fehlern
|
||||
# Stand: 08.03.2026
|
||||
# ============================================================
|
||||
# WICHTIG: Alle variablen Daten kommen aus homelab.conf.
|
||||
# Niemals hier IPs, URLs oder Credentials hardcoden!
|
||||
# Ruft sync_state.py auf (nutzt Core-Module + Retry-Logik)
|
||||
# ============================================================
|
||||
|
||||
export PATH="/usr/sbin:/usr/local/sbin:/usr/local/bin:/usr/bin:/sbin:/bin"
|
||||
set -euo pipefail
|
||||
REPO="/opt/homelab-brain"
|
||||
|
||||
# --- Quelle der Wahrheit laden ---
|
||||
source "$REPO/homelab.conf"
|
||||
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
|
||||
VENV="/root/homelab-mcp/.venv"
|
||||
|
||||
TG_TOKEN="$TG_MUTTER_TOKEN"
|
||||
TG_CHAT="$TG_CHAT_ID"
|
||||
FORGEJO_TOKEN="$FORGEJO_SYNC_TOKEN"
|
||||
DEBOUNCE_DIR="/tmp/homelab_watchdog"
|
||||
DATE=$(date '+%Y-%m-%d %H:%M')
|
||||
CHANGED=0
|
||||
|
||||
mkdir -p "$DEBOUNCE_DIR"
|
||||
log() { echo "[$(date '+%H:%M:%S')] $1"; }
|
||||
|
||||
#######################################
# Send a Telegram alert, debounced to at most one delivered message per
# alert key every 10 minutes (600 seconds).
# Globals:   DEBOUNCE_DIR (read) - directory holding the per-key lock files
#            TG_TOKEN (read)     - bot token used in the API URL
#            TG_CHAT (read)      - target chat id
# Arguments: $1 - alert key; "<key>.lock" in DEBOUNCE_DIR stores the time of
#                 the last delivered alert
#            $2 - message text; callers pass line breaks pre-encoded as %0A,
#                 so the text is sent with plain -d (not re-encoded here)
# Outputs:   one log() line describing whether the alert was sent
# Returns:   0 always (alerting is best effort)
#######################################
tg_alert() {
  local key="$1" msg="$2"
  local lockfile="$DEBOUNCE_DIR/${key}.lock"
  local now last=""
  # Declared separately from the assignment so the exit status of date is
  # not masked by 'local'.
  now=$(date +%s)

  if [ -f "$lockfile" ]; then
    last=$(cat "$lockfile" 2>/dev/null) || last=""
    case "$last" in
      '' | *[!0-9]*)
        # Empty or corrupt lock: treat as expired instead of failing in the
        # arithmetic expansion below.
        ;;
      *)
        # 10# forces base 10 so a timestamp with leading zeros is not
        # parsed as octal.
        if [ $((now - 10#$last)) -lt 600 ]; then
          return 0 # debounce active
        fi
        ;;
    esac
  fi

  # Only start the debounce window when the request actually went out;
  # otherwise a network failure would silence the alert for 10 minutes.
  if curl -s -X POST "https://api.telegram.org/bot${TG_TOKEN}/sendMessage" \
    -d "chat_id=${TG_CHAT}" \
    -d "text=Homelab Watchdog%0A%0A${msg}" \
    -d "parse_mode=Markdown" > /dev/null 2>&1; then
    echo "$now" > "$lockfile"
    log "Alert gesendet: $key"
  else
    log "Alert konnte nicht gesendet werden: $key"
  fi
  return 0
}
|
||||
|
||||
#######################################
# Service watchdog: check whether a systemd service inside a container is
# active, retrying up to three times with a 2 second pause between attempts.
# Globals:   DEBOUNCE_DIR (read) - "service_<service>.lock" there marks that
#                                  a DOWN alert was sent earlier
#            TG_TOKEN, TG_CHAT (read) - used for the recovery message
# Arguments: $1 - container id passed to 'pct exec'
#            $2 - systemd unit name
#            $3 - human-readable name used in the messages
# Outputs:   "active" or "DOWN" on stdout (captured by callers); alert and
#            recovery logging goes to stderr so it does not pollute stdout
# Returns:   0
#######################################
check_service() {
  local ct="$1" service="$2" name="$3"
  local status="unknown" attempt
  local lockfile="$DEBOUNCE_DIR/service_${service}.lock"

  for attempt in 1 2 3; do
    # 'systemctl is-active' prints the state (e.g. "inactive", "failed") AND
    # exits non-zero when the unit is not active. Appending a fallback echo
    # inside the substitution would therefore yield "inactive<newline>unknown",
    # so the fallback is applied only when nothing was printed at all.
    status=$(pct exec "$ct" -- systemctl is-active "$service" 2>/dev/null) || true
    status=${status:-unknown}
    if [ "$status" = "active" ]; then
      break
    fi
    if [ "$attempt" -lt 3 ]; then
      sleep 2
    fi
  done

  if [ "$status" != "active" ]; then
    tg_alert "service_${service}" "*${name}* ist DOWN%0AService: ${service}%0ACT: ${ct}%0AStatus: ${status}" >&2
    echo "DOWN"
  else
    # A leftover lock means a DOWN alert went out before: announce recovery
    # once and clear the lock. Sent directly (not via tg_alert) so the
    # debounce window cannot suppress it.
    if [ -f "$lockfile" ]; then
      curl -s -X POST "https://api.telegram.org/bot${TG_TOKEN}/sendMessage" \
        -d "chat_id=${TG_CHAT}" \
        -d "text=*${name}* wieder online" \
        -d "parse_mode=Markdown" > /dev/null 2>&1 || true # best effort
      rm -f "$lockfile"
      log "Recovery: $name wieder online" >&2
    fi
    echo "active"
  fi
}
|
||||
|
||||
cd "$REPO"
|
||||
git pull --quiet 2>/dev/null || true
|
||||
|
||||
# ─────────────────────────────────────────────────────
|
||||
# 0. SERVICE WATCHDOG
|
||||
# ─────────────────────────────────────────────────────
|
||||
log "Watchdog läuft..."
|
||||
RSS_LIVE=$(check_service 109 rss-manager "RSS Manager")
|
||||
|
||||
WP_LIVE="unknown"
|
||||
for _wp_attempt in 1 2 3; do
|
||||
WP_LIVE=$(pct exec 101 -- docker inspect --format='{{.State.Status}}' wordpress-app 2>/dev/null || echo "unknown")
|
||||
[ "$WP_LIVE" = "running" ] && break
|
||||
[ "$_wp_attempt" -lt 3 ] && sleep 2
|
||||
done
|
||||
if [ "$WP_LIVE" != "running" ]; then
|
||||
tg_alert "wordpress" "*WordPress Docker* ist DOWN%0AStatus: ${WP_LIVE}%0ACT: 101"
|
||||
if [ -f "$VENV/bin/python" ]; then
|
||||
exec "$VENV/bin/python" "$SCRIPT_DIR/sync_state.py" 2>&1
|
||||
else
|
||||
if [ -f "$DEBOUNCE_DIR/wordpress.lock" ]; then
|
||||
curl -s -X POST "https://api.telegram.org/bot${TG_TOKEN}/sendMessage" \
|
||||
-d "chat_id=${TG_CHAT}" \
|
||||
-d "text=*WordPress Docker* wieder online" \
|
||||
-d "parse_mode=Markdown" > /dev/null 2>&1
|
||||
rm -f "$DEBOUNCE_DIR/wordpress.lock"
|
||||
log "Recovery: WordPress wieder online"
|
||||
fi
|
||||
exec python3 "$SCRIPT_DIR/sync_state.py" 2>&1
|
||||
fi
|
||||
|
||||
# ─────────────────────────────────────────────────────
|
||||
# 1. ARAKAVA NEWS STATE
|
||||
# ─────────────────────────────────────────────────────
|
||||
log "Sammle Arakava News Status..."
|
||||
|
||||
RSS_STATUS=$(pct exec 109 -- systemctl is-active rss-manager 2>/dev/null || echo "unknown")
|
||||
WP_STATUS=$(pct exec 101 -- docker inspect --format='{{.State.Status}}' wordpress-app 2>/dev/null || echo "unknown")
|
||||
|
||||
# Letzte Feed-Aktivität aus SQLite
|
||||
FEED_ACTIVITY=$(pct exec 109 -- python3 -c "
|
||||
import sqlite3
|
||||
db = sqlite3.connect('/opt/rss-manager/rss_manager.db')
|
||||
rows = db.execute(\"SELECT name, last_run FROM feeds WHERE enabled=1 ORDER BY last_run DESC LIMIT 5\").fetchall()
|
||||
for r in rows: print(f' {r[0]}: {r[1] or \"nie\"}')
|
||||
" 2>/dev/null || echo " (nicht abrufbar)")
|
||||
|
||||
# Fehler letzte 24h
|
||||
ERRORS=$(pct exec 109 -- bash -c "grep -c 'ERROR' /opt/rss-manager/logs/service.log 2>/dev/null || echo 0" 2>/dev/null || echo "0")
|
||||
LAST_ERROR=$(pct exec 109 -- bash -c "grep 'ERROR' /opt/rss-manager/logs/service.log 2>/dev/null | tail -1 || echo 'keine'" 2>/dev/null || echo "keine")
|
||||
|
||||
# OpenRouter Balance
|
||||
OR_BALANCE=$(pct exec 109 -- python3 -c "
|
||||
import requests
|
||||
try:
|
||||
r = requests.get('https://openrouter.ai/api/v1/auth/key',
|
||||
headers={'Authorization': 'Bearer $OPENROUTER_KEY'},
|
||||
timeout=5)
|
||||
d = r.json().get('data', {})
|
||||
remaining = float(d.get('limit', 20)) - float(d.get('usage', 0))
|
||||
print(f'\${remaining:.2f} verbleibend')
|
||||
except Exception as e:
|
||||
print(f'(nicht abrufbar: {e})')
|
||||
" 2>/dev/null || echo "(nicht abrufbar)")
|
||||
|
||||
cat > "$REPO/arakava-news/STATE.md" << EOF
|
||||
# Arakava News — Live State
|
||||
> Auto-generiert: $DATE
|
||||
|
||||
## Service Status
|
||||
| Service | CT | Status |
|
||||
|---|---|---|
|
||||
| rss-manager | 109 | $RSS_STATUS |
|
||||
| WordPress Docker | 101 | $WP_STATUS |
|
||||
|
||||
## Letzte Feed-Aktivität (Top 5)
|
||||
$FEED_ACTIVITY
|
||||
|
||||
## Fehler (letzte 24h)
|
||||
- Fehler gesamt: $ERRORS
|
||||
- Letzter Fehler: $LAST_ERROR
|
||||
|
||||
## OpenRouter Guthaben
|
||||
$OR_BALANCE
|
||||
|
||||
## URLs
|
||||
- Blog: https://$DOMAIN_PRIMARY
|
||||
- Admin: https://$DOMAIN_PRIMARY/wp-admin (admin / $PW_WP_ADMIN)
|
||||
- RSS Manager: http://$(echo $CT_109 | cut -d'|' -f2):8080 (admin / $PW_DEFAULT)
|
||||
- Matomo: https://$DOMAIN_MATOMO (admin / $PW_DEFAULT)
|
||||
|
||||
## Container (Primary — pve-hetzner)
|
||||
| CT | Dienst | Tailscale |
|
||||
|---|---|---|
|
||||
| 101 | $(echo $CT_101 | cut -d'|' -f3) | $(echo $CT_101 | cut -d'|' -f2) |
|
||||
| 109 | $(echo $CT_109 | cut -d'|' -f3) | $(echo $CT_109 | cut -d'|' -f2) |
|
||||
|
||||
## Container (Mirror — pve3 Muldenstein)
|
||||
| CT | Dienst | Tailscale |
|
||||
|---|---|---|
|
||||
| 600 | $(echo $CT_600 | cut -d'|' -f3) | $(echo $CT_600 | cut -d'|' -f2) |
|
||||
| 601 | $(echo $CT_601 | cut -d'|' -f3) | $(echo $CT_601 | cut -d'|' -f2) |
|
||||
|
||||
## Aktive Feeds (17)
|
||||
| ID | Name | Schedule |
|
||||
|---|---|---|
|
||||
| 1 | Dr. Bines Substack | 08/14/20 Uhr |
|
||||
| 3 | NachDenkSeiten | 07/13/19 Uhr |
|
||||
| 4 | Tichys Einblick | 07:30/13:30/19:30 |
|
||||
| 5 | Junge Freiheit | 08/14/20 Uhr |
|
||||
| 6 | PAZ | 08:30/14:30/20:30 |
|
||||
| 7 | Apollo News | 09/15/21 Uhr |
|
||||
| 8 | Apolut | 09:30/15:30/21:30 |
|
||||
| 9 | Achgut.com | 10/16/22 Uhr |
|
||||
| 10 | Heise Security | alle 4h |
|
||||
| 11 | Golem.de | alle 2h |
|
||||
| 12 | Heise Online | alle 3h |
|
||||
| 13 | Rubikon.news | alle 3h |
|
||||
| 14 | Corona-Transition | alle 4h |
|
||||
| 15 | Photon.info (KI-Analyse) | alle 6h |
|
||||
| 16 | Antispiegel | 08:30/14:30/20:30 |
|
||||
| 17 | Riehle News | 09:00 Uhr |
|
||||
|
||||
## Code (CT 109: /opt/rss-manager/)
|
||||
poster.py, scheduler.py, app.py, db.py
|
||||
|
||||
## Änderungshistorie
|
||||
- 08.03.2026: Domain arakavanews.com live, Mirror CT 600/601 auf pve3
|
||||
- 08.03.2026: homelab.conf als zentrale Quelle der Wahrheit
|
||||
- 24.02.2026: Scheduler Lock gegen Doppelstarts
|
||||
- 24.02.2026: Telegram auf HTML-Modus (Sonderzeichen-Fix)
|
||||
- 24.02.2026: Werbeartikel-Blacklist (Anzeige:, Sponsored, etc.)
|
||||
- 23.02.2026: Matomo von CT 113 → CT 109 migriert
|
||||
- 23.02.2026: CT 100/102/104/105/106/113 gelöscht
|
||||
EOF
|
||||
CHANGED=1
|
||||
log "Arakava News STATE.md aktualisiert"
|
||||
|
||||
# ─────────────────────────────────────────────────────
|
||||
# 2. INFRASTRUKTUR STATE
|
||||
# ─────────────────────────────────────────────────────
|
||||
log "Sammle Infrastruktur Status..."
|
||||
|
||||
DISK_ROOT=$(df -h / | awk 'NR==2{print $5 " von " $2}')
|
||||
DISK_DATA=$(df -h /var/lib/vz | awk 'NR==2{print $5 " von " $2}' 2>/dev/null || echo "n/a")
|
||||
|
||||
cat > "$REPO/infrastructure/STATE.md" << EOF
|
||||
# Infrastruktur — Live State
|
||||
> Auto-generiert: $DATE
|
||||
|
||||
## pve-hetzner Disk
|
||||
| Mount | Belegt |
|
||||
|---|---|
|
||||
| / (root) | $DISK_ROOT |
|
||||
| /var/lib/vz (VMs/CTs) | $DISK_DATA |
|
||||
|
||||
## Aktive Container auf pve-hetzner
|
||||
| CT | Name | Tailscale IP | Dienste |
|
||||
|---|---|---|---|
|
||||
| 101 | $(echo $CT_101 | cut -d'|' -f1) | $(echo $CT_101 | cut -d'|' -f2) | $(echo $CT_101 | cut -d'|' -f3) |
|
||||
| 103 | $(echo $CT_103 | cut -d'|' -f1) | $(echo $CT_103 | cut -d'|' -f2) | $(echo $CT_103 | cut -d'|' -f3) ($DOMAIN_SEAFILE) |
|
||||
| 109 | $(echo $CT_109 | cut -d'|' -f1) | $(echo $CT_109 | cut -d'|' -f2) | $(echo $CT_109 | cut -d'|' -f3) |
|
||||
| 110 | $(echo $CT_110 | cut -d'|' -f1) | $(echo $CT_110 | cut -d'|' -f2) | $(echo $CT_110 | cut -d'|' -f3) |
|
||||
| 111 | $(echo $CT_111 | cut -d'|' -f1) | $(echo $CT_111 | cut -d'|' -f2) | $(echo $CT_111 | cut -d'|' -f3) (http://$(echo $CT_111 | cut -d'|' -f2):3000) |
|
||||
| 144 | $(echo $CT_144 | cut -d'|' -f1) | $(echo $CT_144 | cut -d'|' -f2) | $(echo $CT_144 | cut -d'|' -f3) |
|
||||
| 999 | $(echo $CT_999 | cut -d'|' -f1) | $(echo $CT_999 | cut -d'|' -f2) | $(echo $CT_999 | cut -d'|' -f3) (http://$(echo $CT_999 | cut -d'|' -f2):8080) |
|
||||
|
||||
## Gelöschte Container (24.02.2026)
|
||||
| CT | Name | Grund |
|
||||
|---|---|---|
|
||||
| 100 | traefik | Abgelöst durch Cloudflare Tunnel |
|
||||
| 102 | dify | Experiment fehlgeschlagen |
|
||||
| 104 | n8n | Nicht aktiv genutzt |
|
||||
| 105 | debian-12 | Nicht genutzt |
|
||||
| 106 | wordpress-news | Abgelöst durch CT 101 |
|
||||
| 113 | matomo | Integriert in CT 109 |
|
||||
|
||||
## Container auf pve1 (Kambodscha)
|
||||
| CT | Name | Dienste |
|
||||
|---|---|---|
|
||||
| 136 | $(echo $CT_136 | cut -d'|' -f1) | $(echo $CT_136 | cut -d'|' -f3) (Tailscale: $(echo $CT_136 | cut -d'|' -f2)) |
|
||||
| 143 | $(echo $CT_143_PVE1 | cut -d'|' -f1) | $(echo $CT_143_PVE1 | cut -d'|' -f3) |
|
||||
|
||||
## Container auf pve3 (Muldenstein)
|
||||
| CT | Name | Tailscale IP | Dienste |
|
||||
|---|---|---|---|
|
||||
| 600 | $(echo $CT_600 | cut -d'|' -f1) | $(echo $CT_600 | cut -d'|' -f2) | $(echo $CT_600 | cut -d'|' -f3) |
|
||||
| 601 | $(echo $CT_601 | cut -d'|' -f1) | $(echo $CT_601 | cut -d'|' -f2) | $(echo $CT_601 | cut -d'|' -f3) |
|
||||
| 145 | $(echo $CT_145 | cut -d'|' -f1) | $(echo $CT_145 | cut -d'|' -f2) | $(echo $CT_145 | cut -d'|' -f3) |
|
||||
|
||||
## Routing
|
||||
- Cloudflare Tunnel CT 101: $DOMAIN_PRIMARY → :80
|
||||
- Cloudflare Tunnel CT 101: $DOMAIN_OLD → 301 → $DOMAIN_PRIMARY
|
||||
- Cloudflare Tunnel CT 109: $DOMAIN_MATOMO → :80
|
||||
- Cloudflare Tunnel CT 600: Standby (WordPress Mirror)
|
||||
- Cloudflare Tunnel CT 601: Standby (RSS Manager Mirror)
|
||||
- Kein Traefik, kein PBS-Gateway mehr
|
||||
|
||||
## Zugangsdaten
|
||||
- pve-hetzner: root / $PW_HETZNER
|
||||
- pve1: root / $PW_DEFAULT
|
||||
- Alle CTs: root / $PW_DEFAULT
|
||||
- Seafile: admin@orbitalo.net / $PW_DEFAULT
|
||||
- Forgejo: orbitalo / $PW_DEFAULT
|
||||
|
||||
## Telegram Bots
|
||||
| Bot | Token (Auszug) | Chat-ID |
|
||||
|---|---|---|
|
||||
| Mutter (@MutterbotAI_bot) | 8551565940:... | 674951792 |
|
||||
EOF
|
||||
CHANGED=1
|
||||
log "Infrastruktur STATE.md aktualisiert"
|
||||
|
||||
# ─────────────────────────────────────────────────────
|
||||
# 3. SMART HOME STATE
|
||||
# ─────────────────────────────────────────────────────
|
||||
log "Sammle Smart Home Status..."
|
||||
|
||||
LAST_BACKUP=$(ls -t /home/backup-muldenstein/backups/*.tar.gz 2>/dev/null | head -1 | xargs ls -lh 2>/dev/null | awk '{print $5, $6, $7, $8}' || echo "nicht abrufbar")
|
||||
BACKUP_COUNT=$(ls /home/backup-muldenstein/backups/*.tar.gz 2>/dev/null | wc -l || echo "0")
|
||||
|
||||
cat > "$REPO/smart-home/STATE.md" << EOF
|
||||
# Smart Home Muldenstein — Live State
|
||||
> Auto-generiert: $DATE
|
||||
|
||||
## Backup-Status
|
||||
- Letztes Backup: $LAST_BACKUP
|
||||
- Backups gesamt: $BACKUP_COUNT
|
||||
- Ziel: /home/backup-muldenstein/backups/ (CT 144)
|
||||
|
||||
## Services (CT 143)
|
||||
| Dienst | URL |
|
||||
|---|---|
|
||||
| Grafana | https://grafana.orbitalo.net |
|
||||
| ioBroker | http://192.168.178.36:8081 |
|
||||
| InfluxDB | http://192.168.178.36:8086 |
|
||||
|
||||
## Grafana Alerts → Telegram 674951792
|
||||
- Promtail DOWN (> 5 Min keine Daten)
|
||||
- CPU > 70%
|
||||
- Memory > 80%
|
||||
- Disk > 90%
|
||||
|
||||
## Backup-Zeitplan
|
||||
- täglich 04:00 → /root/backup-to-hetzner.sh (auf pve3)
|
||||
- Retention: 30d tägl, 90d wöchl, unbegrenzt monatl
|
||||
EOF
|
||||
CHANGED=1
|
||||
log "Smart Home STATE.md aktualisiert"
|
||||
|
||||
# ─────────────────────────────────────────────────────
|
||||
# 4. GIT COMMIT & PUSH
|
||||
# ─────────────────────────────────────────────────────
|
||||
if [ "$CHANGED" -eq 1 ]; then
|
||||
log "Committe Änderungen..."
|
||||
git -C "$REPO" add -A
|
||||
git -C "$REPO" -c user.email="sync@homelab" -c user.name="Auto-Sync" \
|
||||
commit -m "Auto-Sync: $DATE" --quiet || true
|
||||
if git -C "$REPO" push \
|
||||
"http://orbitalo:${FORGEJO_TOKEN}@100.89.246.60:3000/orbitalo/homelab-brain.git" main --quiet 2>/tmp/git-push-err; then
|
||||
log "Push erfolgreich"
|
||||
rm -f "$DEBOUNCE_DIR/git_push.lock"
|
||||
else
|
||||
ERR=$(cat /tmp/git-push-err | head -1)
|
||||
log "Push FEHLER: $ERR"
|
||||
tg_alert "git_push" "*Homelab Git-Sync fehlgeschlagen*%0A%0AFehler: ${ERR}%0AZeit: ${DATE}"
|
||||
fi
|
||||
else
|
||||
log "Keine Änderungen"
|
||||
fi
|
||||
|
||||
log "Sync abgeschlossen"
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue