diff --git a/arakava-news/STATE.md b/arakava-news/STATE.md index 37f8109a..3be378ce 100644 --- a/arakava-news/STATE.md +++ b/arakava-news/STATE.md @@ -1,5 +1,5 @@ # Arakava News — Live State -> Auto-generiert: 2026-03-08 06:45 +> Auto-generiert: 2026-03-08 06:46 ## Service Status | Service | CT | Status | diff --git a/infrastructure/STATE.md b/infrastructure/STATE.md index 5f1cb0ef..2e9e95e1 100644 --- a/infrastructure/STATE.md +++ b/infrastructure/STATE.md @@ -1,5 +1,5 @@ # Infrastruktur — Live State -> Auto-generiert: 2026-03-08 06:45 +> Auto-generiert: 2026-03-08 06:46 ## pve-hetzner Disk | Mount | Belegt | diff --git a/scripts/sync-state.sh b/scripts/sync-state.sh index a280a148..88b93afd 100755 --- a/scripts/sync-state.sh +++ b/scripts/sync-state.sh @@ -10,6 +10,7 @@ # Niemals hier IPs, URLs oder Credentials hardcoden! # ============================================================ +export PATH="/usr/sbin:/usr/local/sbin:/usr/local/bin:/usr/bin:/sbin:/bin" set -euo pipefail REPO="/opt/homelab-brain" @@ -45,13 +46,19 @@ tg_alert() { log "Alert gesendet: $key" } -# Service-Watchdog: prüft ob ein CT-Service läuft +# Service-Watchdog: prüft ob ein CT-Service läuft (mit Retry) check_service() { local ct="$1" service="$2" name="$3" - local status - status=$(pct exec "$ct" -- systemctl is-active "$service" 2>/dev/null || echo "unknown") + local status attempt + for attempt in 1 2 3; do + status=$(pct exec "$ct" -- systemctl is-active "$service" 2>/dev/null || echo "unknown") + if [ "$status" = "active" ]; then + break + fi + [ "$attempt" -lt 3 ] && sleep 2 + done if [ "$status" != "active" ]; then - tg_alert "service_${service}" "*${name}* ist DOWN%0AService: ${service}%0ACT: ${ct}%0AStatus: ${status}" + tg_alert "service_${service}" "*${name}* ist DOWN%0AService: ${service}%0ACT: ${ct}%0AStatus: ${status}" >&2 echo "DOWN" else if [ -f "$DEBOUNCE_DIR/service_${service}.lock" ]; then @@ -60,6 +67,7 @@ check_service() { -d "text=*${name}* wieder online" \ -d "parse_mode=Markdown" > /dev/null 2>&1 rm -f "$DEBOUNCE_DIR/service_${service}.lock" + log "Recovery: $name wieder online" >&2 fi echo "active" fi @@ -73,9 +81,24 @@ git pull --quiet 2>/dev/null || true # ───────────────────────────────────────────────────── log "Watchdog läuft..." RSS_LIVE=$(check_service 109 rss-manager "RSS Manager") -WP_LIVE=$(pct exec 101 -- docker inspect --format='{{.State.Status}}' wordpress-app 2>/dev/null || echo "unknown") + +WP_LIVE="unknown" +for _wp_attempt in 1 2 3; do + WP_LIVE=$(pct exec 101 -- docker inspect --format='{{.State.Status}}' wordpress-app 2>/dev/null || echo "unknown") + [ "$WP_LIVE" = "running" ] && break + [ "$_wp_attempt" -lt 3 ] && sleep 2 +done if [ "$WP_LIVE" != "running" ]; then tg_alert "wordpress" "*WordPress Docker* ist DOWN%0AStatus: ${WP_LIVE}%0ACT: 101" +else + if [ -f "$DEBOUNCE_DIR/wordpress.lock" ]; then + curl -s -X POST "https://api.telegram.org/bot${TG_TOKEN}/sendMessage" \ + -d "chat_id=${TG_CHAT}" \ + -d "text=*WordPress Docker* wieder online" \ + -d "parse_mode=Markdown" > /dev/null 2>&1 + rm -f "$DEBOUNCE_DIR/wordpress.lock" + log "Recovery: WordPress wieder online" + fi fi # ───────────────────────────────────────────────────── @@ -95,8 +118,8 @@ for r in rows: print(f' {r[0]}: {r[1] or \"nie\"}') " 2>/dev/null || echo " (nicht abrufbar)") # Fehler letzte 24h -ERRORS=$(pct exec 109 -- bash -c "grep -c 'ERROR' /opt/rss-manager/logs/service.log 2>/dev/null || echo 0") -LAST_ERROR=$(pct exec 109 -- bash -c "grep 'ERROR' /opt/rss-manager/logs/service.log 2>/dev/null | tail -1 || echo 'keine'") +ERRORS=$(pct exec 109 -- bash -c "grep -c 'ERROR' /opt/rss-manager/logs/service.log 2>/dev/null || echo 0" 2>/dev/null || echo "0") +LAST_ERROR=$(pct exec 109 -- bash -c "grep 'ERROR' /opt/rss-manager/logs/service.log 2>/dev/null | tail -1 || echo 'keine'" 2>/dev/null || echo "keine") # OpenRouter Balance OR_BALANCE=$(pct exec 109 -- python3 -c " diff --git a/smart-home/STATE.md b/smart-home/STATE.md index 60d64792..4a9e9fe1 100644 --- a/smart-home/STATE.md +++ b/smart-home/STATE.md @@ -1,5 +1,5 @@ # Smart Home Muldenstein — Live State -> Auto-generiert: 2026-03-08 06:45 +> Auto-generiert: 2026-03-08 06:46 ## Backup-Status - Letztes Backup: 513M Mar 8 04:01