monitor: HTTP-Checks mit Retries/Timeout; Flugscanner pp-1 toleranter (instabile Verbindung)

This commit is contained in:
Homelab Cursor 2026-03-25 13:09:35 +01:00
parent b920f9fd28
commit 96b6476b1f

View file

@@ -5,6 +5,7 @@ import os
import json
import hashlib
import requests
import time
from datetime import datetime, timezone
sys.path.insert(0, os.path.dirname(__file__))
@@ -45,7 +46,8 @@ HTTP_HEALTH_CHECKS = [
{"name": "WordPress (CT 101)", "url": "http://10.10.10.101/robots.txt"},
{"name": "Matomo (CT 113)", "url": "http://10.10.10.113"},
{"name": "Grafana (CT 110)", "url": "http://10.10.10.110:3000"},
{"name": "Flugscanner-Agent (pve-pp-1)", "url": "http://100.126.26.46:5010/status"},
{"name": "Flugscanner-Agent (pve-pp-1)", "url": "http://100.126.26.46:5010/status",
"retries": 4, "timeout": 25, "retry_delay": 5},
]
EXPECTED_STOPPED = {
@@ -138,12 +140,25 @@ def check_all() -> list[str]:
_headers = {"User-Agent": "Mozilla/5.0 (Hausmeister-Bot/1.0 health-check)"}
for check in HTTP_HEALTH_CHECKS:
timeout = check.get("timeout", 15)
retries = check.get("retries", 1)
retry_delay = check.get("retry_delay", 3)
msg = None
for attempt in range(retries):
try:
r = requests.head(check["url"], timeout=15, allow_redirects=True, headers=_headers)
if r.status_code >= 400:
alerts.append(f"🔴 {check['name']} antwortet mit HTTP {r.status_code}")
r = requests.head(
check["url"], timeout=timeout, allow_redirects=True, headers=_headers
)
if r.status_code < 400:
msg = None
break
msg = f"🔴 {check['name']} antwortet mit HTTP {r.status_code}"
except requests.RequestException as e:
alerts.append(f"🔴 {check['name']} nicht erreichbar: {str(e)[:80]}")
msg = f"🔴 {check['name']} nicht erreichbar: {str(e)[:80]}"
if attempt < retries - 1:
time.sleep(retry_delay)
if msg:
alerts.append(msg)
restarts = loki_client.check_service_restarts(minutes=35)
for r in restarts: