monitor: HTTP-Checks mit Retries/Timeout; Flugscanner pp-1 toleranter (instabile Verbindung)
This commit is contained in:
parent
b920f9fd28
commit
96b6476b1f
1 changed file with 22 additions and 7 deletions
|
|
@@ -5,6 +5,7 @@ import os
|
|||
import json
|
||||
import hashlib
|
||||
import requests
|
||||
import time
|
||||
from datetime import datetime, timezone
|
||||
|
||||
sys.path.insert(0, os.path.dirname(__file__))
|
||||
|
|
@@ -45,7 +46,8 @@ HTTP_HEALTH_CHECKS = [
|
|||
{"name": "WordPress (CT 101)", "url": "http://10.10.10.101/robots.txt"},
|
||||
{"name": "Matomo (CT 113)", "url": "http://10.10.10.113"},
|
||||
{"name": "Grafana (CT 110)", "url": "http://10.10.10.110:3000"},
|
||||
{"name": "Flugscanner-Agent (pve-pp-1)", "url": "http://100.126.26.46:5010/status"},
|
||||
{"name": "Flugscanner-Agent (pve-pp-1)", "url": "http://100.126.26.46:5010/status",
|
||||
"retries": 4, "timeout": 25, "retry_delay": 5},
|
||||
]
|
||||
|
||||
EXPECTED_STOPPED = {
|
||||
|
|
@@ -138,12 +140,25 @@ def check_all() -> list[str]:
|
|||
|
||||
_headers = {"User-Agent": "Mozilla/5.0 (Hausmeister-Bot/1.0 health-check)"}
|
||||
for check in HTTP_HEALTH_CHECKS:
|
||||
try:
|
||||
r = requests.head(check["url"], timeout=15, allow_redirects=True, headers=_headers)
|
||||
if r.status_code >= 400:
|
||||
alerts.append(f"🔴 {check['name']} antwortet mit HTTP {r.status_code}")
|
||||
except requests.RequestException as e:
|
||||
alerts.append(f"🔴 {check['name']} nicht erreichbar: {str(e)[:80]}")
|
||||
timeout = check.get("timeout", 15)
|
||||
retries = check.get("retries", 1)
|
||||
retry_delay = check.get("retry_delay", 3)
|
||||
msg = None
|
||||
for attempt in range(retries):
|
||||
try:
|
||||
r = requests.head(
|
||||
check["url"], timeout=timeout, allow_redirects=True, headers=_headers
|
||||
)
|
||||
if r.status_code < 400:
|
||||
msg = None
|
||||
break
|
||||
msg = f"🔴 {check['name']} antwortet mit HTTP {r.status_code}"
|
||||
except requests.RequestException as e:
|
||||
msg = f"🔴 {check['name']} nicht erreichbar: {str(e)[:80]}"
|
||||
if attempt < retries - 1:
|
||||
time.sleep(retry_delay)
|
||||
if msg:
|
||||
alerts.append(msg)
|
||||
|
||||
restarts = loki_client.check_service_restarts(minutes=35)
|
||||
for r in restarts:
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue