monitor: HTTP-Checks mit Retries/Timeout; Flugscanner pp-1 toleranter (instabile Verbindung)
This commit is contained in:
parent
b920f9fd28
commit
96b6476b1f
1 changed file with 22 additions and 7 deletions
|
|
@@ -5,6 +5,7 @@ import os
|
||||||
import json
|
import json
|
||||||
import hashlib
|
import hashlib
|
||||||
import requests
|
import requests
|
||||||
|
import time
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
|
|
||||||
sys.path.insert(0, os.path.dirname(__file__))
|
sys.path.insert(0, os.path.dirname(__file__))
|
||||||
|
|
@@ -45,7 +46,8 @@ HTTP_HEALTH_CHECKS = [
|
||||||
{"name": "WordPress (CT 101)", "url": "http://10.10.10.101/robots.txt"},
|
{"name": "WordPress (CT 101)", "url": "http://10.10.10.101/robots.txt"},
|
||||||
{"name": "Matomo (CT 113)", "url": "http://10.10.10.113"},
|
{"name": "Matomo (CT 113)", "url": "http://10.10.10.113"},
|
||||||
{"name": "Grafana (CT 110)", "url": "http://10.10.10.110:3000"},
|
{"name": "Grafana (CT 110)", "url": "http://10.10.10.110:3000"},
|
||||||
{"name": "Flugscanner-Agent (pve-pp-1)", "url": "http://100.126.26.46:5010/status"},
|
{"name": "Flugscanner-Agent (pve-pp-1)", "url": "http://100.126.26.46:5010/status",
|
||||||
|
"retries": 4, "timeout": 25, "retry_delay": 5},
|
||||||
]
|
]
|
||||||
|
|
||||||
EXPECTED_STOPPED = {
|
EXPECTED_STOPPED = {
|
||||||
|
|
@@ -138,12 +140,25 @@ def check_all() -> list[str]:
|
||||||
|
|
||||||
_headers = {"User-Agent": "Mozilla/5.0 (Hausmeister-Bot/1.0 health-check)"}
|
_headers = {"User-Agent": "Mozilla/5.0 (Hausmeister-Bot/1.0 health-check)"}
|
||||||
for check in HTTP_HEALTH_CHECKS:
|
for check in HTTP_HEALTH_CHECKS:
|
||||||
|
timeout = check.get("timeout", 15)
|
||||||
|
retries = check.get("retries", 1)
|
||||||
|
retry_delay = check.get("retry_delay", 3)
|
||||||
|
msg = None
|
||||||
|
for attempt in range(retries):
|
||||||
try:
|
try:
|
||||||
r = requests.head(check["url"], timeout=15, allow_redirects=True, headers=_headers)
|
r = requests.head(
|
||||||
if r.status_code >= 400:
|
check["url"], timeout=timeout, allow_redirects=True, headers=_headers
|
||||||
alerts.append(f"🔴 {check['name']} antwortet mit HTTP {r.status_code}")
|
)
|
||||||
|
if r.status_code < 400:
|
||||||
|
msg = None
|
||||||
|
break
|
||||||
|
msg = f"🔴 {check['name']} antwortet mit HTTP {r.status_code}"
|
||||||
except requests.RequestException as e:
|
except requests.RequestException as e:
|
||||||
alerts.append(f"🔴 {check['name']} nicht erreichbar: {str(e)[:80]}")
|
msg = f"🔴 {check['name']} nicht erreichbar: {str(e)[:80]}"
|
||||||
|
if attempt < retries - 1:
|
||||||
|
time.sleep(retry_delay)
|
||||||
|
if msg:
|
||||||
|
alerts.append(msg)
|
||||||
|
|
||||||
restarts = loki_client.check_service_restarts(minutes=35)
|
restarts = loki_client.check_service_restarts(minutes=35)
|
||||||
for r in restarts:
|
for r in restarts:
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue