From 96b6476b1fbec9feabd21e8e54eb5584d4e8da1a Mon Sep 17 00:00:00 2001
From: Homelab Cursor
Date: Wed, 25 Mar 2026 13:09:35 +0100
Subject: [PATCH] monitor: HTTP-Checks mit Retries/Timeout; Flugscanner pp-1 toleranter (instabile Verbindung)

---
 homelab-ai-bot/monitor.py | 29 ++++++++++++++++++++++-------
 1 file changed, 22 insertions(+), 7 deletions(-)

diff --git a/homelab-ai-bot/monitor.py b/homelab-ai-bot/monitor.py
index 3552a1ba..8d2850fe 100644
--- a/homelab-ai-bot/monitor.py
+++ b/homelab-ai-bot/monitor.py
@@ -5,6 +5,7 @@ import os
 import json
 import hashlib
 import requests
+import time
 from datetime import datetime, timezone
 
 sys.path.insert(0, os.path.dirname(__file__))
@@ -45,7 +46,8 @@ HTTP_HEALTH_CHECKS = [
     {"name": "WordPress (CT 101)", "url": "http://10.10.10.101/robots.txt"},
     {"name": "Matomo (CT 113)", "url": "http://10.10.10.113"},
     {"name": "Grafana (CT 110)", "url": "http://10.10.10.110:3000"},
-    {"name": "Flugscanner-Agent (pve-pp-1)", "url": "http://100.126.26.46:5010/status"},
+    {"name": "Flugscanner-Agent (pve-pp-1)", "url": "http://100.126.26.46:5010/status",
+     "retries": 4, "timeout": 25, "retry_delay": 5},
 ]
 
 EXPECTED_STOPPED = {
@@ -138,12 +140,25 @@ def check_all() -> list[str]:
 
     _headers = {"User-Agent": "Mozilla/5.0 (Hausmeister-Bot/1.0 health-check)"}
     for check in HTTP_HEALTH_CHECKS:
-        try:
-            r = requests.head(check["url"], timeout=15, allow_redirects=True, headers=_headers)
-            if r.status_code >= 400:
-                alerts.append(f"🔴 {check['name']} antwortet mit HTTP {r.status_code}")
-        except requests.RequestException as e:
-            alerts.append(f"🔴 {check['name']} nicht erreichbar: {str(e)[:80]}")
+        timeout = check.get("timeout", 15)
+        retries = check.get("retries", 1)
+        retry_delay = check.get("retry_delay", 3)
+        msg = None
+        for attempt in range(retries):
+            try:
+                r = requests.head(
+                    check["url"], timeout=timeout, allow_redirects=True, headers=_headers
+                )
+                if r.status_code < 400:
+                    msg = None
+                    break
+                msg = f"🔴 {check['name']} antwortet mit HTTP {r.status_code}"
+            except requests.RequestException as e:
+                msg = f"🔴 {check['name']} nicht erreichbar: {str(e)[:80]}"
+            if attempt < retries - 1:
+                time.sleep(retry_delay)
+        if msg:
+            alerts.append(msg)
 
     restarts = loki_client.check_service_restarts(minutes=35)
     for r in restarts: