fix: terror-URLs nicht als Fehler zaehlen in Loki-Query

This commit is contained in:
Homelab Cursor 2026-04-15 20:46:32 +02:00
parent d8a8f9d9b0
commit 33a5895225

View file

@ -49,9 +49,9 @@ def query_logs(query: str, hours: float = 1, limit: int = 100) -> list[dict]:
def get_errors(container: str = None, hours: float = 1, limit: int = 200) -> list[dict]:
    """Get error-level logs, optionally filtered by container hostname.

    Builds a LogQL log query and hands it to ``query_logs``.

    Args:
        container: Value matched against the ``host`` stream label. When
            falsy (``None`` or ``""``), every stream carrying a ``job``
            label is searched instead.
        hours: Passed through to ``query_logs`` unchanged.
        limit: Passed through to ``query_logs`` unchanged.

    Returns:
        Whatever ``query_logs`` returns for the assembled query.
    """
    # The line filter is shared by both branches and defined exactly once,
    # so the two variants of the query cannot drift apart.
    #
    # Stage 1 (|~) keeps lines that match an error keyword, case-insensitively.
    # Stage 2 (!~) drops known noise. "terror" is on that list because the
    # substring "error" in stage 1 also matches "terror" (e.g. in URLs).
    # NOTE(review): the exclusion stage has no (?i) flag, so it is
    # case-sensitive, and it drops *every* line containing "terror", even
    # one that also reports a genuine error -- confirm this is acceptable.
    line_filter = (
        '|~ "(?i)(error|fatal|panic|traceback|exception)" '
        '!~ "caller=metrics|query_hash=|executing query|scheduler_processor|'
        'Aborted connection|systemd-networkd-wait-online|context canceled|'
        'AH01630: client denied|flag evaluation succeeded|pluginsAutoUpdate|terror"'
    )

    # NOTE(review): container is interpolated into the selector unescaped;
    # a value containing a double quote would break the query -- verify
    # that callers only pass trusted hostnames.
    selector = f'{{host="{container}"}}' if container else '{job=~".+"}'

    return query_logs(f"{selector} {line_filter}", hours=hours, limit=limit)
@ -121,7 +121,7 @@ def count_errors(hours: float = 24) -> dict:
"""Zählt Fehler-Log-Einträge über einen Zeitraum via Loki metric query."""
now = datetime.now(timezone.utc)
start = now - timedelta(hours=hours)
q = '{job=~".+"} |~ "(?i)(error|fatal|panic|traceback|exception)" !~ "caller=metrics|query_hash=|executing query|scheduler_processor|Aborted connection|systemd-networkd-wait-online|context canceled|AH01630: client denied|flag evaluation succeeded|pluginsAutoUpdate"'
q = '{job=~".+"} |~ "(?i)(error|fatal|panic|traceback|exception)" !~ "caller=metrics|query_hash=|executing query|scheduler_processor|Aborted connection|systemd-networkd-wait-online|context canceled|AH01630: client denied|flag evaluation succeeded|pluginsAutoUpdate|terror"'
# Loki instant metric query für Gesamtanzahl
data = _query("/loki/api/v1/query_range", {
"query": q,
@ -171,7 +171,7 @@ def check_error_rate(minutes: int = 30) -> list[dict]:
alerts = []
now = datetime.now(timezone.utc)
for host in all_hosts:
q = f'count_over_time({{host="{host}"}} |~ "(?i)error" !~ "caller=metrics|query_hash=|executing query|scheduler_processor|Aborted connection|systemd-networkd-wait-online|context canceled|AH01630: client denied|flag evaluation succeeded|pluginsAutoUpdate" [{minutes}m])'
q = f'count_over_time({{host="{host}"}} |~ "(?i)error" !~ "caller=metrics|query_hash=|executing query|scheduler_processor|Aborted connection|systemd-networkd-wait-online|context canceled|AH01630: client denied|flag evaluation succeeded|pluginsAutoUpdate|terror" [{minutes}m])'
data = _query("/loki/api/v1/query", {"query": q, "time": _ns(now)})
count = sum(
int(float(r.get("value", [None, "0"])[1]))