feat: count_errors Tool, gather_errors mit Anzahl+Host, Tool-Descriptions geschärft
This commit is contained in:
parent
fc07039709
commit
8e74da7530
3 changed files with 78 additions and 4 deletions
|
|
@ -43,9 +43,41 @@ def gather_status() -> str:
|
|||
|
||||
|
||||
def gather_errors(hours: float = 2) -> str:
    """Summarise recent Loki errors: total count, busiest hosts, sample lines.

    Args:
        hours: Size of the look-back window in hours (may be fractional).

    Returns:
        A newline-joined, human-readable report. If ``loki_client.count_errors``
        reports an ``"error"`` key, a single line describing that error is
        returned instead.
    """
    result = loki_client.count_errors(hours=hours)
    if "error" in result:
        return f"Loki-Fehler: {result['error']}"

    count = result["count"]
    per_host = result.get("per_host", {})

    # ':g' keeps fractional windows readable (0.5 -> "0.5"); the previous
    # ':.0f' rounded them, so a 30-minute window was reported as "0h".
    lines = [f"Fehler ({hours:g}h): {count} Einträge"]
    if result.get("truncated"):
        # count_errors may flag that the query limit was hit; the real
        # number of entries is then at least `count`.
        lines.append("(Abfrage-Limit erreicht, tatsächliche Anzahl evtl. höher)")

    if per_host:
        # Five hosts with the most entries, highest first.
        top = sorted(per_host.items(), key=lambda item: item[1], reverse=True)[:5]
        lines.append("Top-Hosts:")
        lines.extend(f" {host}: {n}x" for host, n in top)

    if count > 0:
        examples = loki_client.get_errors(hours=hours, limit=5)
        # Entries carrying an "error" key are treated as failure markers,
        # not log lines, and are skipped.
        real = [e for e in examples if "error" not in e]
        if real:
            lines.append("Letzte Beispiele:")
            for e in real[:3]:
                # Tolerate a missing/None line and cap each sample at 120 chars.
                text = (e.get('line') or '')[:120]
                lines.append(f" [{e.get('host','?')}] {text}")

    return "\n".join(lines)
|
||||
|
||||
|
||||
def gather_error_count(hours: float = 24) -> str:
    """Report only how many error entries Loki holds, broken down by host.

    Args:
        hours: Size of the look-back window in hours (may be fractional).

    Returns:
        A newline-joined report: one headline with the total, followed by up
        to eight hosts with the most entries. If ``loki_client.count_errors``
        reports an ``"error"`` key, a single line describing that error is
        returned instead.
    """
    result = loki_client.count_errors(hours=hours)
    if "error" in result:
        return f"Loki-Fehler: {result['error']}"

    count = result["count"]
    per_host = result.get("per_host", {})

    # ':g' keeps fractional windows readable (1.5 -> "1.5"); the previous
    # ':.0f' rounded them and misreported the window size.
    lines = [f"{count} Fehler-Einträge in den letzten {hours:g} Stunden"]
    if result.get("truncated"):
        # count_errors may flag that the query limit was hit; the real
        # number of entries is then at least `count`.
        lines.append("(Abfrage-Limit erreicht, tatsächliche Anzahl evtl. höher)")

    if per_host:
        # Eight hosts with the most entries, highest first.
        top = sorted(per_host.items(), key=lambda item: item[1], reverse=True)[:8]
        lines.extend(f" {host}: {n}x" for host, n in top)

    return "\n".join(lines)
|
||||
|
||||
|
||||
def gather_container_status(query: str) -> str:
|
||||
|
|
@ -255,6 +287,7 @@ def get_tool_handlers() -> dict:
|
|||
"get_all_containers": lambda: gather_status(),
|
||||
"get_container_detail": lambda query: gather_container_status(query),
|
||||
"get_errors": lambda hours=2: gather_errors(hours=hours),
|
||||
"count_errors": lambda hours=24: gather_error_count(hours=hours),
|
||||
"get_container_logs": lambda container, hours=1: gather_logs(container, hours=hours),
|
||||
"get_silent_hosts": lambda: gather_silence(),
|
||||
"get_server_metrics": lambda host=None: _tool_get_server_metrics(host),
|
||||
|
|
|
|||
|
|
@ -117,6 +117,33 @@ WATCHED_SERVICES = [
|
|||
]
|
||||
|
||||
|
||||
def count_errors(hours: float = 24) -> dict:
    """Count error-like log entries in Loki over the last ``hours`` hours.

    This issues a *log* range query and counts the returned lines; it is not
    a Loki metric query. The count can therefore never exceed the query
    ``limit``; when that cap is reached the result is flagged as truncated.

    Args:
        hours: Size of the look-back window in hours, ending now (UTC).

    Returns:
        On failure (the query result contains an ``"error"`` key):
        ``{"error": <value>, "count": 0}``.
        On success: ``{"count": int, "hours": hours, "per_host": {host: int},
        "truncated": bool}``. Streams without a ``host`` label are counted
        under ``"unknown"``.
    """
    limit = 5000
    now = datetime.now(timezone.utc)
    start = now - timedelta(hours=hours)
    q = '{job=~".+"} |~ "(?i)(error|fatal|panic|traceback|exception)" !~ "caller=metrics|query_hash=|executing query|scheduler_processor|Aborted connection|systemd-networkd-wait-online|context canceled|AH01630: client denied"'

    # Log range query; matching lines are counted client-side below.
    data = _query("/loki/api/v1/query_range", {
        "query": q,
        "start": _ns(start),
        "end": _ns(now),
        "limit": limit,
        "direction": "backward",
    })
    if "error" in data:
        return {"error": data["error"], "count": 0}

    # Single pass: aggregate per host, then derive the total from that.
    per_host: dict[str, int] = {}
    for stream in data.get("data", {}).get("result", []):
        host = stream.get("stream", {}).get("host", "unknown")
        per_host[host] = per_host.get(host, 0) + len(stream.get("values", []))
    total = sum(per_host.values())

    return {
        "count": total,
        "hours": hours,
        "per_host": per_host,
        # Hitting the limit means Loki may have had more matching entries.
        "truncated": total >= limit,
    }
|
||||
|
||||
|
||||
def check_service_restarts(minutes: int = 35) -> list[dict]:
|
||||
"""Findet Services die innerhalb des Zeitfensters neu gestartet haben (systemd journal via Loki)."""
|
||||
restarts = []
|
||||
|
|
|
|||
|
|
@ -44,11 +44,25 @@ TOOLS = [
|
|||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "count_errors",
|
||||
"description": "Zählt Fehler-Logs aus Loki und gibt ANZAHL pro Host zurück. Nutze dieses Tool wenn nach der ANZAHL von Fehlern gefragt wird (z.B. 'wieviele Fehler', 'wie oft', 'Fehleranzahl').",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"hours": {"type": "number", "description": "Zeitraum in Stunden (z.B. 24 = heute, 72 = 3 Tage, 168 = 1 Woche)", "default": 24}
|
||||
},
|
||||
"required": [],
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "get_errors",
|
||||
"description": "Aktuelle Fehler-Logs aus Loki (alle Container)",
|
||||
"description": "Zeigt Fehler-Logs aus Loki mit Beispielen (Inhalt der Fehlermeldungen). Nutze dieses Tool wenn nach dem INHALT oder DETAILS von Fehlern gefragt wird.",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue