"""Prometheus API client for querying system metrics."""

from datetime import datetime, timezone, timedelta

import requests

PROMETHEUS_URL = "http://100.88.230.59:9090"


def _query(endpoint: str, params: dict, base_url: str = None) -> dict:
    """Send a GET request to a Prometheus HTTP API endpoint.

    Args:
        endpoint: Path appended to the base URL, e.g. ``"/api/v1/query"``.
        params: Query-string parameters passed to ``requests.get``.
        base_url: Server base URL; falls back to ``PROMETHEUS_URL`` when
            ``None`` or empty.

    Returns:
        The decoded JSON object on success. On any request failure, HTTP
        error status, undecodable body, or non-object JSON payload, a dict
        of the form ``{"error": <message>, "status": "unavailable"}``.
        This function never raises for those conditions.
    """
    url = f"{base_url or PROMETHEUS_URL}{endpoint}"
    try:
        r = requests.get(url, params=params, timeout=10)
        r.raise_for_status()
        payload = r.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body on requests versions whose
        # Response.json() does not raise a RequestException subclass.
        return {"error": str(e), "status": "unavailable"}
    if not isinstance(payload, dict):
        # Every caller treats the result as a dict (``.get`` / ``in``), so a
        # JSON list or scalar is reported as an error instead of crashing
        # later with AttributeError.
        return {
            "error": f"unexpected response type: {type(payload).__name__}",
            "status": "unavailable",
        }
    return payload


def instant_query(query: str) -> dict:
    """Run an instant PromQL query against ``/api/v1/query``.

    Returns the decoded response, or the error dict produced by ``_query``.
    """
    return _query("/api/v1/query", {"query": query})


def range_query(query: str, hours: float = 1, step: str = "5m") -> dict:
    """Run a range PromQL query against ``/api/v1/query_range``.

    Args:
        query: PromQL expression.
        hours: Size of the window, counted back from the current UTC time.
        step: Resolution step sent as the ``step`` parameter.

    Returns the decoded response, or the error dict produced by ``_query``.
    """
    now = datetime.now(timezone.utc)
    start = now - timedelta(hours=hours)
    return _query(
        "/api/v1/query_range",
        {
            "query": query,
            "start": start.isoformat(),
            "end": now.isoformat(),
            "step": step,
        },
    )


def get_targets() -> list[dict]:
    """Get all Prometheus scrape targets with their status.

    Returns:
        One dict per active target with the keys ``job``, ``instance``,
        ``health`` and ``last_scrape``. If the request failed, a
        single-element list ``[{"error": <message>}]`` is returned instead.
    """
    data = _query("/api/v1/targets", {})
    if "error" in data:
        return [{"error": data["error"]}]
    targets = []
    # ``or`` guards against JSON ``null`` values, which ``.get`` defaults
    # alone would not replace.
    for t in (data.get("data") or {}).get("activeTargets") or []:
        labels = t.get("labels") or {}
        targets.append(
            {
                "job": labels.get("job", "unknown"),
                "instance": labels.get("instance", "unknown"),
                "health": t.get("health", "unknown"),
                "last_scrape": t.get("lastScrape", ""),
            }
        )
    return targets


def is_available() -> bool:
    """Check if Prometheus is reachable.

    Issues the instant query ``up`` and reports whether the response is
    marked ``"status": "success"``. The error dict from ``_query`` carries
    ``"status": "unavailable"`` and therefore yields ``False``.
    """
    data = _query("/api/v1/query", {"query": "up"})
    return data.get("status") == "success"


def get_cpu(instance: str = None, hours: float = 1) -> dict:
    """Get CPU usage percentage per instance.

    Args:
        instance: If given, restrict the query to instances whose label
            matches ``<instance>.*``. The value is interpolated into a
            PromQL regex matcher without escaping, so regex metacharacters
            and double quotes in it are interpreted by Prometheus.
        hours: Accepted for backward compatibility; currently ignored (the
            query always uses a fixed ``[5m]`` rate window).

    Returns:
        ``{"results": [...]}`` or ``{"error": ...}`` as produced by
        ``_extract_metrics`` with metric name ``"cpu_percent"``.
    """
    if instance:
        q = f'100 - (avg by (instance) (rate(node_cpu_seconds_total{{mode="idle", instance=~"{instance}.*"}}[5m])) * 100)'
    else:
        q = '100 - (avg by (instance) (rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100)'
    data = instant_query(q)
    return _extract_metrics(data, "cpu_percent")


def get_memory(instance: str = None) -> dict:
    """Get memory usage percentage per instance.

    Args:
        instance: If given, restrict the query to instances whose label
            matches ``<instance>.*`` (interpolated unescaped into a PromQL
            regex matcher).

    Returns:
        ``{"results": [...]}`` or ``{"error": ...}`` as produced by
        ``_extract_metrics`` with metric name ``"memory_percent"``.
    """
    if instance:
        q = f'(1 - node_memory_MemAvailable_bytes{{instance=~"{instance}.*"}} / node_memory_MemTotal_bytes{{instance=~"{instance}.*"}}) * 100'
    else:
        q = '(1 - node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes) * 100'
    data = instant_query(q)
    return _extract_metrics(data, "memory_percent")


def get_disk(instance: str = None) -> dict:
    """Get root filesystem (mountpoint ``/``) usage percentage per instance.

    Args:
        instance: If given, restrict the query to instances whose label
            matches ``<instance>.*`` (interpolated unescaped into a PromQL
            regex matcher).

    Returns:
        ``{"results": [...]}`` or ``{"error": ...}`` as produced by
        ``_extract_metrics`` with metric name ``"disk_percent"``.
    """
    if instance:
        q = f'(1 - node_filesystem_avail_bytes{{mountpoint="/", instance=~"{instance}.*"}} / node_filesystem_size_bytes{{mountpoint="/", instance=~"{instance}.*"}}) * 100'
    else:
        q = '(1 - node_filesystem_avail_bytes{mountpoint="/"} / node_filesystem_size_bytes{mountpoint="/"}) * 100'
    data = instant_query(q)
    return _extract_metrics(data, "disk_percent")


def get_overview() -> str:
    """Get a formatted overview of all monitored instances.

    Returns:
        A Markdown-style text with a target list followed by CPU, memory and
        disk sections. A section is omitted when its query returned an
        error. If Prometheus is unreachable or reports no targets, a single
        explanatory sentence is returned instead.
    """
    if not is_available():
        return "Prometheus is not reachable at " + PROMETHEUS_URL

    targets = get_targets()
    if not targets:
        return "No Prometheus targets found."

    lines = ["## Prometheus Targets\n"]
    for t in targets:
        if "error" in t:
            # get_targets() signals failure as [{"error": ...}]; such an
            # entry has no "health"/"job"/"instance" keys, so report it
            # rather than raising KeyError.
            lines.append(f"- [ERROR] {t['error']}")
            continue
        status = "UP" if t["health"] == "up" else "DOWN"
        lines.append(f"- [{status}] {t['job']} ({t['instance']})")

    # The three metric sections share one layout; they are fetched and
    # rendered in this fixed order.
    sections = (
        ("\n## CPU Usage", get_cpu),
        ("\n## Memory Usage", get_memory),
        ("\n## Disk Usage (/)", get_disk),
    )
    for heading, fetch in sections:
        metrics = fetch()
        if "results" in metrics:
            lines.append(heading)
            lines.extend(
                f"- {r['instance']}: {r['value']:.1f}%" for r in metrics["results"]
            )

    return "\n".join(lines)


def _extract_metrics(data: dict, metric_name: str) -> dict:
    """Flatten an instant-query response into per-instance samples.

    Args:
        data: Response dict as returned by ``instant_query``.
        metric_name: Label stored under ``"metric"`` in every sample.

    Returns:
        ``{"error": <message>}`` when ``data`` carries an ``"error"`` key and
        its status is not ``"success"``; otherwise ``{"results": [...]}``
        where each entry has ``instance`` (``"unknown"`` if the label is
        missing), ``value`` (float) and ``metric``.
    """
    if "error" in data and data.get("status") != "success":
        return {"error": data.get("error", "unknown error")}
    results = [
        {
            "instance": r.get("metric", {}).get("instance", "unknown"),
            # The code reads the sample value from index 1 of the "value"
            # pair and converts it to float.
            "value": float(r.get("value", [0, 0])[1]),
            "metric": metric_name,
        }
        for r in data.get("data", {}).get("result", [])
    ]
    return {"results": results}