"""Prometheus API client for querying system metrics."""
|
|
|
|
import requests
|
|
from datetime import datetime, timezone, timedelta
|
|
|
|
# Default Prometheus base URL (scheme, host and port, no trailing slash).
# _query() falls back to this value whenever no explicit base_url is passed.
PROMETHEUS_URL = "http://100.88.230.59:9090"
|
|
|
|
|
|
def _query(endpoint: str, params: dict, base_url: str = None) -> dict:
    """Send a GET request to a Prometheus HTTP API endpoint.

    Args:
        endpoint: API path beginning with "/", e.g. "/api/v1/query".
        params: Query-string parameters forwarded to the request.
        base_url: Optional server base URL; PROMETHEUS_URL is used when
            this is omitted or empty.

    Returns:
        The decoded JSON body on success. On any transport failure, HTTP
        error status, or undecodable body, a dict of the form
        {"error": <message>, "status": "unavailable"} is returned instead
        of raising.
    """
    # Drop a trailing slash so "http://host:9090/" + "/api/..." does not
    # turn into a double-slash path.
    base = (base_url or PROMETHEUS_URL).rstrip("/")
    url = f"{base}{endpoint}"
    try:
        r = requests.get(url, params=params, timeout=10)
        r.raise_for_status()
        return r.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body (e.g. a proxy error page served
        # with a 2xx status): older requests releases raise a plain
        # ValueError subclass from .json() rather than a RequestException.
        return {"error": str(e), "status": "unavailable"}
|
|
|
|
|
|
def instant_query(query: str) -> dict:
    """Evaluate a PromQL expression via the instant-query endpoint.

    Args:
        query: The PromQL expression to evaluate.

    Returns:
        Whatever _query() returns for "/api/v1/query": the decoded response,
        or its error dict when the request fails.
    """
    request_params = {"query": query}
    return _query("/api/v1/query", request_params)
|
|
|
|
|
|
def range_query(query: str, hours: float = 1, step: str = "5m") -> dict:
    """Evaluate a PromQL expression over a time window ending now.

    Args:
        query: The PromQL expression to evaluate.
        hours: Length of the window, in hours, counted back from the
            current UTC time.
        step: Value sent as the "step" parameter of the range query.

    Returns:
        Whatever _query() returns for "/api/v1/query_range".
    """
    window_end = datetime.now(timezone.utc)
    window_start = window_end - timedelta(hours=hours)

    # Both bounds are sent as ISO-8601 strings carrying the UTC offset.
    request_params = {
        "query": query,
        "start": window_start.isoformat(),
        "end": window_end.isoformat(),
        "step": step,
    }
    return _query("/api/v1/query_range", request_params)
|
|
|
|
|
|
def get_targets() -> list[dict]:
    """List the active Prometheus scrape targets and their health.

    Returns:
        One dict per active target with the keys "job", "instance",
        "health" and "last_scrape". Missing job/instance/health values are
        reported as "unknown" and a missing last-scrape time as "".
        If the request itself failed, a single-element list
        [{"error": <message>}] is returned instead.
    """
    response = _query("/api/v1/targets", {})
    if "error" in response:
        return [{"error": response["error"]}]

    active = response.get("data", {}).get("activeTargets", [])
    summaries = []
    for target in active:
        labels = target.get("labels", {})
        summaries.append({
            "job": labels.get("job", "unknown"),
            "instance": labels.get("instance", "unknown"),
            "health": target.get("health", "unknown"),
            "last_scrape": target.get("lastScrape", ""),
        })
    return summaries
|
|
|
|
|
|
def is_available() -> bool:
    """Report whether Prometheus answers a trivial query.

    Runs the instant query "up" and returns True when the response has no
    "error" key, or when it has one but still reports status "success".
    """
    probe = _query("/api/v1/query", {"query": "up"})
    if "error" not in probe:
        return True
    return probe.get("status") == "success"
|
|
|
|
|
|
def get_cpu(instance: str = None, hours: float = 1) -> dict:
    """Fetch current CPU usage (percent) per instance.

    Usage is derived as 100 minus the averaged 5-minute idle rate of
    node_cpu_seconds_total.

    Args:
        instance: Optional instance prefix. When truthy it is inserted into
            a regex label matcher (instance=~"<instance>.*"), so it is
            interpreted as a regular expression, not a literal.
        hours: Accepted but not used by this function.

    Returns:
        The output of _extract_metrics() tagged with "cpu_percent".
    """
    # Build only the label selector conditionally; the surrounding
    # expression is the same in both cases.
    selector = 'mode="idle"'
    if instance:
        selector = f'mode="idle", instance=~"{instance}.*"'
    promql = f'100 - (avg by (instance) (rate(node_cpu_seconds_total{{{selector}}}[5m])) * 100)'
    return _extract_metrics(instant_query(promql), "cpu_percent")
|
|
|
|
|
|
def get_memory(instance: str = None) -> dict:
    """Fetch current memory usage (percent) per instance.

    Usage is computed as (1 - MemAvailable / MemTotal) * 100 from the
    node_memory_* metrics.

    Args:
        instance: Optional instance prefix. When truthy it is inserted into
            a regex label matcher (instance=~"<instance>.*") on both
            metrics; otherwise no label selector is applied.

    Returns:
        The output of _extract_metrics() tagged with "memory_percent".
    """
    # Empty selector means the bare metric names are queried.
    selector = f'{{instance=~"{instance}.*"}}' if instance else ''
    promql = f'(1 - node_memory_MemAvailable_bytes{selector} / node_memory_MemTotal_bytes{selector}) * 100'
    return _extract_metrics(instant_query(promql), "memory_percent")
|
|
|
|
|
|
def get_disk(instance: str = None) -> dict:
    """Fetch current root filesystem usage (percent) per instance.

    Usage is computed as (1 - avail / size) * 100 from the
    node_filesystem_* metrics restricted to mountpoint "/".

    Args:
        instance: Optional instance prefix. When truthy it is added as a
            regex label matcher (instance=~"<instance>.*") on both metrics.

    Returns:
        The output of _extract_metrics() tagged with "disk_percent".
    """
    # The mountpoint matcher is always present; the instance matcher is
    # appended only when a filter was requested.
    matchers = 'mountpoint="/"'
    if instance:
        matchers = f'mountpoint="/", instance=~"{instance}.*"'
    promql = f'(1 - node_filesystem_avail_bytes{{{matchers}}} / node_filesystem_size_bytes{{{matchers}}}) * 100'
    return _extract_metrics(instant_query(promql), "disk_percent")
|
|
|
|
|
|
def get_overview() -> str:
    """Build a Markdown-style text overview of all monitored instances.

    The report lists every scrape target with an UP/DOWN marker, followed
    by CPU, memory and root-disk usage sections for each metric query that
    produced results.

    Returns:
        The report as a single newline-joined string. A short message is
        returned instead when Prometheus is unreachable or reports no
        targets.
    """
    if not is_available():
        return "Prometheus is not reachable at " + PROMETHEUS_URL

    targets = get_targets()
    if not targets:
        return "No Prometheus targets found."

    lines = ["## Prometheus Targets\n"]
    for t in targets:
        if "error" in t:
            # get_targets() signals a failed lookup with an {"error": ...}
            # entry that has no job/instance/health keys; show the message
            # rather than failing with a KeyError.
            lines.append(f"- [ERROR] {t['error']}")
            continue
        status = "UP" if t.get("health") == "up" else "DOWN"
        lines.append(
            f"- [{status}] {t.get('job', 'unknown')} ({t.get('instance', 'unknown')})"
        )

    # Each metric section has the same shape, so render them from one table.
    sections = (
        ("\n## CPU Usage", get_cpu),
        ("\n## Memory Usage", get_memory),
        ("\n## Disk Usage (/)", get_disk),
    )
    for heading, fetch in sections:
        metrics = fetch()
        # A failed query yields {"error": ...} with no "results" key; that
        # section is simply left out of the report.
        if "results" in metrics:
            lines.append(heading)
            lines.extend(
                f"- {r['instance']}: {r['value']:.1f}%" for r in metrics["results"]
            )

    return "\n".join(lines)
|
|
|
|
|
|
def _extract_metrics(data: dict, metric_name: str) -> dict:
|
|
if "error" in data and data.get("status") != "success":
|
|
return {"error": data.get("error", "unknown error")}
|
|
|
|
results = []
|
|
for r in data.get("data", {}).get("result", []):
|
|
instance = r.get("metric", {}).get("instance", "unknown")
|
|
value = float(r.get("value", [0, 0])[1])
|
|
results.append({"instance": instance, "value": value, "metric": metric_name})
|
|
return {"results": results}
|