feat: LLM-gestützte Mail-Klassifizierung (Wichtig/Aktion/Newsletter/Spam)

2026-03-09 15:29:43 +07:00 · 2026-03-09 15:29:43 +07:00 · eb34f354b3
commit eb34f354b3
parent fdf2bc095a
4 changed files with 162 additions and 1 deletions
--- a/homelab-ai-bot/context.py
+++ b/homelab-ai-bot/context.py
@ -221,6 +221,14 @@ def _tool_get_todays_mails() -> str:
    return "\n".join(lines)


+def _tool_get_smart_mail_digest(hours: int = 24) -> str:
+    """Tool handler: return the LLM-classified mail digest as display text.
+
+    Loads the bot configuration, initialises the mail client with it and
+    hands the ``openrouter_key`` entry of ``cfg.api_keys`` (empty string when
+    the entry is absent) to the digest builder.
+
+    Args:
+        hours: Look-back window in hours, forwarded to ``get_smart_digest``.
+
+    Returns:
+        The digest rendered by ``mail_client.format_smart_digest``.
+    """
+    settings = _load_config()
+    mail_client.init(settings)
+    return mail_client.format_smart_digest(
+        mail_client.get_smart_digest(
+            hours=hours,
+            api_key=settings.api_keys.get("openrouter_key", ""),
+        )
+    )
+
+
 def _tool_get_feed_stats() -> str:
    cfg = _load_config()
    ct_109 = config.get_container(cfg, vmid=109)
@ -262,4 +270,5 @@ def get_tool_handlers() -> dict:
        "get_mail_count": lambda: _tool_get_mail_count(),
        "search_mail": lambda query, days=30: _tool_search_mail(query, days=days),
        "get_todays_mails": lambda: _tool_get_todays_mails(),
+        "get_smart_mail_digest": lambda hours=24: _tool_get_smart_mail_digest(hours=hours),
    }
--- a/homelab-ai-bot/core/pycache/mail_client.cpython-313.pyc
+++ b/homelab-ai-bot/core/pycache/mail_client.cpython-313.pyc
--- a/homelab-ai-bot/core/mail_client.py
+++ b/homelab-ai-bot/core/mail_client.py
@ -1,7 +1,13 @@
-"""IMAP Mail Client — Liest E-Mails vom All-Inkl Spiegel-Postfach (Read-Only)."""
+"""IMAP Mail Client — Liest E-Mails vom All-Inkl Spiegel-Postfach (Read-Only).
+
+Stufe 1: Keyword-Filter (IMPORTANT_SENDERS)
+Stufe 2: LLM-Klassifizierung (classify_mails) — trennt Spam/Newsletter von Wichtigem.
+"""

 import imaplib
 import email
+import json
+import requests as _req
 from email.header import decode_header
 from email.utils import parsedate_to_datetime
 from datetime import datetime, timedelta, timezone
@ -204,6 +210,138 @@ def get_important_mails(hours: int = 24) -> list[dict]:
        m.logout()


+# System prompt used by classify_mails(). It instructs the model (in German)
+# to assign every mail exactly one of the categories "wichtig", "aktion",
+# "info", "newsletter" or "spam" and to reply ONLY with a JSON array holding
+# one {"idx": <0-based mail index>, "cat": <category>} object per mail.
+CLASSIFY_PROMPT = """Du bekommst eine Liste von E-Mails (Absender + Betreff).
+Klassifiziere JEDE Mail in genau eine Kategorie:
+- "wichtig": Rechnungen, Sicherheitswarnungen, Server-Alerts, Bank, Behörden, persönliche Nachrichten
+- "aktion": Erfordert eine Handlung (Passwort ändern, Zahlung fällig, Termin bestätigen)
+- "info": Nützliche Info aber keine Handlung nötig (Versandbestätigung, Status-Update)
+- "newsletter": Newsletter, Marketing, Angebote, Werbung
+- "spam": Offensichtlicher Spam, Phishing, unseriös
+
+Antworte NUR mit einem JSON-Array. Pro Mail ein Objekt mit "idx" (0-basiert) und "cat" (Kategorie).
+Beispiel: [{"idx":0,"cat":"newsletter"},{"idx":1,"cat":"wichtig"}]"""
+
+
+# Category labels the prompt allows; anything else is reported as "unknown".
+_KNOWN_CATEGORIES = frozenset({"wichtig", "aktion", "info", "newsletter", "spam"})
+
+
+def _one_line(value, limit: int) -> str:
+    """Collapse *value* to one whitespace-normalised line of at most *limit* chars."""
+    # Folded headers may contain newlines, which would break the
+    # one-mail-per-line layout of the prompt; None/missing becomes "".
+    return " ".join(str(value or "").split())[:limit]
+
+
+def _parse_classifications(content: str) -> dict[int, str]:
+    """Turn the raw LLM reply into a ``{mail index: category}`` mapping.
+
+    Raises:
+        ValueError: If the reply is not valid JSON or not a JSON array.
+    """
+    text = content.strip()
+    if text.startswith("```"):
+        # Strip a Markdown code fence (```json ... ```) wrapped around the JSON.
+        text = text.split("\n", 1)[-1].rsplit("```", 1)[0]
+    parsed = json.loads(text)
+    if not isinstance(parsed, list):
+        raise ValueError("classification reply is not a JSON array")
+
+    result: dict[int, str] = {}
+    for entry in parsed:
+        if not isinstance(entry, dict):
+            continue
+        try:
+            # Models occasionally return the index as a string ("3").
+            idx = int(entry["idx"])
+        except (KeyError, TypeError, ValueError):
+            # Skip only the malformed entry instead of discarding everything.
+            continue
+        label = str(entry.get("cat", "")).strip().lower()
+        result[idx] = label if label in _KNOWN_CATEGORIES else "unknown"
+    return result
+
+
+def classify_mails(mails: list[dict], api_key: str) -> list[dict]:
+    """Classify mails by importance using an LLM via OpenRouter.
+
+    Sends sender and subject of every mail to the ``openai/gpt-4o-mini``
+    model and stores the returned label under the ``"category"`` key of each
+    mail dict (the dicts are modified in place).
+
+    Args:
+        mails: Mail dicts; the ``"from"`` and ``"subject"`` entries are read.
+        api_key: OpenRouter API key. When empty, nothing is classified.
+
+    Returns:
+        The same ``mails`` list. If ``mails`` or ``api_key`` is empty it is
+        returned untouched (no ``"category"`` key is added). Otherwise every
+        mail carries a ``"category"`` that is one of the five prompt
+        categories or ``"unknown"`` (request failed, reply unparsable, mail
+        missing from the reply, or label not recognised).
+    """
+    if not mails or not api_key:
+        return mails
+
+    import logging  # local import keeps this block self-contained
+
+    try:
+        mail_text = "\n".join(
+            f"{i}. Von: {_one_line(m.get('from'), 50)} | Betreff: {_one_line(m.get('subject'), 80)}"
+            for i, m in enumerate(mails)
+        )
+        r = _req.post(
+            "https://openrouter.ai/api/v1/chat/completions",
+            headers={"Authorization": f"Bearer {api_key}"},
+            json={
+                "model": "openai/gpt-4o-mini",
+                "messages": [
+                    {"role": "system", "content": CLASSIFY_PROMPT},
+                    {"role": "user", "content": mail_text},
+                ],
+                # One {"idx":..,"cat":".."} object per mail: a fixed budget of
+                # 400 tokens truncates the JSON for larger batches, so scale
+                # the budget with the number of mails.
+                "max_tokens": max(400, 50 + 20 * len(mails)),
+                "temperature": 0,
+            },
+            timeout=30,
+        )
+        r.raise_for_status()
+        content = r.json()["choices"][0]["message"]["content"]
+        cat_map = _parse_classifications(content)
+    except Exception as exc:
+        # Best effort: the digest must still work without the LLM, but the
+        # failure should be visible in the logs instead of vanishing.
+        logging.getLogger(__name__).warning("Mail classification failed: %s", exc)
+        cat_map = {}
+
+    for i, m in enumerate(mails):
+        m["category"] = cat_map.get(i, "unknown")
+    return mails
+
+
+# English month abbreviations required by the IMAP date format; strftime("%b")
+# would follow the process locale instead.
+_IMAP_MONTHS = (
+    "Jan", "Feb", "Mar", "Apr", "May", "Jun",
+    "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
+)
+
+
+def get_smart_digest(hours: int = 24, api_key: str = "", max_mails: int = 50) -> dict:
+    """Fetch recent INBOX mails, classify them via LLM and group them by category.
+
+    Args:
+        hours: Look-back window in hours used to compute the IMAP ``SINCE`` date.
+            NOTE: IMAP ``SINCE`` compares dates only, so the search effectively
+            starts at the beginning of that day.
+        api_key: OpenRouter API key. When empty, no classification is done and
+            every mail is grouped under ``"unknown"``.
+        max_mails: Upper bound on how many of the most recent matches are
+            fetched (default 50, the previous fixed limit).
+
+    Returns:
+        ``{"error": <message>}`` if connecting, searching or fetching fails;
+        otherwise ``{"total": int, "mails": [...], "summary": {category: [...]}}``
+        with the mails in reverse fetch order.
+    """
+    m = _connect()
+    if not m:
+        return {"error": "IMAP-Verbindung fehlgeschlagen"}
+    try:
+        m.select("INBOX", readonly=True)
+        start = datetime.now() - timedelta(hours=hours)
+        since = f"{start.day:02d}-{_IMAP_MONTHS[start.month - 1]}-{start.year}"
+        status, data = m.search(None, f'(SINCE "{since}")')
+        if status != "OK":
+            # On "NO" the payload is the server's error text, not message ids.
+            return {"error": "IMAP-Suche fehlgeschlagen"}
+        ids = data[0].split() if data and data[0] else []
+
+        # ids[-0:] would select everything, so guard non-positive limits.
+        recent_ids = ids[-max_mails:] if max_mails > 0 else []
+        mails = []
+        for mid in recent_ids:
+            # BODY.PEEK fetches the headers without marking the mail as seen.
+            _, msg_data = m.fetch(mid, "(BODY.PEEK[HEADER])")
+            parsed = _parse_mail(msg_data)
+            if parsed:
+                mails.append(parsed)
+        mails.reverse()
+    except Exception as e:
+        return {"error": str(e)}
+    finally:
+        m.logout()
+
+    if not mails:
+        return {"total": 0, "mails": [], "summary": {}}
+
+    if api_key:
+        mails = classify_mails(mails, api_key)
+
+    summary = {}
+    for m_item in mails:
+        cat = m_item.get("category", "unknown")
+        summary.setdefault(cat, []).append(m_item)
+
+    return {"total": len(mails), "mails": mails, "summary": summary}
+
+
+def format_smart_digest(digest: dict) -> str:
+    """Render the smart digest as plain text for Telegram.
+
+    Args:
+        digest: Result of ``get_smart_digest``: either ``{"error": ...}`` or a
+            dict with ``"total"`` and ``"summary"`` (category -> list of mail
+            dicts providing ``"date_str"``, ``"from"`` and ``"subject"``).
+
+    Returns:
+        An error line, a "no mails" line, or one section per non-empty
+        category followed by a one-line conclusion. "wichtig", "aktion" and
+        "info" are listed in full; every other category shows at most three
+        mails plus a "... und N weitere" line.
+    """
+    if "error" in digest:
+        return f"Mail-Fehler: {digest['error']}"
+    if digest.get("total", 0) == 0:
+        return "Keine neuen Mails im gewählten Zeitraum."
+
+    summary = digest.get("summary", {})
+
+    cat_labels = {
+        "wichtig": "🔴 Wichtig",
+        "aktion": "⚡ Aktion nötig",
+        "info": "ℹ️ Info",
+        "newsletter": "📰 Newsletter",
+        "spam": "🗑️ Spam",
+        "unknown": "❓ Unkategorisiert",
+    }
+    cat_order = ["aktion", "wichtig", "info", "newsletter", "spam", "unknown"]
+    always_full = ("wichtig", "aktion", "info")
+    # Categories the LLM invented are part of "total"; render them after the
+    # known ones (raw name as label) instead of silently dropping them.
+    extra_cats = [cat for cat in summary if cat not in cat_labels]
+
+    lines = [f"📧 Mail-Digest ({digest['total']} Mails)\n"]
+    for cat in cat_order + extra_cats:
+        cat_mails = summary.get(cat, [])
+        if not cat_mails:
+            continue
+        label = cat_labels.get(cat, cat)
+        lines.append(f"{label} ({len(cat_mails)}):")
+        show = cat_mails if cat in always_full else cat_mails[:3]
+        for m_item in show:
+            lines.append(f"  {m_item['date_str']} | {m_item['from'][:30]}")
+            lines.append(f"    → {m_item['subject'][:65]}")
+        if len(cat_mails) > len(show):
+            lines.append(f"  ... und {len(cat_mails) - len(show)} weitere")
+        lines.append("")
+
+    wichtig = len(summary.get("wichtig", [])) + len(summary.get("aktion", []))
+    noise = len(summary.get("newsletter", [])) + len(summary.get("spam", []))
+    lines.append(f"Fazit: {wichtig} relevante, {noise} ignorierbare Mails")
+
+    return "\n".join(lines)
+
+
 def format_summary() -> str:
    """Komplett-Übersicht: Counts + letzte Mails + wichtige."""
    counts = get_mail_count()
--- a/homelab-ai-bot/llm.py
+++ b/homelab-ai-bot/llm.py
@ -214,6 +214,20 @@ TOOLS = [
            "parameters": {"type": "object", "properties": {}, "required": []},
        },
    },
+    {
+        "type": "function",
+        "function": {
+            "name": "get_smart_mail_digest",
+            "description": "Intelligente Mail-Zusammenfassung: KI klassifiziert Mails in Wichtig/Aktion/Info/Newsletter/Spam. Nutze dies wenn der User nach 'wichtigen Mails' fragt oder wissen will ob etwas Relevantes dabei ist.",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "hours": {"type": "integer", "description": "Zeitraum in Stunden (default: 24)", "default": 24},
+                },
+                "required": [],
+            },
+        },
+    },
 ]