feat: LLM-gestützte Mail-Klassifizierung (Wichtig/Aktion/Newsletter/Spam)

2026-03-09 15:29:43 +07:00 · 2026-03-09 15:29:43 +07:00 · eb34f354b3
commit eb34f354b3
parent fdf2bc095a
4 changed files with 162 additions and 1 deletions
--- a/homelab-ai-bot/context.py
+++ b/homelab-ai-bot/context.py
@ -221,6 +221,14 @@ def _tool_get_todays_mails() -> str:
    return "\n".join(lines)
 def _tool_get_smart_mail_digest(hours: int = 24) -> str:
    """Tool handler: return the formatted smart mail digest as text.

    Loads the configuration, initialises the mail client with it and passes
    the "openrouter_key" entry of cfg.api_keys (empty string when absent) to
    the digest builder.

    Args:
        hours: Look-back window in hours, forwarded to get_smart_digest.

    Returns:
        The string produced by mail_client.format_smart_digest.
    """
    settings = _load_config()
    mail_client.init(settings)
    return mail_client.format_smart_digest(
        mail_client.get_smart_digest(
            hours=hours,
            api_key=settings.api_keys.get("openrouter_key", ""),
        )
    )
 def _tool_get_feed_stats() -> str:
    cfg = _load_config()
    ct_109 = config.get_container(cfg, vmid=109)
@ -262,4 +270,5 @@ def get_tool_handlers() -> dict:
        "get_mail_count": lambda: _tool_get_mail_count(),
        "search_mail": lambda query, days=30: _tool_search_mail(query, days=days),
        "get_todays_mails": lambda: _tool_get_todays_mails(),
        "get_smart_mail_digest": lambda hours=24: _tool_get_smart_mail_digest(hours=hours),
    }
--- a/homelab-ai-bot/core/pycache/mail_client.cpython-313.pyc
+++ b/homelab-ai-bot/core/pycache/mail_client.cpython-313.pyc
--- a/homelab-ai-bot/core/mail_client.py
+++ b/homelab-ai-bot/core/mail_client.py
@ -1,7 +1,13 @@
-"""IMAP Mail Client — Liest E-Mails vom All-Inkl Spiegel-Postfach (Read-Only)."""
+"""IMAP Mail Client — Liest E-Mails vom All-Inkl Spiegel-Postfach (Read-Only).
 Stufe 1: Keyword-Filter (IMPORTANT_SENDERS)
 Stufe 2: LLM-Klassifizierung (classify_mails) — trennt Spam/Newsletter von Wichtigem.
 """
 import imaplib
 import email
 import json
 import requests as _req
 from email.header import decode_header
 from email.utils import parsedate_to_datetime
 from datetime import datetime, timedelta, timezone
@ -204,6 +210,138 @@ def get_important_mails(hours: int = 24) -> list[dict]:
        m.logout()
 # System prompt sent by classify_mails: it asks the model to put every listed
 # mail (sender + subject) into exactly one category and to answer with a JSON
 # array of {"idx", "cat"} objects only. The category names listed here are the
 # same keys format_smart_digest uses for its labels and ordering.
 CLASSIFY_PROMPT = """Du bekommst eine Liste von E-Mails (Absender + Betreff).
 Klassifiziere JEDE Mail in genau eine Kategorie:
 - "wichtig": Rechnungen, Sicherheitswarnungen, Server-Alerts, Bank, Behörden, persönliche Nachrichten
 - "aktion": Erfordert eine Handlung (Passwort ändern, Zahlung fällig, Termin bestätigen)
 - "info": Nützliche Info aber keine Handlung nötig (Versandbestätigung, Status-Update)
 - "newsletter": Newsletter, Marketing, Angebote, Werbung
 - "spam": Offensichtlicher Spam, Phishing, unseriös
 Antworte NUR mit einem JSON-Array. Pro Mail ein Objekt mit "idx" (0-basiert) und "cat" (Kategorie).
 Beispiel: [{"idx":0,"cat":"newsletter"},{"idx":1,"cat":"wichtig"}]"""
 def classify_mails(mails: list[dict], api_key: str) -> list[dict]:
    """Label each mail with a "category" using an LLM via OpenRouter.

    The sender and subject of every mail are sent in one request together
    with CLASSIFY_PROMPT; the JSON answer is mapped back onto the mails by
    index. Classification is best effort: on any failure (network, HTTP
    status, unparsable answer) every mail is labelled "unknown" instead of
    raising.

    Args:
        mails: Mail dicts; the "from" and "subject" entries are read. The
            dicts are modified in place.
        api_key: OpenRouter API key. When empty, nothing is classified.

    Returns:
        The same list object. Unless the input or the key is empty, every
        mail carries a "category" entry afterwards.
    """
    if not mails or not api_key:
        return mails
    # Built defensively so that a mail lacking a header cannot raise before
    # the best-effort error handling below is reached.
    entries = []
    for i, m in enumerate(mails):
        sender = str(m.get("from") or "")[:50]
        subject = str(m.get("subject") or "")[:80]
        entries.append(f"{i}. Von: {sender} | Betreff: {subject}")
    mail_text = "\n".join(entries)
    # The answer holds one JSON object per mail, so the completion budget must
    # grow with the batch size. A fixed budget of 400 tokens is likely to cut
    # the array off for larger batches, which makes json.loads fail and turns
    # every mail into "unknown". 400 stays the lower bound.
    max_tokens = max(400, 20 * len(mails) + 50)
    try:
        r = _req.post(
            "https://openrouter.ai/api/v1/chat/completions",
            headers={"Authorization": f"Bearer {api_key}"},
            json={
                "model": "openai/gpt-4o-mini",
                "messages": [
                    {"role": "system", "content": CLASSIFY_PROMPT},
                    {"role": "user", "content": mail_text},
                ],
                "max_tokens": max_tokens,
                "temperature": 0,
            },
            timeout=30,
        )
        r.raise_for_status()
        content = r.json()["choices"][0]["message"]["content"]
        content = content.strip()
        # Strip a surrounding Markdown code fence (```json ... ```) if present.
        if content.startswith("```"):
            content = content.split("\n", 1)[-1].rsplit("```", 1)[0]
        classifications = json.loads(content)
        cat_map = {}
        for c in classifications:
            # Tolerate malformed entries and indices returned as strings.
            try:
                idx = int(c["idx"])
            except (KeyError, TypeError, ValueError):
                continue
            # Normalise the label so that e.g. "Wichtig" matches "wichtig".
            cat_map[idx] = str(c.get("cat") or "unknown").strip().lower()
        for i, m in enumerate(mails):
            m["category"] = cat_map.get(i, "unknown")
        return mails
    except Exception:
        # Deliberate best effort: the digest must still be delivered when the
        # classification service is unavailable or answers with garbage.
        for m in mails:
            m["category"] = "unknown"
        return mails
 def get_smart_digest(hours: int = 24, api_key: str = "") -> dict:
    """Fetch recent INBOX mails, optionally classify them, and group them.

    The mailbox is opened read-only and searched with an IMAP SINCE criterion
    for the date that lies `hours` back from now. Headers of at most the last
    50 ids of the search result are fetched and parsed; the resulting list is
    reversed before further processing.

    Args:
        hours: Look-back window in hours used to compute the SINCE date.
        api_key: Key passed to classify_mails; when empty, no classification
            is attempted and mails are grouped under "unknown" unless they
            already carry a "category".

    Returns:
        {"total", "mails", "summary"} where "summary" maps a category to its
        mails, or {"error": message} when connecting or fetching fails.
    """
    conn = _connect()
    if not conn:
        return {"error": "IMAP-Verbindung fehlgeschlagen"}
    try:
        conn.select("INBOX", readonly=True)
        cutoff = datetime.now() - timedelta(hours=hours)
        since = cutoff.strftime("%d-%b-%Y")
        _, found = conn.search(None, f'(SINCE "{since}")')
        msg_ids = found[0].split() if found[0] else []
        mails = []
        # Header-only PEEK fetch, limited to the last 50 ids of the result.
        for msg_id in msg_ids[-50:]:
            _, raw = conn.fetch(msg_id, "(BODY.PEEK[HEADER])")
            entry = _parse_mail(raw)
            if entry:
                mails.append(entry)
        mails.reverse()
    except Exception as exc:
        return {"error": str(exc)}
    finally:
        # Runs on success and on both early returns above.
        conn.logout()
    if not mails:
        return {"total": 0, "mails": [], "summary": {}}
    if api_key:
        mails = classify_mails(mails, api_key)
    grouped = {}
    for entry in mails:
        grouped.setdefault(entry.get("category", "unknown"), []).append(entry)
    return {"total": len(mails), "mails": mails, "summary": grouped}
 def format_smart_digest(digest: dict) -> str:
    """Render a smart digest as plain text for Telegram.

    Args:
        digest: Either {"error": message} or a dict with "total" and
            "summary" (category -> list of mail dicts). Each mail dict must
            provide "date_str", "from" and "subject".

    Returns:
        An error line, a "no mails" line, or the digest text: a header, one
        section per non-empty category and a closing summary line. The
        categories "wichtig", "aktion" and "info" are listed completely; all
        other categories show at most three mails plus a remainder count.
    """
    if "error" in digest:
        return f"Mail-Fehler: {digest['error']}"
    if digest.get("total", 0) == 0:
        return "Keine neuen Mails im gewählten Zeitraum."
    lines = [f"📧 Mail-Digest ({digest['total']} Mails)\n"]
    summary = digest.get("summary", {})
    cat_labels = {
        "wichtig": "🔴 Wichtig",
        "aktion": "⚡ Aktion nötig",
        "info": "ℹ️ Info",
        "newsletter": "📰 Newsletter",
        "spam": "🗑️ Spam",
        "unknown": "❓ Unkategorisiert",
    }
    cat_order = ["aktion", "wichtig", "info", "newsletter", "spam", "unknown"]
    # The classifier may return labels outside the known set. Append those
    # after the known categories so their mails stay visible instead of being
    # counted in the total but missing from the listing.
    extra_cats = [cat for cat in summary if cat not in cat_order]
    for cat in cat_order + extra_cats:
        cat_mails = summary.get(cat, [])
        if not cat_mails:
            continue
        label = cat_labels.get(cat, cat)
        lines.append(f"{label} ({len(cat_mails)}):")
        show = cat_mails if cat in ("wichtig", "aktion", "info") else cat_mails[:3]
        for m_item in show:
            lines.append(f"  {m_item['date_str']} | {m_item['from'][:30]}")
            lines.append(f"    → {m_item['subject'][:65]}")
        if len(cat_mails) > len(show):
            lines.append(f"  ... und {len(cat_mails) - len(show)} weitere")
        lines.append("")
    wichtig = len(summary.get("wichtig", [])) + len(summary.get("aktion", []))
    noise = len(summary.get("newsletter", [])) + len(summary.get("spam", []))
    lines.append(f"Fazit: {wichtig} relevante, {noise} ignorierbare Mails")
    return "\n".join(lines)
 def format_summary() -> str:
    """Komplett-Übersicht: Counts + letzte Mails + wichtige."""
    counts = get_mail_count()
--- a/homelab-ai-bot/llm.py
+++ b/homelab-ai-bot/llm.py
@ -214,6 +214,20 @@ TOOLS = [
            "parameters": {"type": "object", "properties": {}, "required": []},
        },
    },
    {
        "type": "function",
        "function": {
            "name": "get_smart_mail_digest",
            "description": "Intelligente Mail-Zusammenfassung: KI klassifiziert Mails in Wichtig/Aktion/Info/Newsletter/Spam. Nutze dies wenn der User nach 'wichtigen Mails' fragt oder wissen will ob etwas Relevantes dabei ist.",
            "parameters": {
                "type": "object",
                "properties": {
                    "hours": {"type": "integer", "description": "Zeitraum in Stunden (default: 24)", "default": 24},
                },
                "required": [],
            },
        },
    },
 ]