rag: LLM-Query-Rewrite (Stufe B, gpt-4o-mini) als Default-Modus
Query wird vor ES-Suche durch gpt-4o-mini in 4 DE/EN-Varianten umformuliert (Synonyme, Fachbegriffe, Zahl-/Kosten-/Summenbegriffe). Dann Multi-Query-Merge durch _merge_hits_from_queries. Fallback auf Single-Query bei API-Fehler oder fehlendem Key. 1h-Cache, 8s-Timeout. Loest u. a. Kosten-/Preis-Fragen, die zuvor am Standardpfad gescheitert sind (Beispiel: "was haben die wohnungen in kambodscha gekostet" findet jetzt G2010B und D1603 in einem Rutsch).
This commit is contained in:
parent
7bbefdcb78
commit
c63b3621c0
1 changed files with 132 additions and 6 deletions
|
|
@ -6,7 +6,9 @@ RAGFlow bleibt Ingestion; Suche geht direkt an ES (Issue #51).
|
||||||
import base64
|
import base64
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
|
import os
|
||||||
import re
|
import re
|
||||||
|
import time
|
||||||
import urllib.error
|
import urllib.error
|
||||||
import urllib.request
|
import urllib.request
|
||||||
|
|
||||||
|
|
@ -466,6 +468,103 @@ def _expand_multilingual(q: str) -> list:
|
||||||
return variants[:8]
|
return variants[:8]
|
||||||
# --- /Multi-Query-Erweiterung ---------------------------------------------
|
# --- /Multi-Query-Erweiterung ---------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
# --- LLM Query Rewrite (Stufe B) ---
#
# Rewrites the user query into up to n compact DE/EN search variants via
# gpt-4o-mini before the ES search runs. Callers fall back to the original
# single query when no API key is configured or the API call fails.

# Cache of rewrite results: "<n>::<lowercased query>" -> (timestamp, [variants]).
_LLM_REWRITE_CACHE: dict = {}
_LLM_REWRITE_TTL = 3600            # seconds a cached rewrite stays valid (1h)
_LLM_REWRITE_CACHE_LIMIT = 128     # evict expired entries once the cache grows past this
_LLM_REWRITE_MAX = 6               # hard cap on sub-queries used downstream
_LLM_REWRITE_TIMEOUT = 8           # seconds for the OpenAI HTTP request
_LLM_REWRITE_MODEL = "gpt-4o-mini"
_LLM_REWRITE_PROMPT = (
    "Du bist ein Query-Rewriter fuer eine deutschsprachige Dokumenten-Suche "
    "(Vertraege, Versicherungen, Rechnungen, Bescheide, Kaufvertraege, Kontoauszuege). "
    "Formuliere die Nutzer-Frage in {n} unterschiedliche kompakte Such-Queries um. "
    "Nutze Synonyme, Fachbegriffe, Zahl-/Kosten-/Summenbegriffe und sowohl deutsche "
    "als auch englische Termini. Jede Query: 2-6 Woerter, Keywords statt Saetze, "
    "keine Fragezeichen. Antworte NUR als JSON-Liste von Strings, nichts sonst."
)


def _openai_key() -> str:
    """Return the OpenAI API key: app config first, environment variable as fallback."""
    try:
        from core import config as _cfg  # type: ignore
        v = (_cfg.parse_config().raw.get("OPENAI_API_KEY") or "").strip()
        if v:
            return v
    except Exception:
        # Config module missing or unreadable -> fall through to the env var.
        pass
    return (os.environ.get("OPENAI_API_KEY") or "").strip()


def _clean_variant(s: str) -> str:
    """Strip whitespace and trailing sentence punctuation from a candidate query."""
    s = s.strip()
    while s and s[-1] in ".?!;,":
        s = s[:-1]
    return s.strip()


def _collect_variant(s: str, q: str, variants: list) -> None:
    """Append the cleaned variant *s* to *variants* unless it is empty,
    a duplicate, or case-insensitively equal to the original query *q*."""
    s = _clean_variant(s)
    if s and s.lower() != q.lower() and s not in variants:
        variants.append(s)


def _llm_query_rewrite(query: str, n: int = 4) -> list:
    """gpt-4o-mini -> up to *n* DE/EN reformulations of *query*.

    Successful results are cached for _LLM_REWRITE_TTL seconds. Returns []
    on empty input, n <= 0, missing API key, or any API/parse error so
    callers can fall back to the plain single-query path.
    """
    q = (query or "").strip()
    if not q or n <= 0:
        return []
    cache_key = f"{n}::{q.lower()}"
    now = time.time()
    entry = _LLM_REWRITE_CACHE.get(cache_key)
    if entry and (now - entry[0]) < _LLM_REWRITE_TTL:
        return list(entry[1])
    key = _openai_key()
    if not key:
        return []
    body = json.dumps({
        "model": _LLM_REWRITE_MODEL,
        "messages": [
            {"role": "system", "content": _LLM_REWRITE_PROMPT.format(n=n)},
            {"role": "user", "content": q},
        ],
        "temperature": 0.3,
        "max_tokens": 220,
    }).encode()
    req = urllib.request.Request(
        "https://api.openai.com/v1/chat/completions",
        data=body,
        method="POST",
        headers={
            "Content-Type": "application/json",
            "Authorization": f"Bearer {key}",
        },
    )
    try:
        with urllib.request.urlopen(req, timeout=_LLM_REWRITE_TIMEOUT) as resp:
            data = json.load(resp)
    except Exception as e:
        log.warning("llm rewrite failed: %s", e)
        return []
    try:
        txt = (data["choices"][0]["message"]["content"] or "").strip()
    except Exception:
        return []
    # Prefer the JSON array inside the answer; models sometimes wrap it in prose.
    m = re.search(r"\[[\s\S]*\]", txt)
    raw = m.group(0) if m else txt
    variants: list = []
    try:
        arr = json.loads(raw)
        if isinstance(arr, list):
            for x in arr:
                if isinstance(x, str):
                    _collect_variant(x, q, variants)
    except Exception:
        # Not valid JSON: fall back to one query per line, stripping bullet markers.
        for line in txt.splitlines():
            _collect_variant(re.sub(r"^[\s\-\*\d\.\)]+", "", line), q, variants)
    variants = variants[:n]
    # Evict stale entries before inserting so the cache cannot grow unbounded.
    if len(_LLM_REWRITE_CACHE) >= _LLM_REWRITE_CACHE_LIMIT:
        cutoff = now - _LLM_REWRITE_TTL
        for k in [k for k, v in _LLM_REWRITE_CACHE.items() if v[0] < cutoff]:
            _LLM_REWRITE_CACHE.pop(k, None)
    _LLM_REWRITE_CACHE[cache_key] = (now, variants)
    log.info("llm rewrite %r -> %s variants", q[:60], len(variants))
    return variants
# --- /LLM Query Rewrite ---
|
||||||
|
|
||||||
def handle_rag_search(query: str, top_k: int = 8, **kw):
|
def handle_rag_search(query: str, top_k: int = 8, **kw):
|
||||||
if not query or not query.strip():
|
if not query or not query.strip():
|
||||||
return "rag_search: query fehlt."
|
return "rag_search: query fehlt."
|
||||||
|
|
@ -513,6 +612,33 @@ def handle_rag_search(query: str, top_k: int = 8, **kw):
|
||||||
f"{len(hits)} Kandidaten, zeige bis {top_k}:**\n"
|
f"{len(hits)} Kandidaten, zeige bis {top_k}:**\n"
|
||||||
)
|
)
|
||||||
snip_len = 500
|
snip_len = 500
|
||||||
|
else:
|
||||||
|
# Modus 3 (2026-04-16): LLM-Query-Rewrite via gpt-4o-mini.
|
||||||
|
# Fallback auf Single-Query bei API-Fehler / fehlendem Key.
|
||||||
|
rewrites = _llm_query_rewrite(qstrip, n=4)
|
||||||
|
if rewrites:
|
||||||
|
subqs = [qstrip] + [r for r in rewrites if r.lower() != qstrip.lower()]
|
||||||
|
subqs = subqs[:_LLM_REWRITE_MAX]
|
||||||
|
pool_cap = max(top_k * 5, 80)
|
||||||
|
hits, err = _merge_hits_from_queries(
|
||||||
|
subqs,
|
||||||
|
es_size,
|
||||||
|
pool_cap=pool_cap,
|
||||||
|
full_path_dedup=False,
|
||||||
|
)
|
||||||
|
if hits:
|
||||||
|
header = (
|
||||||
|
f"**LLM-Rewrite ({len(subqs)} Varianten, gpt-4o-mini) '{qstrip}' — "
|
||||||
|
f"{len(hits)} Kandidaten, zeige bis {top_k}:**\n"
|
||||||
|
)
|
||||||
|
snip_len = 600
|
||||||
|
else:
|
||||||
|
data = _es_hybrid_search(qstrip, es_size)
|
||||||
|
if "_error" in data:
|
||||||
|
return f"Fehler bei der Dokumentensuche: {data['_error']}"
|
||||||
|
hits = (data.get("hits") or {}).get("hits") or []
|
||||||
|
header = f"**Dokumente fuer '{qstrip}' (bis {top_k}):**\n"
|
||||||
|
snip_len = 650
|
||||||
else:
|
else:
|
||||||
data = _es_hybrid_search(qstrip, es_size)
|
data = _es_hybrid_search(qstrip, es_size)
|
||||||
if "_error" in data:
|
if "_error" in data:
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue