diff --git a/homelab-ai-bot/tools/rag.py b/homelab-ai-bot/tools/rag.py index 9fd62c10..b08ba3e3 100644 --- a/homelab-ai-bot/tools/rag.py +++ b/homelab-ai-bot/tools/rag.py @@ -6,7 +6,9 @@ RAGFlow bleibt Ingestion; Suche geht direkt an ES (Issue #51). import base64 import json import logging +import os import re +import time import urllib.error import urllib.request @@ -466,6 +468,103 @@ def _expand_multilingual(q: str) -> list: return variants[:8] # --- /Multi-Query-Erweiterung --------------------------------------------- + +# --- LLM Query Rewrite (Stufe B) --- +_LLM_REWRITE_CACHE: dict = {} +_LLM_REWRITE_TTL = 3600 +_LLM_REWRITE_MAX = 6 +_LLM_REWRITE_TIMEOUT = 8 +_LLM_REWRITE_MODEL = "gpt-4o-mini" +_LLM_REWRITE_PROMPT = ( + "Du bist ein Query-Rewriter fuer eine deutschsprachige Dokumenten-Suche " + "(Vertraege, Versicherungen, Rechnungen, Bescheide, Kaufvertraege, Kontoauszuege). " + "Formuliere die Nutzer-Frage in {n} unterschiedliche kompakte Such-Queries um. " + "Nutze Synonyme, Fachbegriffe, Zahl-/Kosten-/Summenbegriffe und sowohl deutsche " + "als auch englische Termini. Jede Query: 2-6 Woerter, Keywords statt Saetze, " + "keine Fragezeichen. Antworte NUR als JSON-Liste von Strings, nichts sonst." +) + + +def _openai_key() -> str: + try: + from core import config as _cfg # type: ignore + v = (_cfg.parse_config().raw.get("OPENAI_API_KEY") or "").strip() + if v: + return v + except Exception: + pass + return (os.environ.get("OPENAI_API_KEY") or "").strip() + + +def _llm_query_rewrite(query: str, n: int = 4) -> list: + """gpt-4o-mini -> bis zu n DE/EN-Reformulierungen. Cache + Fehler-Fallback.""" + q = (query or "").strip() + if not q or n <= 0: + return [] + cache_key = f"{n}::{q.lower()}" + now = time.time() + entry = _LLM_REWRITE_CACHE.get(cache_key) + if entry and (now - entry[0]) < _LLM_REWRITE_TTL: + return list(entry[1]) + key = _openai_key() + if not key: + return [] + body = json.dumps({ + "model": _LLM_REWRITE_MODEL, + "messages": [ + {"role": "system", "content": _LLM_REWRITE_PROMPT.format(n=n)}, + {"role": "user", "content": q}, + ], + "temperature": 0.3, + "max_tokens": 220, + }).encode() + req = urllib.request.Request( + "https://api.openai.com/v1/chat/completions", + data=body, + method="POST", + headers={ + "Content-Type": "application/json", + "Authorization": f"Bearer {key}", + }, + ) + try: + with urllib.request.urlopen(req, timeout=_LLM_REWRITE_TIMEOUT) as resp: + data = json.load(resp) + except Exception as e: + log.warning("llm rewrite failed: %s", e) + return [] + try: + txt = (data["choices"][0]["message"]["content"] or "").strip() + except Exception: + return [] + m = re.search(r"\[[\s\S]*\]", txt) + raw = m.group(0) if m else txt + variants: list = [] + try: + arr = json.loads(raw) + if isinstance(arr, list): + for x in arr: + if isinstance(x, str): + s = x.strip() + while s and s[-1] in ".?!;,": + s = s[:-1] + s = s.strip() + if s and s.lower() != q.lower() and s not in variants: + variants.append(s) + except Exception: + for line in txt.splitlines(): + s = re.sub(r"^[\s\-\*\d\.\)]+", "", line).strip() + while s and s[-1] in ".?!;,": + s = s[:-1] + s = s.strip() + if s and s.lower() != q.lower() and s not in variants: + variants.append(s) + variants = variants[:n] + _LLM_REWRITE_CACHE[cache_key] = (now, variants) + log.info("llm rewrite %r -> %s variants", q[:60], len(variants)) + return variants +# --- /LLM Query Rewrite --- + def handle_rag_search(query: str, top_k: int = 8, **kw): if not query or not query.strip(): return "rag_search: query fehlt." @@ -514,12 +613,39 @@ def handle_rag_search(query: str, top_k: int = 8, **kw): ) snip_len = 500 else: - data = _es_hybrid_search(qstrip, es_size) - if "_error" in data: - return f"Fehler bei der Dokumentensuche: {data['_error']}" - hits = (data.get("hits") or {}).get("hits") or [] - header = f"**Dokumente fuer '{qstrip}' (bis {top_k}):**\n" - snip_len = 650 + # Modus 3 (2026-04-16): LLM-Query-Rewrite via gpt-4o-mini. + # Fallback auf Single-Query bei API-Fehler / fehlendem Key. + rewrites = _llm_query_rewrite(qstrip, n=4) + if rewrites: + subqs = [qstrip] + [r for r in rewrites if r.lower() != qstrip.lower()] + subqs = subqs[:_LLM_REWRITE_MAX] + pool_cap = max(top_k * 5, 80) + hits, err = _merge_hits_from_queries( + subqs, + es_size, + pool_cap=pool_cap, + full_path_dedup=False, + ) + if hits: + header = ( + f"**LLM-Rewrite ({len(subqs)} Varianten, gpt-4o-mini) '{qstrip}' — " + f"{len(hits)} Kandidaten, zeige bis {top_k}:**\n" + ) + snip_len = 600 + else: + data = _es_hybrid_search(qstrip, es_size) + if "_error" in data: + return f"Fehler bei der Dokumentensuche: {data['_error']}" + hits = (data.get("hits") or {}).get("hits") or [] + header = f"**Dokumente fuer '{qstrip}' (bis {top_k}):**\n" + snip_len = 650 + else: + data = _es_hybrid_search(qstrip, es_size) + if "_error" in data: + return f"Fehler bei der Dokumentensuche: {data['_error']}" + hits = (data.get("hits") or {}).get("hits") or [] + header = f"**Dokumente fuer '{qstrip}' (bis {top_k}):**\n" + snip_len = 650 if not hits: return f"Keine Ergebnisse fuer '{qstrip}' in der Wissensbasis gefunden."