rag: LLM-Query-Rewrite (Stufe B, gpt-4o-mini) als Default-Modus
Query wird vor ES-Suche durch gpt-4o-mini in 4 DE/EN-Varianten umformuliert (Synonyme, Fachbegriffe, Zahl-/Kosten-/Summenbegriffe). Dann Multi-Query-Merge durch _merge_hits_from_queries. Fallback auf Single-Query bei API-Fehler oder fehlendem Key. 1h-Cache, 8s-Timeout. Loest u.a. Kosten-/Preis-Fragen, die zuvor am Standardpfad gescheitert sind (Beispiel: "was haben die wohnungen in kambodscha gekostet" findet jetzt G2010B und D1603 in einem Rutsch).
This commit is contained in:
parent
7bbefdcb78
commit
c63b3621c0
1 changed files with 132 additions and 6 deletions
|
|
@ -6,7 +6,9 @@ RAGFlow bleibt Ingestion; Suche geht direkt an ES (Issue #51).
|
|||
import base64
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
|
||||
|
|
@ -466,6 +468,103 @@ def _expand_multilingual(q: str) -> list:
|
|||
return variants[:8]
|
||||
# --- /Multi-Query-Erweiterung ---------------------------------------------
|
||||
|
||||
|
||||
# --- LLM query rewrite (stage B) ---
# In-memory cache: maps f"{n}::{query.lower()}" -> (timestamp, [variants]).
_LLM_REWRITE_CACHE: dict = {}
# Cache entry lifetime in seconds (1 hour).
_LLM_REWRITE_TTL = 3600
# Upper bound on sub-queries (original + rewrites) handed to the merge step.
_LLM_REWRITE_MAX = 6
# HTTP timeout in seconds for the OpenAI chat-completions request.
_LLM_REWRITE_TIMEOUT = 8
# Model used for query rewriting.
_LLM_REWRITE_MODEL = "gpt-4o-mini"
# System prompt (German runtime string -- sent to the model verbatim, so it is
# deliberately left untranslated). "{n}" is filled with the requested number
# of reformulations; the model is told to answer ONLY with a JSON list of
# strings.
_LLM_REWRITE_PROMPT = (
    "Du bist ein Query-Rewriter fuer eine deutschsprachige Dokumenten-Suche "
    "(Vertraege, Versicherungen, Rechnungen, Bescheide, Kaufvertraege, Kontoauszuege). "
    "Formuliere die Nutzer-Frage in {n} unterschiedliche kompakte Such-Queries um. "
    "Nutze Synonyme, Fachbegriffe, Zahl-/Kosten-/Summenbegriffe und sowohl deutsche "
    "als auch englische Termini. Jede Query: 2-6 Woerter, Keywords statt Saetze, "
    "keine Fragezeichen. Antworte NUR als JSON-Liste von Strings, nichts sonst."
)
|
||||
|
||||
|
||||
def _openai_key() -> str:
|
||||
try:
|
||||
from core import config as _cfg # type: ignore
|
||||
v = (_cfg.parse_config().raw.get("OPENAI_API_KEY") or "").strip()
|
||||
if v:
|
||||
return v
|
||||
except Exception:
|
||||
pass
|
||||
return (os.environ.get("OPENAI_API_KEY") or "").strip()
|
||||
|
||||
|
||||
def _llm_query_rewrite(query: str, n: int = 4) -> list:
|
||||
"""gpt-4o-mini -> bis zu n DE/EN-Reformulierungen. Cache + Fehler-Fallback."""
|
||||
q = (query or "").strip()
|
||||
if not q or n <= 0:
|
||||
return []
|
||||
cache_key = f"{n}::{q.lower()}"
|
||||
now = time.time()
|
||||
entry = _LLM_REWRITE_CACHE.get(cache_key)
|
||||
if entry and (now - entry[0]) < _LLM_REWRITE_TTL:
|
||||
return list(entry[1])
|
||||
key = _openai_key()
|
||||
if not key:
|
||||
return []
|
||||
body = json.dumps({
|
||||
"model": _LLM_REWRITE_MODEL,
|
||||
"messages": [
|
||||
{"role": "system", "content": _LLM_REWRITE_PROMPT.format(n=n)},
|
||||
{"role": "user", "content": q},
|
||||
],
|
||||
"temperature": 0.3,
|
||||
"max_tokens": 220,
|
||||
}).encode()
|
||||
req = urllib.request.Request(
|
||||
"https://api.openai.com/v1/chat/completions",
|
||||
data=body,
|
||||
method="POST",
|
||||
headers={
|
||||
"Content-Type": "application/json",
|
||||
"Authorization": f"Bearer {key}",
|
||||
},
|
||||
)
|
||||
try:
|
||||
with urllib.request.urlopen(req, timeout=_LLM_REWRITE_TIMEOUT) as resp:
|
||||
data = json.load(resp)
|
||||
except Exception as e:
|
||||
log.warning("llm rewrite failed: %s", e)
|
||||
return []
|
||||
try:
|
||||
txt = (data["choices"][0]["message"]["content"] or "").strip()
|
||||
except Exception:
|
||||
return []
|
||||
m = re.search(r"\[[\s\S]*\]", txt)
|
||||
raw = m.group(0) if m else txt
|
||||
variants: list = []
|
||||
try:
|
||||
arr = json.loads(raw)
|
||||
if isinstance(arr, list):
|
||||
for x in arr:
|
||||
if isinstance(x, str):
|
||||
s = x.strip()
|
||||
while s and s[-1] in ".?!;,":
|
||||
s = s[:-1]
|
||||
s = s.strip()
|
||||
if s and s.lower() != q.lower() and s not in variants:
|
||||
variants.append(s)
|
||||
except Exception:
|
||||
for line in txt.splitlines():
|
||||
s = re.sub(r"^[\s\-\*\d\.\)]+", "", line).strip()
|
||||
while s and s[-1] in ".?!;,":
|
||||
s = s[:-1]
|
||||
s = s.strip()
|
||||
if s and s.lower() != q.lower() and s not in variants:
|
||||
variants.append(s)
|
||||
variants = variants[:n]
|
||||
_LLM_REWRITE_CACHE[cache_key] = (now, variants)
|
||||
log.info("llm rewrite %r -> %s variants", q[:60], len(variants))
|
||||
return variants
|
||||
# --- /LLM Query Rewrite ---
|
||||
|
||||
def handle_rag_search(query: str, top_k: int = 8, **kw):
|
||||
if not query or not query.strip():
|
||||
return "rag_search: query fehlt."
|
||||
|
|
@ -514,12 +613,39 @@ def handle_rag_search(query: str, top_k: int = 8, **kw):
|
|||
)
|
||||
snip_len = 500
|
||||
else:
|
||||
data = _es_hybrid_search(qstrip, es_size)
|
||||
if "_error" in data:
|
||||
return f"Fehler bei der Dokumentensuche: {data['_error']}"
|
||||
hits = (data.get("hits") or {}).get("hits") or []
|
||||
header = f"**Dokumente fuer '{qstrip}' (bis {top_k}):**\n"
|
||||
snip_len = 650
|
||||
# Modus 3 (2026-04-16): LLM-Query-Rewrite via gpt-4o-mini.
|
||||
# Fallback auf Single-Query bei API-Fehler / fehlendem Key.
|
||||
rewrites = _llm_query_rewrite(qstrip, n=4)
|
||||
if rewrites:
|
||||
subqs = [qstrip] + [r for r in rewrites if r.lower() != qstrip.lower()]
|
||||
subqs = subqs[:_LLM_REWRITE_MAX]
|
||||
pool_cap = max(top_k * 5, 80)
|
||||
hits, err = _merge_hits_from_queries(
|
||||
subqs,
|
||||
es_size,
|
||||
pool_cap=pool_cap,
|
||||
full_path_dedup=False,
|
||||
)
|
||||
if hits:
|
||||
header = (
|
||||
f"**LLM-Rewrite ({len(subqs)} Varianten, gpt-4o-mini) '{qstrip}' — "
|
||||
f"{len(hits)} Kandidaten, zeige bis {top_k}:**\n"
|
||||
)
|
||||
snip_len = 600
|
||||
else:
|
||||
data = _es_hybrid_search(qstrip, es_size)
|
||||
if "_error" in data:
|
||||
return f"Fehler bei der Dokumentensuche: {data['_error']}"
|
||||
hits = (data.get("hits") or {}).get("hits") or []
|
||||
header = f"**Dokumente fuer '{qstrip}' (bis {top_k}):**\n"
|
||||
snip_len = 650
|
||||
else:
|
||||
data = _es_hybrid_search(qstrip, es_size)
|
||||
if "_error" in data:
|
||||
return f"Fehler bei der Dokumentensuche: {data['_error']}"
|
||||
hits = (data.get("hits") or {}).get("hits") or []
|
||||
header = f"**Dokumente fuer '{qstrip}' (bis {top_k}):**\n"
|
||||
snip_len = 650
|
||||
|
||||
if not hits:
|
||||
return f"Keine Ergebnisse fuer '{qstrip}' in der Wissensbasis gefunden."
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue