rag: LLM-Query-Rewrite (Stufe B, gpt-4o-mini) als Default-Modus
Query wird vor ES-Suche durch gpt-4o-mini in 4 DE/EN-Varianten umformuliert (Synonyme, Fachbegriffe, Zahl-/Kosten-/Summenbegriffe). Dann Multi-Query-Merge durch _merge_hits_from_queries. Fallback auf Single-Query bei API-Fehler oder fehlendem Key. 1h-Cache, 8s-Timeout. Loest u. a. Kosten-/Preis-Fragen, die zuvor am Standardpfad gescheitert sind (Beispiel: "was haben die wohnungen in kambodscha gekostet" findet jetzt G2010B und D1603 in einem Rutsch).
This commit is contained in:
parent
7bbefdcb78
commit
c63b3621c0
1 changed files with 132 additions and 6 deletions
|
|
@ -6,7 +6,9 @@ RAGFlow bleibt Ingestion; Suche geht direkt an ES (Issue #51).
|
||||||
import base64
|
import base64
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
|
import os
|
||||||
import re
|
import re
|
||||||
|
import time
|
||||||
import urllib.error
|
import urllib.error
|
||||||
import urllib.request
|
import urllib.request
|
||||||
|
|
||||||
|
|
@ -466,6 +468,103 @@ def _expand_multilingual(q: str) -> list:
|
||||||
return variants[:8]
|
return variants[:8]
|
||||||
# --- /Multi-Query-Erweiterung ---------------------------------------------
|
# --- /Multi-Query-Erweiterung ---------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
# --- LLM Query Rewrite (Stufe B) ---
#
# Rewrites the user query into up to n compact DE/EN search variants via
# gpt-4o-mini before the ES search runs. Callers fall back to the original
# single query when no API key is configured or the API call fails.

# Cache of rewrite results: "<n>::<lowercased query>" -> (timestamp, [variants]).
_LLM_REWRITE_CACHE: dict = {}
_LLM_REWRITE_TTL = 3600            # seconds a cached rewrite stays valid (1h)
_LLM_REWRITE_CACHE_LIMIT = 128     # evict expired entries once the cache grows past this
_LLM_REWRITE_MAX = 6               # hard cap on sub-queries used downstream
_LLM_REWRITE_TIMEOUT = 8           # seconds for the OpenAI HTTP request
_LLM_REWRITE_MODEL = "gpt-4o-mini"
_LLM_REWRITE_PROMPT = (
    "Du bist ein Query-Rewriter fuer eine deutschsprachige Dokumenten-Suche "
    "(Vertraege, Versicherungen, Rechnungen, Bescheide, Kaufvertraege, Kontoauszuege). "
    "Formuliere die Nutzer-Frage in {n} unterschiedliche kompakte Such-Queries um. "
    "Nutze Synonyme, Fachbegriffe, Zahl-/Kosten-/Summenbegriffe und sowohl deutsche "
    "als auch englische Termini. Jede Query: 2-6 Woerter, Keywords statt Saetze, "
    "keine Fragezeichen. Antworte NUR als JSON-Liste von Strings, nichts sonst."
)


def _openai_key() -> str:
    """Return the OpenAI API key: app config first, environment variable as fallback."""
    try:
        from core import config as _cfg  # type: ignore
        v = (_cfg.parse_config().raw.get("OPENAI_API_KEY") or "").strip()
        if v:
            return v
    except Exception:
        # Config module missing or unreadable -> fall through to the env var.
        pass
    return (os.environ.get("OPENAI_API_KEY") or "").strip()


def _clean_variant(s: str) -> str:
    """Strip whitespace and trailing sentence punctuation from a candidate query."""
    s = s.strip()
    while s and s[-1] in ".?!;,":
        s = s[:-1]
    return s.strip()


def _collect_variant(s: str, q: str, variants: list) -> None:
    """Append the cleaned variant *s* to *variants* unless it is empty,
    a duplicate, or case-insensitively equal to the original query *q*."""
    s = _clean_variant(s)
    if s and s.lower() != q.lower() and s not in variants:
        variants.append(s)


def _llm_query_rewrite(query: str, n: int = 4) -> list:
    """gpt-4o-mini -> up to *n* DE/EN reformulations of *query*.

    Successful results are cached for _LLM_REWRITE_TTL seconds. Returns []
    on empty input, n <= 0, missing API key, or any API/parse error so
    callers can fall back to the plain single-query path.
    """
    q = (query or "").strip()
    if not q or n <= 0:
        return []
    cache_key = f"{n}::{q.lower()}"
    now = time.time()
    entry = _LLM_REWRITE_CACHE.get(cache_key)
    if entry and (now - entry[0]) < _LLM_REWRITE_TTL:
        return list(entry[1])
    key = _openai_key()
    if not key:
        return []
    body = json.dumps({
        "model": _LLM_REWRITE_MODEL,
        "messages": [
            {"role": "system", "content": _LLM_REWRITE_PROMPT.format(n=n)},
            {"role": "user", "content": q},
        ],
        "temperature": 0.3,
        "max_tokens": 220,
    }).encode()
    req = urllib.request.Request(
        "https://api.openai.com/v1/chat/completions",
        data=body,
        method="POST",
        headers={
            "Content-Type": "application/json",
            "Authorization": f"Bearer {key}",
        },
    )
    try:
        with urllib.request.urlopen(req, timeout=_LLM_REWRITE_TIMEOUT) as resp:
            data = json.load(resp)
    except Exception as e:
        log.warning("llm rewrite failed: %s", e)
        return []
    try:
        txt = (data["choices"][0]["message"]["content"] or "").strip()
    except Exception:
        return []
    # Prefer the JSON array inside the answer; models sometimes wrap it in prose.
    m = re.search(r"\[[\s\S]*\]", txt)
    raw = m.group(0) if m else txt
    variants: list = []
    try:
        arr = json.loads(raw)
        if isinstance(arr, list):
            for x in arr:
                if isinstance(x, str):
                    _collect_variant(x, q, variants)
    except Exception:
        # Not valid JSON: fall back to one query per line, stripping bullet markers.
        for line in txt.splitlines():
            _collect_variant(re.sub(r"^[\s\-\*\d\.\)]+", "", line), q, variants)
    variants = variants[:n]
    # Evict stale entries before inserting so the cache cannot grow unbounded.
    if len(_LLM_REWRITE_CACHE) >= _LLM_REWRITE_CACHE_LIMIT:
        cutoff = now - _LLM_REWRITE_TTL
        for k in [k for k, v in _LLM_REWRITE_CACHE.items() if v[0] < cutoff]:
            _LLM_REWRITE_CACHE.pop(k, None)
    _LLM_REWRITE_CACHE[cache_key] = (now, variants)
    log.info("llm rewrite %r -> %s variants", q[:60], len(variants))
    return variants
# --- /LLM Query Rewrite ---
|
||||||
|
|
||||||
def handle_rag_search(query: str, top_k: int = 8, **kw):
|
def handle_rag_search(query: str, top_k: int = 8, **kw):
|
||||||
if not query or not query.strip():
|
if not query or not query.strip():
|
||||||
return "rag_search: query fehlt."
|
return "rag_search: query fehlt."
|
||||||
|
|
@ -513,6 +612,33 @@ def handle_rag_search(query: str, top_k: int = 8, **kw):
|
||||||
f"{len(hits)} Kandidaten, zeige bis {top_k}:**\n"
|
f"{len(hits)} Kandidaten, zeige bis {top_k}:**\n"
|
||||||
)
|
)
|
||||||
snip_len = 500
|
snip_len = 500
|
||||||
|
else:
|
||||||
|
# Modus 3 (2026-04-16): LLM-Query-Rewrite via gpt-4o-mini.
|
||||||
|
# Fallback auf Single-Query bei API-Fehler / fehlendem Key.
|
||||||
|
rewrites = _llm_query_rewrite(qstrip, n=4)
|
||||||
|
if rewrites:
|
||||||
|
subqs = [qstrip] + [r for r in rewrites if r.lower() != qstrip.lower()]
|
||||||
|
subqs = subqs[:_LLM_REWRITE_MAX]
|
||||||
|
pool_cap = max(top_k * 5, 80)
|
||||||
|
hits, err = _merge_hits_from_queries(
|
||||||
|
subqs,
|
||||||
|
es_size,
|
||||||
|
pool_cap=pool_cap,
|
||||||
|
full_path_dedup=False,
|
||||||
|
)
|
||||||
|
if hits:
|
||||||
|
header = (
|
||||||
|
f"**LLM-Rewrite ({len(subqs)} Varianten, gpt-4o-mini) '{qstrip}' — "
|
||||||
|
f"{len(hits)} Kandidaten, zeige bis {top_k}:**\n"
|
||||||
|
)
|
||||||
|
snip_len = 600
|
||||||
|
else:
|
||||||
|
data = _es_hybrid_search(qstrip, es_size)
|
||||||
|
if "_error" in data:
|
||||||
|
return f"Fehler bei der Dokumentensuche: {data['_error']}"
|
||||||
|
hits = (data.get("hits") or {}).get("hits") or []
|
||||||
|
header = f"**Dokumente fuer '{qstrip}' (bis {top_k}):**\n"
|
||||||
|
snip_len = 650
|
||||||
else:
|
else:
|
||||||
data = _es_hybrid_search(qstrip, es_size)
|
data = _es_hybrid_search(qstrip, es_size)
|
||||||
if "_error" in data:
|
if "_error" in data:
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue