feat: harte Qualitaetspruefung fuer Preisrecherche
Bei Preisfragen validiert deep_research jetzt die Antwort auf: - konkrete Preise mit Waehrung - Delta in Prozent - Preisvergleich damals/heute - mindestens 3 konkrete Quellen-Links Wenn die Qualitaet zu schwach ist: 1) automatischer Retry mit strikter Anfrage 2) falls weiter unzureichend: klare Fehlermeldung "keine belastbaren Preisdaten gefunden"
This commit is contained in:
parent
5fa521f19a
commit
e0afaff058
1 changed files with 103 additions and 34 deletions
|
|
@ -1,8 +1,10 @@
|
|||
"""Deep Research Tool — Open Deep Research (CT 121) via LangGraph API."""
|
||||
|
||||
import requests
|
||||
import time
|
||||
import logging
|
||||
import re
|
||||
import time
|
||||
|
||||
import requests
|
||||
|
||||
log = logging.getLogger("deep_research")
|
||||
|
||||
|
|
@ -17,7 +19,12 @@ Du hast Zugriff auf deep_research — eine KI-gestuetzte Tiefenrecherche die 20-
|
|||
Nutze es wenn der User explizit "recherchiere", "finde heraus", "vergleiche" sagt oder eine komplexe Frage hat.
|
||||
NICHT fuer einfache Fakten oder Homelab-Fragen.
|
||||
WICHTIG: deep_research dauert 2-5 Minuten. Das ist normal. Warte auf das Ergebnis.
|
||||
Das Ergebnis ist ein ausfuehrlicher Report. Fasse ihn fuer Telegram zusammen (max ~3000 Zeichen)."""
|
||||
Das Ergebnis ist ein ausfuehrlicher Report. Fasse ihn fuer Telegram zusammen (max ~3000 Zeichen).
|
||||
|
||||
QUALITAET BEI PREISFRAGEN:
|
||||
- Liefere konkrete Zahlen statt allgemeiner Markttexte.
|
||||
- Zeige Zeitraum, Preis damals/heute, Delta in % und Quellen.
|
||||
- Wenn keine belastbaren Daten vorhanden sind, sage es explizit."""
|
||||
|
||||
TOOLS = [
|
||||
{
|
||||
|
|
@ -41,8 +48,7 @@ TOOLS = [
|
|||
|
||||
|
||||
def _create_thread():
|
||||
r = requests.post(f"{DEEP_RESEARCH_URL}/threads",
|
||||
json={}, timeout=10)
|
||||
r = requests.post(f"{DEEP_RESEARCH_URL}/threads", json={}, timeout=10)
|
||||
r.raise_for_status()
|
||||
return r.json()["thread_id"]
|
||||
|
||||
|
|
@ -50,9 +56,7 @@ def _create_thread():
|
|||
def _start_run(thread_id, query):
|
||||
payload = {
|
||||
"assistant_id": ASSISTANT_ID,
|
||||
"input": {
|
||||
"messages": [{"role": "user", "content": query}]
|
||||
},
|
||||
"input": {"messages": [{"role": "user", "content": query}]},
|
||||
"config": {
|
||||
"configurable": {
|
||||
"summarization_model": f"openai:{RESEARCH_MODEL}",
|
||||
|
|
@ -61,10 +65,11 @@ def _start_run(thread_id, query):
|
|||
"final_report_model": f"openai:{RESEARCH_MODEL}",
|
||||
"allow_clarification": False,
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
r = requests.post(f"{DEEP_RESEARCH_URL}/threads/{thread_id}/runs",
|
||||
json=payload, timeout=30)
|
||||
r = requests.post(
|
||||
f"{DEEP_RESEARCH_URL}/threads/{thread_id}/runs", json=payload, timeout=30
|
||||
)
|
||||
r.raise_for_status()
|
||||
return r.json()["run_id"]
|
||||
|
||||
|
|
@ -76,8 +81,8 @@ def _poll_run(thread_id, run_id):
|
|||
elapsed += POLL_INTERVAL
|
||||
try:
|
||||
r = requests.get(
|
||||
f"{DEEP_RESEARCH_URL}/threads/{thread_id}/runs/{run_id}",
|
||||
timeout=10)
|
||||
f"{DEEP_RESEARCH_URL}/threads/{thread_id}/runs/{run_id}", timeout=10
|
||||
)
|
||||
r.raise_for_status()
|
||||
data = r.json()
|
||||
status = data.get("status", "unknown")
|
||||
|
|
@ -96,14 +101,11 @@ def _poll_run(thread_id, run_id):
|
|||
|
||||
|
||||
def _get_result(thread_id):
|
||||
r = requests.get(f"{DEEP_RESEARCH_URL}/threads/{thread_id}/state",
|
||||
timeout=30)
|
||||
r = requests.get(f"{DEEP_RESEARCH_URL}/threads/{thread_id}/state", timeout=30)
|
||||
r.raise_for_status()
|
||||
state = r.json()
|
||||
|
||||
values = state.get("values", {})
|
||||
messages = values.get("messages", [])
|
||||
|
||||
messages = state.get("values", {}).get("messages", [])
|
||||
log.info("Messages in result: %d", len(messages))
|
||||
for i, msg in enumerate(messages):
|
||||
content = msg.get("content", "")
|
||||
|
|
@ -118,26 +120,95 @@ def _get_result(thread_id):
|
|||
return "Kein Report generiert."
|
||||
|
||||
|
||||
def handle_deep_research(query: str, **kw):
|
||||
log.info("deep_research gestartet: %s", query[:100])
|
||||
try:
|
||||
def _is_price_query(query: str) -> bool:
|
||||
q = (query or "").lower()
|
||||
needles = [
|
||||
"preis",
|
||||
"preise",
|
||||
"kosten",
|
||||
"teuer",
|
||||
"guenstig",
|
||||
"ram",
|
||||
"ddr4",
|
||||
"ddr5",
|
||||
"entwicklung",
|
||||
]
|
||||
return any(n in q for n in needles)
|
||||
|
||||
|
||||
def _price_report_quality(report: str):
|
||||
text = report or ""
|
||||
links = re.findall(r"https?://\S+", text)
|
||||
has_percent = bool(re.search(r"[-+]?\d+[\.,]?\d*\s*%", text))
|
||||
has_currency = bool(re.search(r"(?:\d+[\.,]?\d*\s?(?:€|eur|\$))|(?:€\s?\d+)", text, re.I))
|
||||
has_comparison = bool(
|
||||
re.search(r"(damals|heute|vor\s+\d+\s+(?:monaten|wochen)|aktuell|delta)", text, re.I)
|
||||
)
|
||||
|
||||
missing = []
|
||||
if len(links) < 3:
|
||||
missing.append("mindestens 3 konkrete Quellen-Links")
|
||||
if not has_percent:
|
||||
missing.append("Delta in %")
|
||||
if not has_currency:
|
||||
missing.append("konkrete Preise mit Waehrung")
|
||||
if not has_comparison:
|
||||
missing.append("Preisvergleich damals/heute")
|
||||
|
||||
return len(missing) == 0, missing
|
||||
|
||||
|
||||
def _run_research(query: str):
|
||||
thread_id = _create_thread()
|
||||
log.info("Thread erstellt: %s", thread_id)
|
||||
|
||||
run_id = _start_run(thread_id, query)
|
||||
log.info("Run gestartet: %s", run_id)
|
||||
|
||||
ok, error = _poll_run(thread_id, run_id)
|
||||
if not ok:
|
||||
log.error("Research fehlgeschlagen: %s", error)
|
||||
return f"Deep Research fehlgeschlagen: {error}"
|
||||
|
||||
return False, f"Deep Research fehlgeschlagen: {error}"
|
||||
report = _get_result(thread_id)
|
||||
log.info("Report erhalten: %d Zeichen", len(report))
|
||||
return True, report
|
||||
|
||||
|
||||
def handle_deep_research(query: str, **kw):
|
||||
log.info("deep_research gestartet: %s", query[:120])
|
||||
try:
|
||||
ok, report = _run_research(query)
|
||||
if not ok:
|
||||
return report
|
||||
|
||||
# Harte Qualitaetspruefung fuer Preisfragen
|
||||
if _is_price_query(query):
|
||||
good, missing = _price_report_quality(report)
|
||||
if not good:
|
||||
log.warning("Preisreport zu schwach, starte Retry. Missing: %s", ", ".join(missing))
|
||||
stricter_query = (
|
||||
query
|
||||
+ "\n\nLIEFERE NUR belastbare Preisdaten im Format:\n"
|
||||
+ "1) Zeitraum (exakt)\n"
|
||||
+ "2) Preis damals -> Preis heute (EUR)\n"
|
||||
+ "3) Delta in %\n"
|
||||
+ "4) 3-5 konkrete Quellen-Links (keine Startseiten).\n"
|
||||
+ "Wenn unklar: explizit keine belastbaren Preisdaten gefunden."
|
||||
)
|
||||
ok2, report2 = _run_research(stricter_query)
|
||||
if ok2:
|
||||
good2, missing2 = _price_report_quality(report2)
|
||||
if good2:
|
||||
report = report2
|
||||
else:
|
||||
return (
|
||||
"keine belastbaren Preisdaten gefunden. "
|
||||
"Es fehlen: "
|
||||
+ ", ".join(missing2)
|
||||
+ ". Bitte Anfrage enger formulieren (Produktklasse + Region + Zeitraum)."
|
||||
)
|
||||
else:
|
||||
return report2
|
||||
|
||||
if len(report) > 6000:
|
||||
report = report[:6000] + "\n\n[... Report gekuerzt]"
|
||||
|
||||
return report
|
||||
|
||||
except requests.ConnectionError:
|
||||
|
|
@ -148,6 +219,4 @@ def handle_deep_research(query: str, **kw):
|
|||
return f"Deep Research Fehler: {e}"
|
||||
|
||||
|
||||
HANDLERS = {
|
||||
"deep_research": handle_deep_research,
|
||||
}
|
||||
HANDLERS = {"deep_research": handle_deep_research}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue