Plausibilitaetskorrektur: schreibt Datumskorrektur ins Memory wenn OCR-Fehler erkannt

This commit is contained in:
root 2026-03-16 09:52:07 +07:00
parent fbf1d2e28c
commit f0332b2f98

View file

@ -424,9 +424,10 @@ async def handle_photo(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
handlers = context.get_tool_handlers(session_id=session_id) handlers = context.get_tool_handlers(session_id=session_id)
answer = llm.ask_with_image(image_base64, caption, handlers, session_id=session_id) answer = llm.ask_with_image(image_base64, caption, handlers, session_id=session_id)
plausibility_warning = _check_flight_plausibility(answer) warning_text, warnings = _check_flight_plausibility(answer)
if plausibility_warning: if warning_text:
answer += plausibility_warning answer += warning_text
_store_plausibility_corrections(warnings)
if session_id: if session_id:
user_msg = f"[Foto] {caption}" if caption else "[Foto gesendet]" user_msg = f"[Foto] {caption}" if caption else "[Foto gesendet]"
@ -502,20 +503,58 @@ def _check_flight_plausibility(text: str) -> str:
time_diff_minutes = abs(s1.get("arr_h", 0) * 60 + s1.get("arr_m", 0) - s2.get("dep_h", 0) * 60 - s2.get("dep_m", 0)) time_diff_minutes = abs(s1.get("arr_h", 0) * 60 + s1.get("arr_m", 0) - s2.get("dep_h", 0) * 60 - s2.get("dep_m", 0))
if layover_h > 20 and time_diff_minutes < 180: if layover_h > 20 and time_diff_minutes < 180:
same_day_layover_min = time_diff_minutes if s2.get("dep_h", 0) * 60 + s2.get("dep_m", 0) > s1.get("arr_h", 0) * 60 + s1.get("arr_m", 0) else (1440 - time_diff_minutes) likely_day = s1["date_day"] + (1 if s1.get("arr_next_day") else 0)
warnings.append( likely_month = s1["date_month"]
f"⚠️ Segment {i+1}{i+2}: Berechnete Umsteigezeit = {layover_h:.0f}h. " if likely_day > 28:
f"Die Uhrzeiten liegen nur {time_diff_minutes} Min auseinander. " likely_day = s1["date_day"]
f"Moeglicherweise ist das Datum von Segment {i+2} falsch gelesen " warnings.append({
f"({s2['date_day']:02d}.{s2['date_month']:02d}. statt " "text": (
f"{s1['date_day']:02d}.{s1['date_month']:02d}.). Bitte auf dem Ticket pruefen." f"⚠️ Segment {i+1}{i+2}: Berechnete Umsteigezeit = {layover_h:.0f}h. "
) f"Die Uhrzeiten liegen nur {time_diff_minutes} Min auseinander. "
f"Moeglicherweise ist das Datum von Segment {i+2} falsch gelesen "
f"({s2['date_day']:02d}.{s2['date_month']:02d}. statt "
f"{likely_day:02d}.{likely_month:02d}.). Bitte auf dem Ticket pruefen."
),
"wrong_date": f"{s2['date_day']:02d}.{s2['date_month']:02d}.",
"likely_date": f"{likely_day:02d}.{likely_month:02d}.",
"segment": i + 2,
})
except (ValueError, OverflowError): except (ValueError, OverflowError):
continue continue
if warnings: if warnings:
return "\n\n🔍 Plausibilitaetspruefung:\n" + "\n".join(warnings) warning_text = "\n\n🔍 Plausibilitaetspruefung:\n" + "\n".join(w["text"] for w in warnings)
return "" return warning_text, warnings
return "", []
def _store_plausibility_corrections(warnings: list):
    """Write one date-correction note per plausibility warning to the memory API.

    Every entry of ``warnings`` is read for its ``segment``, ``wrong_date``
    and ``likely_date`` keys; a correction text is built from them and posted
    to the ``/memory`` endpoint through ``memory_client``.

    The operation is best effort: whatever goes wrong while handling a single
    entry is logged as a warning and the remaining entries are still
    processed. A falsy ``warnings`` argument results in no work at all.
    """
    for entry in warnings or ():
        try:
            segment = entry["segment"]
            misread_date = entry["wrong_date"]
            probable_date = entry["likely_date"]

            note = "".join(
                (
                    f"ACHTUNG Datumskorrektur Flug-Segment {segment}: ",
                    f"OCR las {misread_date}, korrektes Datum vermutlich {probable_date}. ",
                    "Plausibilitaetspruefung: Umsteigezeit waere sonst >20h bei nur wenigen Minuten Uhrzeitdifferenz.",
                )
            )
            payload = dict(
                scope="user",
                kind="fact",
                content=note,
                memory_type="fact",
                confidence="high",
                source_type="system_plausibility_check",
            )

            # A falsy response from the memory API is reported, not raised.
            if not memory_client._post("/memory", payload):
                log.warning("Memory-API gab kein Ergebnis fuer Plausibilitaetskorrektur")
                continue

            log.info(
                "Plausibilitaetskorrektur ins Memory geschrieben: Segment %d, %s -> %s",
                segment,
                misread_date,
                probable_date,
            )
        except Exception as err:
            # Deliberately broad: storing the hint must never break the caller.
            log.warning("Konnte Plausibilitaetskorrektur nicht speichern: %s", err)
def _extract_pdf_text(pdf_bytes: bytes) -> str: def _extract_pdf_text(pdf_bytes: bytes) -> str:
@ -561,9 +600,10 @@ async def handle_document(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
handlers = context.get_tool_handlers(session_id=session_id) handlers = context.get_tool_handlers(session_id=session_id)
answer = llm.ask_with_image(image_base64, caption, handlers, session_id=session_id) answer = llm.ask_with_image(image_base64, caption, handlers, session_id=session_id)
plausibility_warning = _check_flight_plausibility(answer) warning_text, warnings = _check_flight_plausibility(answer)
if plausibility_warning: if warning_text:
answer += plausibility_warning answer += warning_text
_store_plausibility_corrections(warnings)
if session_id: if session_id:
user_msg = f"[Bild-Datei] {caption}" if caption else "[Bild-Datei gesendet]" user_msg = f"[Bild-Datei] {caption}" if caption else "[Bild-Datei gesendet]"
@ -597,9 +637,10 @@ async def handle_document(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
handlers = context.get_tool_handlers(session_id=session_id) handlers = context.get_tool_handlers(session_id=session_id)
answer = llm.ask_with_tools(full_prompt, handlers, session_id=session_id) answer = llm.ask_with_tools(full_prompt, handlers, session_id=session_id)
plausibility_warning = _check_flight_plausibility(answer) warning_text, warnings = _check_flight_plausibility(answer)
if plausibility_warning: if warning_text:
answer += plausibility_warning answer += warning_text
_store_plausibility_corrections(warnings)
if session_id: if session_id:
user_msg = f"[PDF: {doc.file_name or 'dokument.pdf'}] {caption}" if caption else f"[PDF: {doc.file_name or 'dokument.pdf'}]" user_msg = f"[PDF: {doc.file_name or 'dokument.pdf'}] {caption}" if caption else f"[PDF: {doc.file_name or 'dokument.pdf'}]"