Code-seitige Plausibilitaetspruefung: _check_flight_plausibility parst Segmente und flaggt verdaechtige Layover >20h

This commit is contained in:
root 2026-03-16 09:48:09 +07:00
parent 783e043353
commit fbf1d2e28c

View file

@ -424,6 +424,10 @@ async def handle_photo(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
handlers = context.get_tool_handlers(session_id=session_id)
answer = llm.ask_with_image(image_base64, caption, handlers, session_id=session_id)
plausibility_warning = _check_flight_plausibility(answer)
if plausibility_warning:
answer += plausibility_warning
if session_id:
user_msg = f"[Foto] {caption}" if caption else "[Foto gesendet]"
memory_client.log_message(session_id, "user", user_msg)
@ -435,6 +439,85 @@ async def handle_photo(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
await update.message.reply_text(f"Fehler bei Bildanalyse: {e}")
def _check_flight_plausibility(text: str) -> str:
"""Prueft LLM-Antwort auf verdaechtige Layover-Zeiten zwischen Flugsegmenten.
Parst Datum/Uhrzeit-Paare aus der strukturierten Antwort und flaggt
Segmentueberg aenge mit >20h berechneter Umsteigezeit bei <3h Uhrzeitdifferenz.
"""
import re
from datetime import datetime, timedelta
MONTHS = {"JAN": 1, "FEB": 2, "MAR": 3, "APR": 4, "MAY": 5, "JUN": 6,
"JUL": 7, "AUG": 8, "SEP": 9, "OCT": 10, "NOV": 11, "DEC": 12}
segments = []
current_seg = {}
for line in text.split("\n"):
line_clean = line.strip().replace("**", "").replace("*", "")
date_match = re.search(r"Datum:\s*(\d{1,2})\s*(JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)", line_clean, re.IGNORECASE)
if date_match:
day = int(date_match.group(1))
month = MONTHS.get(date_match.group(2).upper(), 0)
current_seg["date_day"] = day
current_seg["date_month"] = month
dep_match = re.search(r"Abflug:\s*(\d{1,2}):(\d{2})", line_clean)
if dep_match:
current_seg["dep_h"] = int(dep_match.group(1))
current_seg["dep_m"] = int(dep_match.group(2))
arr_match = re.search(r"Ankunft:\s*(\d{1,2}):(\d{2})", line_clean)
if arr_match:
current_seg["arr_h"] = int(arr_match.group(1))
current_seg["arr_m"] = int(arr_match.group(2))
next_day = "chster Tag" in line_clean or "+1" in line_clean
current_seg["arr_next_day"] = next_day
if all(k in current_seg for k in ("date_day", "date_month", "dep_h", "arr_h")):
if current_seg not in segments:
segments.append(dict(current_seg))
current_seg = {}
if len(segments) < 2:
return ""
warnings = []
year = 2026
for i in range(len(segments) - 1):
s1 = segments[i]
s2 = segments[i + 1]
try:
arr_day_offset = 1 if s1.get("arr_next_day") else 0
arr_dt = datetime(year, s1["date_month"], s1["date_day"], s1.get("arr_h", 0), s1.get("arr_m", 0)) + timedelta(days=arr_day_offset)
dep_dt = datetime(year, s2["date_month"], s2["date_day"], s2.get("dep_h", 0), s2.get("dep_m", 0))
layover = dep_dt - arr_dt
layover_h = layover.total_seconds() / 3600
time_diff_minutes = abs(s1.get("arr_h", 0) * 60 + s1.get("arr_m", 0) - s2.get("dep_h", 0) * 60 - s2.get("dep_m", 0))
if layover_h > 20 and time_diff_minutes < 180:
same_day_layover_min = time_diff_minutes if s2.get("dep_h", 0) * 60 + s2.get("dep_m", 0) > s1.get("arr_h", 0) * 60 + s1.get("arr_m", 0) else (1440 - time_diff_minutes)
warnings.append(
f"⚠️ Segment {i+1}{i+2}: Berechnete Umsteigezeit = {layover_h:.0f}h. "
f"Die Uhrzeiten liegen nur {time_diff_minutes} Min auseinander. "
f"Moeglicherweise ist das Datum von Segment {i+2} falsch gelesen "
f"({s2['date_day']:02d}.{s2['date_month']:02d}. statt "
f"{s1['date_day']:02d}.{s1['date_month']:02d}.). Bitte auf dem Ticket pruefen."
)
except (ValueError, OverflowError):
continue
if warnings:
return "\n\n🔍 Plausibilitaetspruefung:\n" + "\n".join(warnings)
return ""
def _extract_pdf_text(pdf_bytes: bytes) -> str:
"""Extrahiert Text aus PDF via PyPDF2. Gibt leeren String zurueck wenn kein Text."""
try:
@ -478,6 +561,10 @@ async def handle_document(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
handlers = context.get_tool_handlers(session_id=session_id)
answer = llm.ask_with_image(image_base64, caption, handlers, session_id=session_id)
plausibility_warning = _check_flight_plausibility(answer)
if plausibility_warning:
answer += plausibility_warning
if session_id:
user_msg = f"[Bild-Datei] {caption}" if caption else "[Bild-Datei gesendet]"
memory_client.log_message(session_id, "user", user_msg)
@ -510,6 +597,10 @@ async def handle_document(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
handlers = context.get_tool_handlers(session_id=session_id)
answer = llm.ask_with_tools(full_prompt, handlers, session_id=session_id)
plausibility_warning = _check_flight_plausibility(answer)
if plausibility_warning:
answer += plausibility_warning
if session_id:
user_msg = f"[PDF: {doc.file_name or 'dokument.pdf'}] {caption}" if caption else f"[PDF: {doc.file_name or 'dokument.pdf'}]"
memory_client.log_message(session_id, "user", user_msg)