Code-seitige Plausibilitaetspruefung: _check_flight_plausibility parst Segmente und flaggt verdaechtige Layover >20h

2026-03-16 09:48:09 +07:00 · 2026-03-16 09:48:09 +07:00 · fbf1d2e28c
commit fbf1d2e28c
parent 783e043353
1 changed files with 91 additions and 0 deletions
--- a/homelab-ai-bot/telegram_bot.py
+++ b/homelab-ai-bot/telegram_bot.py
@ -424,6 +424,10 @@ async def handle_photo(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
        handlers = context.get_tool_handlers(session_id=session_id)
        answer = llm.ask_with_image(image_base64, caption, handlers, session_id=session_id)
        plausibility_warning = _check_flight_plausibility(answer)
        if plausibility_warning:
            answer += plausibility_warning
        if session_id:
            user_msg = f"[Foto] {caption}" if caption else "[Foto gesendet]"
            memory_client.log_message(session_id, "user", user_msg)
@ -435,6 +439,85 @@ async def handle_photo(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
        await update.message.reply_text(f"Fehler bei Bildanalyse: {e}")
 def _check_flight_plausibility(text: str) -> str:
    """Prueft LLM-Antwort auf verdaechtige Layover-Zeiten zwischen Flugsegmenten.
    Parst Datum/Uhrzeit-Paare aus der strukturierten Antwort und flaggt
    Segmentueberg aenge mit >20h berechneter Umsteigezeit bei <3h Uhrzeitdifferenz.
    """
    import re
    from datetime import datetime, timedelta
    MONTHS = {"JAN": 1, "FEB": 2, "MAR": 3, "APR": 4, "MAY": 5, "JUN": 6,
              "JUL": 7, "AUG": 8, "SEP": 9, "OCT": 10, "NOV": 11, "DEC": 12}
    segments = []
    current_seg = {}
    for line in text.split("\n"):
        line_clean = line.strip().replace("**", "").replace("*", "")
        date_match = re.search(r"Datum:\s*(\d{1,2})\s*(JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)", line_clean, re.IGNORECASE)
        if date_match:
            day = int(date_match.group(1))
            month = MONTHS.get(date_match.group(2).upper(), 0)
            current_seg["date_day"] = day
            current_seg["date_month"] = month
        dep_match = re.search(r"Abflug:\s*(\d{1,2}):(\d{2})", line_clean)
        if dep_match:
            current_seg["dep_h"] = int(dep_match.group(1))
            current_seg["dep_m"] = int(dep_match.group(2))
        arr_match = re.search(r"Ankunft:\s*(\d{1,2}):(\d{2})", line_clean)
        if arr_match:
            current_seg["arr_h"] = int(arr_match.group(1))
            current_seg["arr_m"] = int(arr_match.group(2))
            next_day = "chster Tag" in line_clean or "+1" in line_clean
            current_seg["arr_next_day"] = next_day
        if all(k in current_seg for k in ("date_day", "date_month", "dep_h", "arr_h")):
            if current_seg not in segments:
                segments.append(dict(current_seg))
                current_seg = {}
    if len(segments) < 2:
        return ""
    warnings = []
    year = 2026
    for i in range(len(segments) - 1):
        s1 = segments[i]
        s2 = segments[i + 1]
        try:
            arr_day_offset = 1 if s1.get("arr_next_day") else 0
            arr_dt = datetime(year, s1["date_month"], s1["date_day"], s1.get("arr_h", 0), s1.get("arr_m", 0)) + timedelta(days=arr_day_offset)
            dep_dt = datetime(year, s2["date_month"], s2["date_day"], s2.get("dep_h", 0), s2.get("dep_m", 0))
            layover = dep_dt - arr_dt
            layover_h = layover.total_seconds() / 3600
            time_diff_minutes = abs(s1.get("arr_h", 0) * 60 + s1.get("arr_m", 0) - s2.get("dep_h", 0) * 60 - s2.get("dep_m", 0))
            if layover_h > 20 and time_diff_minutes < 180:
                same_day_layover_min = time_diff_minutes if s2.get("dep_h", 0) * 60 + s2.get("dep_m", 0) > s1.get("arr_h", 0) * 60 + s1.get("arr_m", 0) else (1440 - time_diff_minutes)
                warnings.append(
                    f"⚠️ Segment {i+1}→{i+2}: Berechnete Umsteigezeit = {layover_h:.0f}h. "
                    f"Die Uhrzeiten liegen nur {time_diff_minutes} Min auseinander. "
                    f"Moeglicherweise ist das Datum von Segment {i+2} falsch gelesen "
                    f"({s2['date_day']:02d}.{s2['date_month']:02d}. statt "
                    f"{s1['date_day']:02d}.{s1['date_month']:02d}.). Bitte auf dem Ticket pruefen."
                )
        except (ValueError, OverflowError):
            continue
    if warnings:
        return "\n\n🔍 Plausibilitaetspruefung:\n" + "\n".join(warnings)
    return ""
 def _extract_pdf_text(pdf_bytes: bytes) -> str:
    """Extrahiert Text aus PDF via PyPDF2. Gibt leeren String zurueck wenn kein Text."""
    try:
@ -478,6 +561,10 @@ async def handle_document(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
            handlers = context.get_tool_handlers(session_id=session_id)
            answer = llm.ask_with_image(image_base64, caption, handlers, session_id=session_id)
            plausibility_warning = _check_flight_plausibility(answer)
            if plausibility_warning:
                answer += plausibility_warning
            if session_id:
                user_msg = f"[Bild-Datei] {caption}" if caption else "[Bild-Datei gesendet]"
                memory_client.log_message(session_id, "user", user_msg)
@ -510,6 +597,10 @@ async def handle_document(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
            handlers = context.get_tool_handlers(session_id=session_id)
            answer = llm.ask_with_tools(full_prompt, handlers, session_id=session_id)
            plausibility_warning = _check_flight_plausibility(answer)
            if plausibility_warning:
                answer += plausibility_warning
            if session_id:
                user_msg = f"[PDF: {doc.file_name or 'dokument.pdf'}] {caption}" if caption else f"[PDF: {doc.file_name or 'dokument.pdf'}]"
                memory_client.log_message(session_id, "user", user_msg)