fix(llm): kein Fallback auf Textmodell bei Vision-Anfragen

Wenn qwen3-vl:32b in einen Timeout lief, wurde bisher auf qwen2.5:14b (ein reines Textmodell) zurueckgefallen. Dieses Modell kann keine Bilder sehen und halluziniert stattdessen eine Antwort. Jetzt: allow_fallback=False fuer Vision-Anfragen und eine klare Fehlermeldung bei Timeout.
2026-03-25 19:46:35 +01:00 · 2026-03-25 19:46:35 +01:00 · 92898a33e3
commit 92898a33e3
parent ac0f6fda2e
1 changed files with 9 additions and 2 deletions
--- a/homelab-ai-bot/llm.py
+++ b/homelab-ai-bot/llm.py
@ -525,7 +525,8 @@ def ask_with_image(image_base64: str, caption: str, tool_handlers: dict, session
    try:
        for _round in range(MAX_TOOL_ROUNDS):
            data = _call_api(messages, api_key, use_tools=True,
-                             model=MODEL_VISION, max_tokens=4000)
+                             model=MODEL_VISION, max_tokens=4000,
+                             allow_fallback=False)
            choice = data["choices"][0]
            msg = choice["message"]

@ -561,8 +562,14 @@ def ask_with_image(image_base64: str, caption: str, tool_handlers: dict, session
                })

        data = _call_api(messages, api_key, use_tools=False,
-                         model=MODEL_VISION, max_tokens=4000)
+                         model=MODEL_VISION, max_tokens=4000,
+                         allow_fallback=False)
        return data["choices"][0]["message"]["content"]

+    except requests.exceptions.ReadTimeout:
+        return (
+            "Das Vision-Modell antwortet nicht (Timeout). "
+            "Bitte in 1-2 Min erneut versuchen — das Modell wird gerade geladen."
+        )
    except Exception as e:
        return f"Vision-LLM-Fehler: {e}"