diff --git a/homelab-ai-bot/llm.py b/homelab-ai-bot/llm.py index cc77ee67..9e38d51d 100644 --- a/homelab-ai-bot/llm.py +++ b/homelab-ai-bot/llm.py @@ -13,6 +13,7 @@ sys.path.insert(0, os.path.dirname(__file__)) from core import config MODEL = "openai/gpt-4o-mini" +VISION_MODEL = "openai/gpt-4o" MAX_TOOL_ROUNDS = 3 SYSTEM_PROMPT = """Du bist der Hausmeister-Bot fuer ein Homelab. Deutsch, kurz, direkt, operativ. @@ -375,11 +376,12 @@ def _get_api_key() -> str: return cfg.api_keys.get("openrouter_key", "") -def _call_openrouter(messages: list, api_key: str, use_tools: bool = True) -> dict: +def _call_openrouter(messages: list, api_key: str, use_tools: bool = True, + model: str = None, max_tokens: int = 600) -> dict: payload = { - "model": MODEL, + "model": model or MODEL, "messages": messages, - "max_tokens": 600, + "max_tokens": max_tokens, } if use_tools: payload["tools"] = TOOLS @@ -389,7 +391,7 @@ def _call_openrouter(messages: list, api_key: str, use_tools: bool = True) -> di "https://openrouter.ai/api/v1/chat/completions", headers={"Authorization": f"Bearer {api_key}"}, json=payload, - timeout=60, + timeout=90, ) r.raise_for_status() return r.json() @@ -519,10 +521,16 @@ def ask_with_image(image_base64: str, caption: str, tool_handlers: dict, session except Exception: memory_block = "" - prompt_text = caption if caption else "Was siehst du auf diesem Bild? Beschreibe was du erkennst." + default_prompt = ( + "Lies dieses Bild/Dokument VOLLSTAENDIG und GENAU. " + "Extrahiere ALLE sichtbaren Texte, Zahlen, Daten, Namen. " + "Strukturiere die Informationen uebersichtlich. " + "Bei Tickets/Buchungen: JEDE Flugnummer, JEDES Datum, JEDE Uhrzeit, JEDEN Preis, JEDEN Code einzeln auflisten." + ) + prompt_text = caption if caption else default_prompt user_content = [ {"type": "text", "text": prompt_text}, - {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_base64}"}}, + {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_base64}", "detail": "high"}}, ] messages = [ @@ -545,7 +553,8 @@ def ask_with_image(image_base64: str, caption: str, tool_handlers: dict, session try: for _round in range(MAX_TOOL_ROUNDS): - data = _call_openrouter(messages, api_key, use_tools=True) + data = _call_openrouter(messages, api_key, use_tools=True, + model=VISION_MODEL, max_tokens=1200) choice = data["choices"][0] msg = choice["message"] @@ -577,8 +586,9 @@ def ask_with_image(image_base64: str, caption: str, tool_handlers: dict, session "content": str(result)[:3000], }) - data = _call_openrouter(messages, api_key, use_tools=False) + data = _call_openrouter(messages, api_key, use_tools=False, + model=VISION_MODEL, max_tokens=1200) return data["choices"][0]["message"]["content"] except Exception as e: - return f"LLM-Fehler: {e}" + return f"Vision-LLM-Fehler: {e}"