Vision: gpt-4o statt gpt-4o-mini fuer Bilderkennung, detail=high, 1200 Tokens

This commit is contained in:
root 2026-03-16 09:24:04 +07:00
parent c9f1985266
commit 345d3e45ab

View file

@ -13,6 +13,7 @@ sys.path.insert(0, os.path.dirname(__file__))
from core import config from core import config
MODEL = "openai/gpt-4o-mini" MODEL = "openai/gpt-4o-mini"
VISION_MODEL = "openai/gpt-4o"
MAX_TOOL_ROUNDS = 3 MAX_TOOL_ROUNDS = 3
SYSTEM_PROMPT = """Du bist der Hausmeister-Bot fuer ein Homelab. Deutsch, kurz, direkt, operativ. SYSTEM_PROMPT = """Du bist der Hausmeister-Bot fuer ein Homelab. Deutsch, kurz, direkt, operativ.
@ -375,11 +376,12 @@ def _get_api_key() -> str:
return cfg.api_keys.get("openrouter_key", "") return cfg.api_keys.get("openrouter_key", "")
def _call_openrouter(messages: list, api_key: str, use_tools: bool = True) -> dict: def _call_openrouter(messages: list, api_key: str, use_tools: bool = True,
model: str = None, max_tokens: int = 600) -> dict:
payload = { payload = {
"model": MODEL, "model": model or MODEL,
"messages": messages, "messages": messages,
"max_tokens": 600, "max_tokens": max_tokens,
} }
if use_tools: if use_tools:
payload["tools"] = TOOLS payload["tools"] = TOOLS
@ -389,7 +391,7 @@ def _call_openrouter(messages: list, api_key: str, use_tools: bool = True) -> di
"https://openrouter.ai/api/v1/chat/completions", "https://openrouter.ai/api/v1/chat/completions",
headers={"Authorization": f"Bearer {api_key}"}, headers={"Authorization": f"Bearer {api_key}"},
json=payload, json=payload,
timeout=60, timeout=90,
) )
r.raise_for_status() r.raise_for_status()
return r.json() return r.json()
@ -519,10 +521,16 @@ def ask_with_image(image_base64: str, caption: str, tool_handlers: dict, session
except Exception: except Exception:
memory_block = "" memory_block = ""
prompt_text = caption if caption else "Was siehst du auf diesem Bild? Beschreibe was du erkennst." default_prompt = (
"Lies dieses Bild/Dokument VOLLSTAENDIG und GENAU. "
"Extrahiere ALLE sichtbaren Texte, Zahlen, Daten, Namen. "
"Strukturiere die Informationen uebersichtlich. "
"Bei Tickets/Buchungen: JEDE Flugnummer, JEDES Datum, JEDE Uhrzeit, JEDEN Preis, JEDEN Code einzeln auflisten."
)
prompt_text = caption if caption else default_prompt
user_content = [ user_content = [
{"type": "text", "text": prompt_text}, {"type": "text", "text": prompt_text},
{"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_base64}"}}, {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_base64}", "detail": "high"}},
] ]
messages = [ messages = [
@ -545,7 +553,8 @@ def ask_with_image(image_base64: str, caption: str, tool_handlers: dict, session
try: try:
for _round in range(MAX_TOOL_ROUNDS): for _round in range(MAX_TOOL_ROUNDS):
data = _call_openrouter(messages, api_key, use_tools=True) data = _call_openrouter(messages, api_key, use_tools=True,
model=VISION_MODEL, max_tokens=1200)
choice = data["choices"][0] choice = data["choices"][0]
msg = choice["message"] msg = choice["message"]
@ -577,8 +586,9 @@ def ask_with_image(image_base64: str, caption: str, tool_handlers: dict, session
"content": str(result)[:3000], "content": str(result)[:3000],
}) })
data = _call_openrouter(messages, api_key, use_tools=False) data = _call_openrouter(messages, api_key, use_tools=False,
model=VISION_MODEL, max_tokens=1200)
return data["choices"][0]["message"]["content"] return data["choices"][0]["message"]["content"]
except Exception as e: except Exception as e:
return f"LLM-Fehler: {e}" return f"Vision-LLM-Fehler: {e}"