Vision: gpt-4o statt gpt-4o-mini fuer Bilderkennung, detail=high, 1200 Tokens
This commit is contained in:
parent
c9f1985266
commit
345d3e45ab
1 changed file with 19 additions and 9 deletions
|
|
@@ -13,6 +13,7 @@ sys.path.insert(0, os.path.dirname(__file__))
|
||||||
from core import config
|
from core import config
|
||||||
|
|
||||||
MODEL = "openai/gpt-4o-mini"
|
MODEL = "openai/gpt-4o-mini"
|
||||||
|
VISION_MODEL = "openai/gpt-4o"
|
||||||
MAX_TOOL_ROUNDS = 3
|
MAX_TOOL_ROUNDS = 3
|
||||||
|
|
||||||
SYSTEM_PROMPT = """Du bist der Hausmeister-Bot fuer ein Homelab. Deutsch, kurz, direkt, operativ.
|
SYSTEM_PROMPT = """Du bist der Hausmeister-Bot fuer ein Homelab. Deutsch, kurz, direkt, operativ.
|
||||||
|
|
@@ -375,11 +376,12 @@ def _get_api_key() -> str:
|
||||||
return cfg.api_keys.get("openrouter_key", "")
|
return cfg.api_keys.get("openrouter_key", "")
|
||||||
|
|
||||||
|
|
||||||
def _call_openrouter(messages: list, api_key: str, use_tools: bool = True) -> dict:
|
def _call_openrouter(messages: list, api_key: str, use_tools: bool = True,
|
||||||
|
model: str = None, max_tokens: int = 600) -> dict:
|
||||||
payload = {
|
payload = {
|
||||||
"model": MODEL,
|
"model": model or MODEL,
|
||||||
"messages": messages,
|
"messages": messages,
|
||||||
"max_tokens": 600,
|
"max_tokens": max_tokens,
|
||||||
}
|
}
|
||||||
if use_tools:
|
if use_tools:
|
||||||
payload["tools"] = TOOLS
|
payload["tools"] = TOOLS
|
||||||
|
|
@@ -389,7 +391,7 @@ def _call_openrouter(messages: list, api_key: str, use_tools: bool = True) -> di
|
||||||
"https://openrouter.ai/api/v1/chat/completions",
|
"https://openrouter.ai/api/v1/chat/completions",
|
||||||
headers={"Authorization": f"Bearer {api_key}"},
|
headers={"Authorization": f"Bearer {api_key}"},
|
||||||
json=payload,
|
json=payload,
|
||||||
timeout=60,
|
timeout=90,
|
||||||
)
|
)
|
||||||
r.raise_for_status()
|
r.raise_for_status()
|
||||||
return r.json()
|
return r.json()
|
||||||
|
|
@@ -519,10 +521,16 @@ def ask_with_image(image_base64: str, caption: str, tool_handlers: dict, session
|
||||||
except Exception:
|
except Exception:
|
||||||
memory_block = ""
|
memory_block = ""
|
||||||
|
|
||||||
prompt_text = caption if caption else "Was siehst du auf diesem Bild? Beschreibe was du erkennst."
|
default_prompt = (
|
||||||
|
"Lies dieses Bild/Dokument VOLLSTAENDIG und GENAU. "
|
||||||
|
"Extrahiere ALLE sichtbaren Texte, Zahlen, Daten, Namen. "
|
||||||
|
"Strukturiere die Informationen uebersichtlich. "
|
||||||
|
"Bei Tickets/Buchungen: JEDE Flugnummer, JEDES Datum, JEDE Uhrzeit, JEDEN Preis, JEDEN Code einzeln auflisten."
|
||||||
|
)
|
||||||
|
prompt_text = caption if caption else default_prompt
|
||||||
user_content = [
|
user_content = [
|
||||||
{"type": "text", "text": prompt_text},
|
{"type": "text", "text": prompt_text},
|
||||||
{"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_base64}"}},
|
{"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_base64}", "detail": "high"}},
|
||||||
]
|
]
|
||||||
|
|
||||||
messages = [
|
messages = [
|
||||||
|
|
@@ -545,7 +553,8 @@ def ask_with_image(image_base64: str, caption: str, tool_handlers: dict, session
|
||||||
|
|
||||||
try:
|
try:
|
||||||
for _round in range(MAX_TOOL_ROUNDS):
|
for _round in range(MAX_TOOL_ROUNDS):
|
||||||
data = _call_openrouter(messages, api_key, use_tools=True)
|
data = _call_openrouter(messages, api_key, use_tools=True,
|
||||||
|
model=VISION_MODEL, max_tokens=1200)
|
||||||
choice = data["choices"][0]
|
choice = data["choices"][0]
|
||||||
msg = choice["message"]
|
msg = choice["message"]
|
||||||
|
|
||||||
|
|
@@ -577,8 +586,9 @@ def ask_with_image(image_base64: str, caption: str, tool_handlers: dict, session
|
||||||
"content": str(result)[:3000],
|
"content": str(result)[:3000],
|
||||||
})
|
})
|
||||||
|
|
||||||
data = _call_openrouter(messages, api_key, use_tools=False)
|
data = _call_openrouter(messages, api_key, use_tools=False,
|
||||||
|
model=VISION_MODEL, max_tokens=1200)
|
||||||
return data["choices"][0]["message"]["content"]
|
return data["choices"][0]["message"]["content"]
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return f"LLM-Fehler: {e}"
|
return f"Vision-LLM-Fehler: {e}"
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue