fix: Ollama native API nutzen statt OpenAI-compat für Enricher
Qwen3 via /v1/chat/completions verbraucht max_tokens fürs Reasoning und liefert leeren Content. Umstellung auf /api/chat mit think=false löst das Problem.
This commit is contained in:
parent
d8e3207259
commit
caa2883a66
1 changed file with 9 additions and 6 deletions
|
|
@@ -58,25 +58,28 @@ def _is_enriched(entry: dict) -> bool:
|
|||
|
||||
|
||||
def _call_ollama(prompt: str, model: str = MODEL) -> str:
|
||||
"""Ruft Ollama via native /api/chat auf (kein OpenAI-compat)."""
|
||||
payload = {
|
||||
"model": model,
|
||||
"messages": [
|
||||
{"role": "system", "content": (
|
||||
"Du bist eine Filmdatenbank. Antworte NUR mit validem JSON, "
|
||||
"kein Markdown, keine Erklärungen. /no_think"
|
||||
"kein Markdown, keine Erklärungen."
|
||||
)},
|
||||
{"role": "user", "content": prompt + " /no_think"},
|
||||
{"role": "user", "content": prompt},
|
||||
],
|
||||
"max_tokens": 800,
|
||||
"stream": False,
|
||||
"think": False,
|
||||
"options": {"num_predict": 1024},
|
||||
}
|
||||
try:
|
||||
r = requests.post(
|
||||
f"{OLLAMA_BASE}/v1/chat/completions",
|
||||
json=payload, timeout=120,
|
||||
f"{OLLAMA_BASE}/api/chat",
|
||||
json=payload, timeout=180,
|
||||
)
|
||||
r.raise_for_status()
|
||||
text = r.json()["choices"][0]["message"]["content"].strip()
|
||||
data = r.json()
|
||||
text = data.get("message", {}).get("content", "").strip()
|
||||
if text.startswith("```"):
|
||||
text = re.sub(r"^```\w*\n?", "", text)
|
||||
text = re.sub(r"\n?```$", "", text)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue