feat(llm): Ollama warmup bei Start - Modelle permanent im VRAM
- warmup_ollama() laedt qwen3:30b-a3b + nomic-embed-text mit keep_alive=-1
- Wird beim Bot-Start in post_init() aufgerufen (via asyncio.to_thread)
- keep_alive=-1 nur ueber native Ollama API (/api/generate) moeglich
- GPU haelt 22.6/24 GB permanent: Text + Embeddings ohne Swap
This commit is contained in:
parent
a0724ba6f1
commit
d2a6391f52
2 changed files with 15 additions and 1 deletions
|
|
@ -25,6 +25,20 @@ FALLBACK_MODEL = "qwen2.5:14b"
|
||||||
MAX_TOOL_ROUNDS = 3
|
MAX_TOOL_ROUNDS = 3
|
||||||
OLLAMA_MODELS = {MODEL_LOCAL, FALLBACK_MODEL}
|
OLLAMA_MODELS = {MODEL_LOCAL, FALLBACK_MODEL}
|
||||||
|
|
||||||
|
|
||||||
|
def warmup_ollama():
    """Load the main model and the embedding model permanently into VRAM.

    Posts an empty prompt with ``keep_alive=-1`` to the native Ollama
    ``/api/generate`` endpoint, which loads each model and keeps it
    resident until Ollama itself is stopped (``keep_alive=-1`` is only
    supported on the native API, not the OpenAI-compatible one).

    Best-effort by design: any failure (unreachable Ollama, unknown
    model, HTTP error) is logged as a warning and never raised, so bot
    startup is not blocked by a broken warmup.
    """
    for model in [MODEL_LOCAL, "nomic-embed-text"]:
        try:
            resp = requests.post(
                f"{OLLAMA_BASE}/api/generate",
                json={"model": model, "prompt": "", "keep_alive": -1},
                timeout=120,
            )
            # Without this check an HTTP error response (e.g. 404 for a
            # model that is not pulled) would be logged below as a
            # successful warmup. raise_for_status() routes it into the
            # warning branch instead.
            resp.raise_for_status()
            log.info("Ollama warmup: %s permanent geladen", model)
        except Exception as e:
            log.warning("Ollama warmup fehlgeschlagen fuer %s: %s", model, e)
|
||||||
|
|
||||||
PASSTHROUGH_TOOLS = {"get_temperaturen", "get_energie", "get_heizung"}
|
PASSTHROUGH_TOOLS = {"get_temperaturen", "get_energie", "get_heizung"}
|
||||||
|
|
||||||
_LOCAL_OVERRIDES = [
|
_LOCAL_OVERRIDES = [
|
||||||
|
|
@ -269,7 +283,6 @@ def _call_api(messages: list, api_key: str, use_tools: bool = True,
|
||||||
headers = {"Content-Type": "application/json"}
|
headers = {"Content-Type": "application/json"}
|
||||||
timeout = _ollama_timeout_for(chosen)
|
timeout = _ollama_timeout_for(chosen)
|
||||||
_add_no_think(payload.get("messages", []))
|
_add_no_think(payload.get("messages", []))
|
||||||
payload["keep_alive"] = -1
|
|
||||||
else:
|
else:
|
||||||
url = f"{OPENROUTER_BASE}/chat/completions"
|
url = f"{OPENROUTER_BASE}/chat/completions"
|
||||||
headers = {"Authorization": f"Bearer {api_key}"}
|
headers = {"Authorization": f"Bearer {api_key}"}
|
||||||
|
|
|
||||||
|
|
@ -1025,6 +1025,7 @@ def main():
|
||||||
await application.bot.set_my_commands(BOT_COMMANDS)
|
await application.bot.set_my_commands(BOT_COMMANDS)
|
||||||
log.info("Kommandomenü registriert")
|
log.info("Kommandomenü registriert")
|
||||||
asyncio.create_task(_watchdog_loop())
|
asyncio.create_task(_watchdog_loop())
|
||||||
|
asyncio.create_task(asyncio.to_thread(llm.warmup_ollama))
|
||||||
asyncio.create_task(_monitor_loop(application))
|
asyncio.create_task(_monitor_loop(application))
|
||||||
log.info("Monitor-Loop aktiv (alle 10 Min)")
|
log.info("Monitor-Loop aktiv (alle 10 Min)")
|
||||||
if application.job_queue is None:
|
if application.job_queue is None:
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue