From 801114590497e47ad9fca830778b0f8416423812 Mon Sep 17 00:00:00 2001
From: Homelab Cursor
Date: Sat, 11 Apr 2026 07:33:57 +0200
Subject: [PATCH] feat: Hausmeister-Bot Text-Modell auf Grok 4.1 Fast (OpenRouter) umgestellt
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- MODEL_LOCAL: qwen3:30b-a3b (Ollama lokal) -> x-ai/grok-4.1-fast (OpenRouter)
- OLLAMA_MODELS geleert — kein lokales Ollama mehr fuer Text
- warmup_ollama() als No-Op (kein VRAM-Warmup mehr noetig)
- Vision bleibt auf gpt-4o-mini (OpenRouter, kein GPU)
- Ref: Issue #75 Phase 1
---
 homelab-ai-bot/llm.py | 20 +++++---------------
 1 file changed, 5 insertions(+), 15 deletions(-)

diff --git a/homelab-ai-bot/llm.py b/homelab-ai-bot/llm.py
index f3bb6afc..5caad8b8 100644
--- a/homelab-ai-bot/llm.py
+++ b/homelab-ai-bot/llm.py
@@ -18,27 +18,17 @@ log = logging.getLogger('llm')
 OLLAMA_BASE = "http://100.84.255.83:11434"
 OPENROUTER_BASE = "https://openrouter.ai/api/v1"
 
-MODEL_LOCAL = "qwen3:30b-a3b"
+MODEL_LOCAL = "x-ai/grok-4.1-fast"
 MODEL_VISION = "openai/gpt-4o-mini"
 MODEL_ONLINE = "perplexity/sonar"
-FALLBACK_MODEL = "qwen2.5:14b"
+FALLBACK_MODEL = None
 MAX_TOOL_ROUNDS = 3
-OLLAMA_MODELS = {MODEL_LOCAL, FALLBACK_MODEL}
+OLLAMA_MODELS = set()
 
 
 def warmup_ollama():
-    """Laedt Hauptmodell + Embedding permanent in VRAM (keep_alive=-1)."""
-    for model in [MODEL_LOCAL, "nomic-embed-text"]:
-        try:
-            requests.post(
-                f"{OLLAMA_BASE}/api/generate",
-                json={"model": model, "prompt": "", "keep_alive": -1},
-                timeout=120,
-            )
-            log.info("Ollama warmup: %s permanent geladen", model)
-        except Exception as e:
-            log.warning("Ollama warmup fehlgeschlagen fuer %s: %s", model, e)
-
+    """No-Op: Text-Modell laeuft jetzt ueber OpenRouter (Grok 4.1 Fast), kein Ollama-Warmup noetig."""
+    log.info('Ollama warmup uebersprungen — Text laeuft ueber OpenRouter (Grok 4.1 Fast)')
 
 PASSTHROUGH_TOOLS = {"get_temperaturen", "get_energie", "get_heizung"}
 _LOCAL_OVERRIDES = [