From e204edf2eaf319e72a9c219d22d1d450b362fab4 Mon Sep 17 00:00:00 2001
From: Homelab Cursor <homelab@orbitalo.net>
Date: Fri, 27 Mar 2026 13:49:21 +0100
Subject: [PATCH] savetv_enrich: File-Lock + Debug-Logging + robusterer
 Cache-Save
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- fcntl.flock verhindert parallele Enricher-Instanzen
- Atomarer Cache-Save über tmp-Datei
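- Debug-Logs (INFO/WARNING-Level) bei leerer Ollama-Antwort sowie bei
  leerer/gefilterter Beschreibung
- Sleep auf 2s erhöht für stabilere Ollama-Antworten
- Docstrings der internen Hilfsfunktionen und von run() entfernt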
---
 homelab-ai-bot/savetv_enrich.py | 44 ++++++++++++++++++++++-----------
 1 file changed, 30 insertions(+), 14 deletions(-)

diff --git a/homelab-ai-bot/savetv_enrich.py b/homelab-ai-bot/savetv_enrich.py
index 2e590d59..81b712df 100644
--- a/homelab-ai-bot/savetv_enrich.py
+++ b/homelab-ai-bot/savetv_enrich.py
@@ -9,6 +9,7 @@ die savetv_web.py und savetv_extra_routes.py verwenden.
 Ergebnis: 3-6 Sätze Beschreibung, Hauptdarsteller, Land, Jahr, Genre.
 """
 
+import fcntl
 import json
 import logging
 import os
@@ -35,8 +36,9 @@ MODEL = "qwen2.5:14b"
 FALLBACK_MODEL = "qwen3:30b-a3b"
 
 FILMINFO_CACHE = Path("/mnt/savetv/.filminfo_cache.json")
+LOCKFILE = Path("/tmp/savetv_enrich.lock")
 BATCH_SIZE = 8
-SLEEP_BETWEEN = 1.5
+SLEEP_BETWEEN = 2.0
 
 
 def _load_cache() -> dict:
@@ -49,16 +51,16 @@ def _load_cache() -> dict:
 
 
 def _save_cache(cache: dict):
-    FILMINFO_CACHE.write_text(json.dumps(cache, ensure_ascii=False, indent=1))
+    tmp = FILMINFO_CACHE.with_suffix(".tmp")
+    tmp.write_text(json.dumps(cache, ensure_ascii=False, indent=1))
+    tmp.rename(FILMINFO_CACHE)
 
 
 def _is_enriched(entry: dict) -> bool:
-    """Prüft ob ein Cache-Eintrag bereits KI-angereichert ist."""
     return bool(entry.get("description"))
 
 
 def _call_ollama(prompt: str, model: str = MODEL) -> str:
-    """Ruft Ollama via native /api/chat auf."""
     payload = {
         "model": model,
         "messages": [
@@ -95,7 +97,6 @@ def _call_ollama(prompt: str, model: str = MODEL) -> str:
 
 
 def _normalize_actors(actors_raw) -> list:
-    """Wandelt actors-Feld in eine einfache String-Liste um."""
     if not actors_raw or not isinstance(actors_raw, list):
         return []
     result = []
@@ -110,7 +111,6 @@ def _normalize_actors(actors_raw) -> list:
 
 
 def _enrich_film(title: str) -> dict:
-    """Fragt die KI nach Filmdaten zu einem Titel."""
     clean_title = re.sub(r"\s*[-\u2013\u2014]\s*.+$", "", title).strip()
 
     prompt = f"""Gib mir Informationen zum Film "{clean_title}".
@@ -128,6 +128,7 @@ Falls du den Film nicht kennst, setze description auf leeren String."""
 
     raw = _call_ollama(prompt)
     if not raw:
+        log.warning("  Leere Antwort von Ollama für '%s'", title)
         return {"year": "", "countries": [], "genres": [], "actors": [],
                 "director": "", "description": ""}
 
@@ -139,17 +140,21 @@ Falls du den Film nicht kennst, setze description auf leeren String."""
             try:
                 data = json.loads(match.group())
             except json.JSONDecodeError:
-                log.warning("JSON-Parse fehlgeschlagen für '%s': %s", title, raw[:100])
+                log.warning("JSON-Parse fehlgeschlagen für '%s': %.200s", title, raw)
                 return {"year": "", "countries": [], "genres": [], "actors": [],
                         "director": "", "description": ""}
         else:
-            log.warning("Kein JSON gefunden für '%s': %s", title, raw[:100])
+            log.warning("Kein JSON gefunden für '%s': %.200s", title, raw)
             return {"year": "", "countries": [], "genres": [], "actors": [],
                     "director": "", "description": ""}
 
     desc = str(data.get("description", ""))[:600]
-    if not _is_mostly_latin(desc):
+    if desc and not _is_mostly_latin(desc):
+        log.info("  Nicht-lateinische Beschreibung gefiltert: %.80s", desc)
         desc = ""
+    if not desc:
+        log.info("  Beschreibung leer, raw year=%s actors=%s",
+                 data.get("year"), str(data.get("actors", []))[:80])
 
     return {
         "year": str(data.get("year", ""))[:4],
@@ -162,7 +167,6 @@ Falls du den Film nicht kennst, setze description auf leeren String."""
 
 
 def _is_mostly_latin(text: str) -> bool:
-    """Prüft ob ein Text hauptsächlich lateinische Zeichen enthält."""
     if not text:
         return False
     latin = sum(1 for c in text if c.isascii() or '\u00C0' <= c <= '\u024F')
@@ -170,7 +174,6 @@ def _is_mostly_latin(text: str) -> bool:
 
 
 def run():
-    """Hauptfunktion: Archiv laden, fehlende Filme anreichern."""
     log.info("Starte Film-Enrichment...")
 
     entries = savetv._get_full_archive()
@@ -194,7 +197,8 @@ def run():
         log.info("Alle %d Filme bereits angereichert", len(titles))
         return
 
-    log.info("%d Filme im Archiv, %d davon noch ohne KI-Beschreibung", len(titles), len(missing))
+    log.info("%d Filme im Archiv, %d davon noch ohne KI-Beschreibung",
+             len(titles), len(missing))
 
     enriched = 0
     for i, title in enumerate(missing):
@@ -204,7 +208,8 @@ def run():
             if info.get("description"):
                 cache[title] = info
                 enriched += 1
-                log.info("  OK: %s (%s)", info.get("year", "?"), ", ".join(info.get("actors", [])[:2]))
+                log.info("  OK: %s (%s)", info.get("year", "?"),
+                         ", ".join(info.get("actors", [])[:2]))
                 if enriched % BATCH_SIZE == 0:
                     _save_cache(cache)
                     log.info("  Cache gespeichert (%d angereichert)", enriched)
@@ -229,4 +234,15 @@ def run():
 
 
 if __name__ == "__main__":
-    run()
+    lock_fd = open(LOCKFILE, "w")
+    try:
+        fcntl.flock(lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
+    except BlockingIOError:
+        print("Enricher läuft bereits — Abbruch.")
+        sys.exit(0)
+
+    try:
+        run()
+    finally:
+        fcntl.flock(lock_fd, fcntl.LOCK_UN)
+        lock_fd.close()