savetv_enrich: File-Lock + Debug-Logging + robusterer Cache-Save
- fcntl.flock verhindert parallele Enricher-Instanzen
- Atomarer Cache-Save über tmp-Datei
- Debug-Logs bei leerer/gefilterter Beschreibung
- Sleep auf 2s erhöht für stabilere Ollama-Antworten
This commit is contained in:
parent
908aadd13f
commit
e204edf2ea
1 changed files with 30 additions and 14 deletions
|
|
@ -9,6 +9,7 @@ die savetv_web.py und savetv_extra_routes.py verwenden.
|
||||||
Ergebnis: 3-6 Sätze Beschreibung, Hauptdarsteller, Land, Jahr, Genre.
|
Ergebnis: 3-6 Sätze Beschreibung, Hauptdarsteller, Land, Jahr, Genre.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import fcntl
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
|
|
@ -35,8 +36,9 @@ MODEL = "qwen2.5:14b"
|
||||||
FALLBACK_MODEL = "qwen3:30b-a3b"
|
FALLBACK_MODEL = "qwen3:30b-a3b"
|
||||||
|
|
||||||
FILMINFO_CACHE = Path("/mnt/savetv/.filminfo_cache.json")
|
FILMINFO_CACHE = Path("/mnt/savetv/.filminfo_cache.json")
|
||||||
|
LOCKFILE = Path("/tmp/savetv_enrich.lock")
|
||||||
BATCH_SIZE = 8
|
BATCH_SIZE = 8
|
||||||
SLEEP_BETWEEN = 1.5
|
SLEEP_BETWEEN = 2.0
|
||||||
|
|
||||||
|
|
||||||
def _load_cache() -> dict:
|
def _load_cache() -> dict:
|
||||||
|
|
@ -49,16 +51,16 @@ def _load_cache() -> dict:
|
||||||
|
|
||||||
|
|
||||||
def _save_cache(cache: dict):
    """Persist the film-info cache to FILMINFO_CACHE atomically.

    Serializes *cache* as JSON into a sibling ``.tmp`` file first and then
    replaces the real cache file in one step, so concurrent readers never
    observe a half-written JSON file.
    """
    tmp = FILMINFO_CACHE.with_suffix(".tmp")
    # Explicit UTF-8: write_text without an encoding falls back to the
    # locale encoding, which can corrupt non-ASCII titles/descriptions.
    tmp.write_text(json.dumps(cache, ensure_ascii=False, indent=1),
                   encoding="utf-8")
    # os.replace is atomic on POSIX and, unlike Path.rename, also
    # overwrites an existing target on Windows.
    os.replace(tmp, FILMINFO_CACHE)
|
|
||||||
|
|
||||||
def _is_enriched(entry: dict) -> bool:
|
def _is_enriched(entry: dict) -> bool:
|
||||||
"""Prüft ob ein Cache-Eintrag bereits KI-angereichert ist."""
|
|
||||||
return bool(entry.get("description"))
|
return bool(entry.get("description"))
|
||||||
|
|
||||||
|
|
||||||
def _call_ollama(prompt: str, model: str = MODEL) -> str:
|
def _call_ollama(prompt: str, model: str = MODEL) -> str:
|
||||||
"""Ruft Ollama via native /api/chat auf."""
|
|
||||||
payload = {
|
payload = {
|
||||||
"model": model,
|
"model": model,
|
||||||
"messages": [
|
"messages": [
|
||||||
|
|
@ -95,7 +97,6 @@ def _call_ollama(prompt: str, model: str = MODEL) -> str:
|
||||||
|
|
||||||
|
|
||||||
def _normalize_actors(actors_raw) -> list:
|
def _normalize_actors(actors_raw) -> list:
|
||||||
"""Wandelt actors-Feld in eine einfache String-Liste um."""
|
|
||||||
if not actors_raw or not isinstance(actors_raw, list):
|
if not actors_raw or not isinstance(actors_raw, list):
|
||||||
return []
|
return []
|
||||||
result = []
|
result = []
|
||||||
|
|
@ -110,7 +111,6 @@ def _normalize_actors(actors_raw) -> list:
|
||||||
|
|
||||||
|
|
||||||
def _enrich_film(title: str) -> dict:
|
def _enrich_film(title: str) -> dict:
|
||||||
"""Fragt die KI nach Filmdaten zu einem Titel."""
|
|
||||||
clean_title = re.sub(r"\s*[-\u2013\u2014]\s*.+$", "", title).strip()
|
clean_title = re.sub(r"\s*[-\u2013\u2014]\s*.+$", "", title).strip()
|
||||||
|
|
||||||
prompt = f"""Gib mir Informationen zum Film "{clean_title}".
|
prompt = f"""Gib mir Informationen zum Film "{clean_title}".
|
||||||
|
|
@ -128,6 +128,7 @@ Falls du den Film nicht kennst, setze description auf leeren String."""
|
||||||
|
|
||||||
raw = _call_ollama(prompt)
|
raw = _call_ollama(prompt)
|
||||||
if not raw:
|
if not raw:
|
||||||
|
log.warning(" Leere Antwort von Ollama für '%s'", title)
|
||||||
return {"year": "", "countries": [], "genres": [], "actors": [],
|
return {"year": "", "countries": [], "genres": [], "actors": [],
|
||||||
"director": "", "description": ""}
|
"director": "", "description": ""}
|
||||||
|
|
||||||
|
|
@ -139,17 +140,21 @@ Falls du den Film nicht kennst, setze description auf leeren String."""
|
||||||
try:
|
try:
|
||||||
data = json.loads(match.group())
|
data = json.loads(match.group())
|
||||||
except json.JSONDecodeError:
|
except json.JSONDecodeError:
|
||||||
log.warning("JSON-Parse fehlgeschlagen für '%s': %s", title, raw[:100])
|
log.warning("JSON-Parse fehlgeschlagen für '%s': %.200s", title, raw)
|
||||||
return {"year": "", "countries": [], "genres": [], "actors": [],
|
return {"year": "", "countries": [], "genres": [], "actors": [],
|
||||||
"director": "", "description": ""}
|
"director": "", "description": ""}
|
||||||
else:
|
else:
|
||||||
log.warning("Kein JSON gefunden für '%s': %s", title, raw[:100])
|
log.warning("Kein JSON gefunden für '%s': %.200s", title, raw)
|
||||||
return {"year": "", "countries": [], "genres": [], "actors": [],
|
return {"year": "", "countries": [], "genres": [], "actors": [],
|
||||||
"director": "", "description": ""}
|
"director": "", "description": ""}
|
||||||
|
|
||||||
desc = str(data.get("description", ""))[:600]
|
desc = str(data.get("description", ""))[:600]
|
||||||
if not _is_mostly_latin(desc):
|
if desc and not _is_mostly_latin(desc):
|
||||||
|
log.info(" Nicht-lateinische Beschreibung gefiltert: %.80s", desc)
|
||||||
desc = ""
|
desc = ""
|
||||||
|
if not desc:
|
||||||
|
log.info(" Beschreibung leer, raw year=%s actors=%s",
|
||||||
|
data.get("year"), str(data.get("actors", []))[:80])
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"year": str(data.get("year", ""))[:4],
|
"year": str(data.get("year", ""))[:4],
|
||||||
|
|
@ -162,7 +167,6 @@ Falls du den Film nicht kennst, setze description auf leeren String."""
|
||||||
|
|
||||||
|
|
||||||
def _is_mostly_latin(text: str) -> bool:
|
def _is_mostly_latin(text: str) -> bool:
|
||||||
"""Prüft ob ein Text hauptsächlich lateinische Zeichen enthält."""
|
|
||||||
if not text:
|
if not text:
|
||||||
return False
|
return False
|
||||||
latin = sum(1 for c in text if c.isascii() or '\u00C0' <= c <= '\u024F')
|
latin = sum(1 for c in text if c.isascii() or '\u00C0' <= c <= '\u024F')
|
||||||
|
|
@ -170,7 +174,6 @@ def _is_mostly_latin(text: str) -> bool:
|
||||||
|
|
||||||
|
|
||||||
def run():
|
def run():
|
||||||
"""Hauptfunktion: Archiv laden, fehlende Filme anreichern."""
|
|
||||||
log.info("Starte Film-Enrichment...")
|
log.info("Starte Film-Enrichment...")
|
||||||
|
|
||||||
entries = savetv._get_full_archive()
|
entries = savetv._get_full_archive()
|
||||||
|
|
@ -194,7 +197,8 @@ def run():
|
||||||
log.info("Alle %d Filme bereits angereichert", len(titles))
|
log.info("Alle %d Filme bereits angereichert", len(titles))
|
||||||
return
|
return
|
||||||
|
|
||||||
log.info("%d Filme im Archiv, %d davon noch ohne KI-Beschreibung", len(titles), len(missing))
|
log.info("%d Filme im Archiv, %d davon noch ohne KI-Beschreibung",
|
||||||
|
len(titles), len(missing))
|
||||||
|
|
||||||
enriched = 0
|
enriched = 0
|
||||||
for i, title in enumerate(missing):
|
for i, title in enumerate(missing):
|
||||||
|
|
@ -204,7 +208,8 @@ def run():
|
||||||
if info.get("description"):
|
if info.get("description"):
|
||||||
cache[title] = info
|
cache[title] = info
|
||||||
enriched += 1
|
enriched += 1
|
||||||
log.info(" OK: %s (%s)", info.get("year", "?"), ", ".join(info.get("actors", [])[:2]))
|
log.info(" OK: %s (%s)", info.get("year", "?"),
|
||||||
|
", ".join(info.get("actors", [])[:2]))
|
||||||
if enriched % BATCH_SIZE == 0:
|
if enriched % BATCH_SIZE == 0:
|
||||||
_save_cache(cache)
|
_save_cache(cache)
|
||||||
log.info(" Cache gespeichert (%d angereichert)", enriched)
|
log.info(" Cache gespeichert (%d angereichert)", enriched)
|
||||||
|
|
@ -229,4 +234,15 @@ def run():
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
    # Exclusive, non-blocking file lock prevents two enricher instances
    # from running in parallel (e.g. overlapping cron invocations that
    # would race on the cache file).
    # 'with' guarantees the lock fd is closed on every exit path — the
    # original left it open when flock failed and sys.exit was called.
    with open(LOCKFILE, "w") as lock_fd:
        try:
            fcntl.flock(lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
        except BlockingIOError:
            # Another instance holds the lock; exit quietly with status 0.
            print("Enricher läuft bereits — Abbruch.")
            sys.exit(0)

        try:
            run()
        finally:
            # Closing the fd would also drop the flock, but releasing
            # explicitly keeps the unlock deterministic and documented.
            fcntl.flock(lock_fd, fcntl.LOCK_UN)
|
|
|
||||||
Loading…
Add table
Reference in a new issue