From 830682945b94b6c03fa63adf9d73e4877094f05f Mon Sep 17 00:00:00 2001 From: orbitalo Date: Tue, 31 Mar 2026 11:24:04 +0000 Subject: [PATCH] Add savetv_country_filter: exclude DE/FR production countries from film selection --- homelab-ai-bot/tools/savetv_country_filter.py | 79 +++++++++++++++++++ 1 file changed, 79 insertions(+) create mode 100644 homelab-ai-bot/tools/savetv_country_filter.py diff --git a/homelab-ai-bot/tools/savetv_country_filter.py b/homelab-ai-bot/tools/savetv_country_filter.py new file mode 100644 index 00000000..066e387c --- /dev/null +++ b/homelab-ai-bot/tools/savetv_country_filter.py @@ -0,0 +1,79 @@ +"""Save.TV — Filter: Deutschland/Frankreich (Produktionsland via Wikidata-Cache). + +Nutzt dieselbe .filminfo_cache.json wie savetv_extra_routes (Wikidata countries). +Ohne Cache-Eintrag fuer einen Titel wird nicht gefiltert (Film bleibt sichtbar). + +Deaktivieren: Umgebungsvariable SAVETV_FILTER_DE_FR=0 +""" +from __future__ import annotations + +import json +import os +import re +from pathlib import Path + +FILMINFO_CACHE = Path("/mnt/savetv/.filminfo_cache.json") + +# Deutschland / Frankreich (haeufige Wikidata-Sprachvarianten) +_COUNTRY_EXCL = re.compile( + r"\b(" + r"deutschland|germany|allemagne|bundesrepublik(\s+deutschland)?|" + r"west\s+germany|east\s+germany|german\s+democratic\s+republic|" + r"rfa|\bbrd\b|\bddr\b|\bgdr\b|" + r"frankreich|france|republique\s+francaise" + r")\b", + re.IGNORECASE, +) + + +def _enabled() -> bool: + v = (os.environ.get("SAVETV_FILTER_DE_FR") or "1").strip().lower() + return v not in ("0", "false", "no", "off") + + +def _norm_title_key(s: str) -> str: + return re.sub(r"\s+", " ", re.sub(r"[^\w\s]", " ", s or "")).strip().lower() + + +def load_filminfo_cache() -> dict: + if not FILMINFO_CACHE.exists(): + return {} + try: + data = json.loads(FILMINFO_CACHE.read_text(encoding="utf-8")) + return data if isinstance(data, dict) else {} + except Exception: + return {} + + +def _countries_for_title(title: str, cache: dict) -> list[str] | None: + if not title or not cache: + return None + entry = cache.get(title) + if entry is None: + nk = _norm_title_key(title) + for k, v in cache.items(): + if isinstance(k, str) and _norm_title_key(k) == nk: + entry = v + break + if not entry or not isinstance(entry, dict): + return None + c = entry.get("countries") + if not c: + return None + if isinstance(c, list): + return [str(x) for x in c if x] + return [str(c)] + + +def should_exclude_production_country(title: str, cache: dict | None = None) -> bool: + """True, wenn ein Produktionsland DE oder FR ist (laut Wikidata-Cache).""" + if not _enabled(): + return False + cache = load_filminfo_filter() if cache is None else cache + countries = _countries_for_title(title, cache) + if not countries: + return False + for name in countries: + if name and _COUNTRY_EXCL.search(name): + return True + return False