diff --git a/homelab-ai-bot/tools/savetv.py b/homelab-ai-bot/tools/savetv.py index bb7f55a2..c2158adf 100644 --- a/homelab-ai-bot/tools/savetv.py +++ b/homelab-ai-bot/tools/savetv.py @@ -102,11 +102,10 @@ SYSTEM_PROMPT_EXTRA = """TV / Save.TV Tools: - savetv_record: Nimmt einen Film per TelecastId auf - get_savetv_status: Zeigt Archiv und geplante Aufnahmen Wenn der User nach Archiv-Filmen/Bewertung fragt, nutze get_savetv_archive_filme. -WICHTIG bei Archiv-Bewertung: Die Scores sind nur grobe Heuristiken (Sender, Highlight-Flag). -Nutze DEIN eigenes Filmwissen um die wirklich guten Filme zu identifizieren! Schau die -KOMPLETTE Liste durch — auch Filme mit Score 50-55 koennen Meisterwerke sein (z.B. bekannte -internationale Filme, Oscar-Gewinner, Klassiker). Sortiere nach DEINER Einschaetzung der -Filmqualitaet, nicht blind nach Score. Hebe besonders hervor: bald ablaufende gute Filme. +WICHTIG bei Archiv-Bewertung: Das Tool liefert KINO-HIGHLIGHTS (echte Kinofilme, Klassiker, +preisgekroente Filme) getrennt von deutschem Fernsehprogramm. Praesentiere dem User die +KINO-HIGHLIGHTS zuerst und erklaere kurz warum jeder Film sehenswert ist (Regisseur, Preise, +Stars). Hebe DRINGEND ablaufende Kino-Highlights besonders hervor — die muss er schnell sichern. """ @@ -406,61 +405,78 @@ DOKU_KEYWORDS = { "gehirn unter strom", } +KNOWN_CINEMA = { + "gravity", "blood diamond", "dunkirk", + "die fabelhafte welt der amelie", "children of men", + "die üblichen verdächtigen", "mord im orient express", + "aviator", "12 monkeys", "bullet train", "salt", + "anatomie eines falls", "i, tonya", "die wannseekonferenz", + "die vögel", "gosford park", + "albert nobbs", "der mit dem wolf tanzt", "zeugin der anklage", + "crazy heart", "talk to me", "das massaker von katyn", + "fallende blätter", "das lehrerzimmer", "die aussprache", + "old henry", "unternehmen petticoat", + "die reise zum mittelpunkt der erde", + "tagebuch einer kammerzofe", + "deutschstunde", "disco boy", + "die kinder der seidenstraße", + "führer und verführer", "nur fliegen ist schöner", + "der mit dem wolf tanzt", "nightwatch", + "der dritte", "blind willow", + "the revenant", "lion", "undisputed", + "no turning back", "parker", "stirb langsam", + "shooter", "valerian", "the suicide squad", + "mile 22", "open range", "hot summer nights", + "local hero", "z for zachariah", "the good neighbor", + "the informer", "la coda del diavolo", + "ladykillers", "hi-lo country", "yalda", + "bad director", "powder girl", +} -def _score_archive_film(title, station, highlight, subtitle="", thema=""): - """Bewertet einen Archiv-Film heuristisch (0-100).""" +ENGLISH_WORDS = { + "the", "of", "and", "in", "for", "from", "with", "on", "at", + "to", "is", "has", "men", "man", "last", "night", "day", "club", + "girl", "boy", "road", "way", "no", "dead", "kill", "out", + "black", "good", "old", "new", "one", "two", "fallen", + "international", "redemption", "revenge", "spirit", +} + + +def _is_known_cinema(title): + """Prueft ob ein Film als bekannter Kinofilm erkannt wird.""" + t = title.lower().strip() + for known in KNOWN_CINEMA: + if t.startswith(known) or t == known: + return True + + words = set(re.findall(r'[a-z]+', t)) + english_count = len(words & ENGLISH_WORDS) + if re.match(r'^[A-Za-z0-9:,\'\-\.\! ]+$', title) and english_count >= 2: + return True + + return False + + +def _is_excluded(title): + """Filtert Programmänderungen und Dokus.""" t = title.lower() - s = station.lower() - if "programmänderung" in t: - return -1 - + return True for kw in DOKU_KEYWORDS: if kw in t: - return -1 - - score = 50 - - premium_stations = {"arte", "zdf", "das erste", "mdr", "swr", "ndr", "wdr", "br"} - action_stations = {"prosieben", "sat.1", "kabel 1", "vox", "rtl", "tele 5", "zdf_neo"} - if s in premium_stations: - score += 5 - elif s in action_stations: - score += 3 - - if highlight: - score += 15 - - desc = (thema or subtitle or "").lower() - if len(desc) > 30: - score += 5 - - quality_hints = [ - "oscar", "golden globe", "cannes", "berlinale", "venedig", - "preisgekrönt", "meisterwerk", "bestseller", "basiert auf", - ] - for hint in quality_hints: - if hint in desc or hint in t: - score += 10 - break - - if any(c.isascii() and c.isalpha() for c in title) and not all(c.isascii() for c in title if c.isalpha()): - pass - elif re.search(r'[A-Z][a-z]+ [A-Z][a-z]+', title) and not re.search(r'[äöüÄÖÜß]', title): - score += 8 - - return score + return True + return False def handle_get_savetv_archive_filme(**kw): - """Alle fertigen Archiv-Filme holen, bewerten, deduplizieren, sortiert ausgeben.""" + """Alle fertigen Archiv-Filme holen, in Kino vs. TV trennen, sortiert ausgeben.""" entries = _get_full_archive() if not entries: return "Keine Archiv-Eintraege gefunden." - films = [] seen_titles = {} series_count = 0 + excluded_count = 0 for e in entries: tc = e.get("STRTELECASTENTRY", {}) @@ -470,63 +486,74 @@ def handle_get_savetv_archive_filme(**kw): continue title = tc.get("STITLE", "?") + if _is_excluded(title): + excluded_count += 1 + continue + station = tc.get("STVSTATIONNAME", "?") - highlight = tc.get("BISHIGHLIGHT", False) - subtitle = tc.get("SSUBTITLE", "") - thema = tc.get("STHEMA", "") - date = tc.get("DSTARTDATE", "?")[:10] days_left = int(tc.get("IDAYSLEFTBEFOREDELETE", 0)) tid = int(tc.get("ITELECASTID", 0)) - - score = _score_archive_film(title, station, highlight, subtitle, thema) - if score < 0: - continue + is_cinema = _is_known_cinema(title) key = title.lower().strip() if key in seen_titles: if days_left > seen_titles[key]["days_left"]: - seen_titles[key]["days_left"] = days_left - seen_titles[key]["date"] = date - seen_titles[key]["tid"] = tid + seen_titles[key].update(days_left=days_left, tid=tid) continue seen_titles[key] = { - "title": title, "station": station, "date": date, - "days_left": days_left, "score": score, "tid": tid, - "highlight": highlight, + "title": title, "station": station, + "days_left": days_left, "tid": tid, + "cinema": is_cinema, } - films = sorted(seen_titles.values(), key=lambda x: (-x["score"], x["days_left"])) - - total_archive = len(entries) - urgent = sorted( - [f for f in films if f["days_left"] <= 7], - key=lambda x: (x["days_left"], -x["score"]), + all_films = list(seen_titles.values()) + cinema = sorted( + [f for f in all_films if f["cinema"]], + key=lambda x: x["days_left"], + ) + tv_films = sorted( + [f for f in all_films if not f["cinema"]], + key=lambda x: x["days_left"], ) lines = [ - f"Save.TV Archiv-Bewertung: {len(films)} Filme " - f"(von {total_archive} Aufnahmen, {series_count} Serien-Episoden gefiltert)\n" + f"Save.TV Archiv: {len(all_films)} Filme " + f"({series_count} Serien, {excluded_count} Dokus/Spam gefiltert)\n" ] - if urgent: - lines.append(f"DRINGEND — {len(urgent)} Filme laufen in <=7 Tagen ab:") - for f in urgent: + cinema_urgent = [f for f in cinema if f["days_left"] <= 7] + cinema_safe = [f for f in cinema if f["days_left"] > 7] + + if cinema_urgent: + lines.append( + f"KINO-HIGHLIGHTS DRINGEND — laufen bald ab, JETZT sichern:" + ) + for f in cinema_urgent: lines.append( - f" [{f['days_left']}d] {f['title'][:50]} | {f['station']} | TID {f['tid']}" + f" [{f['days_left']}d] {f['title']} ({f['station']})" ) lines.append("") - safe = [f for f in films if f["days_left"] > 7] - if safe: - lines.append( - f"ALLE FILME IM ARCHIV ({len(safe)}) — nutze dein Filmwissen " - f"um die besten zu identifizieren:" - ) - for f in safe: + if cinema_safe: + lines.append(f"KINO-HIGHLIGHTS ({len(cinema_safe)} Filme):") + for f in cinema_safe: lines.append( - f" {f['title'][:50]} | {f['station']} | {f['days_left']}d" + f" {f['title']} ({f['station']}, {f['days_left']}d)" ) + lines.append("") + + if tv_films: + tv_urgent = [f for f in tv_films if f["days_left"] <= 7] + tv_safe = [f for f in tv_films if f["days_left"] > 7] + lines.append( + f"DEUTSCHE TV-FILME ({len(tv_films)}, " + f"davon {len(tv_urgent)} bald ablaufend):" + ) + for f in tv_urgent: + lines.append(f" [{f['days_left']}d] {f['title']} ({f['station']})") + if tv_safe: + lines.append(f" ... und {len(tv_safe)} weitere mit >7 Tagen") return "\n".join(lines)