Save.TV: Kino-Highlights Erkennung statt Score-basiert, 62 echte Kinofilme identifiziert

This commit is contained in:
root 2026-03-17 15:14:28 +07:00
parent 2fae7af346
commit 63d2a64533

View file

@ -102,11 +102,10 @@ SYSTEM_PROMPT_EXTRA = """TV / Save.TV Tools:
- savetv_record: Nimmt einen Film per TelecastId auf
- get_savetv_status: Zeigt Archiv und geplante Aufnahmen
Wenn der User nach Archiv-Filmen/Bewertung fragt, nutze get_savetv_archive_filme.
WICHTIG bei Archiv-Bewertung: Die Scores sind nur grobe Heuristiken (Sender, Highlight-Flag).
Nutze DEIN eigenes Filmwissen um die wirklich guten Filme zu identifizieren! Schau die
KOMPLETTE Liste durch auch Filme mit Score 50-55 koennen Meisterwerke sein (z.B. bekannte
internationale Filme, Oscar-Gewinner, Klassiker). Sortiere nach DEINER Einschaetzung der
Filmqualitaet, nicht blind nach Score. Hebe besonders hervor: bald ablaufende gute Filme.
WICHTIG bei Archiv-Bewertung: Das Tool liefert KINO-HIGHLIGHTS (echte Kinofilme, Klassiker,
preisgekroente Filme) getrennt von deutschem Fernsehprogramm. Praesentiere dem User die
KINO-HIGHLIGHTS zuerst und erklaere kurz warum jeder Film sehenswert ist (Regisseur, Preise,
Stars). Hebe DRINGEND ablaufende Kino-Highlights besonders hervor die muss er schnell sichern.
"""
@ -406,61 +405,78 @@ DOKU_KEYWORDS = {
"gehirn unter strom",
}
# Lower-case titles (or leading parts of titles) that count as cinema films.
# _is_known_cinema() compares them against the lower-cased, stripped archive
# title, so every entry must itself be written in lower case.
KNOWN_CINEMA = {
    "gravity", "blood diamond", "dunkirk",
    "die fabelhafte welt der amelie", "children of men",
    "die üblichen verdächtigen", "mord im orient express",
    "aviator", "12 monkeys", "bullet train", "salt",
    "anatomie eines falls", "i, tonya", "die wannseekonferenz",
    "die vögel", "gosford park",
    "albert nobbs", "der mit dem wolf tanzt", "zeugin der anklage",
    "crazy heart", "talk to me", "das massaker von katyn",
    "fallende blätter", "das lehrerzimmer", "die aussprache",
    "old henry", "unternehmen petticoat",
    "die reise zum mittelpunkt der erde",
    "tagebuch einer kammerzofe",
    "deutschstunde", "disco boy",
    "die kinder der seidenstraße",
    "führer und verführer", "nur fliegen ist schöner",
    "nightwatch",
    "der dritte", "blind willow",
    "the revenant", "lion", "undisputed",
    "no turning back", "parker", "stirb langsam",
    "shooter", "valerian", "the suicide squad",
    "mile 22", "open range", "hot summer nights",
    "local hero", "z for zachariah", "the good neighbor",
    "the informer", "la coda del diavolo",
    "ladykillers", "hi-lo country", "yalda",
    "bad director", "powder girl",
}
# NOTE(review): this text is a rendered diff without +/- markers or
# indentation. The two lines below look like the header of the heuristic
# scorer; statements that read like its body (score arithmetic ending in
# `return score`) show up further down, mixed with other definitions.
# Confirm against the checked-out file before editing.
# (German docstring: "Scores an archive film heuristically (0-100).")
def _score_archive_film(title, station, highlight, subtitle="", thema=""):
"""Bewertet einen Archiv-Film heuristisch (0-100)."""
# Frequent English words. _is_known_cinema() counts how many of them occur in
# an ASCII-only title; two or more distinct hits mark the title as cinema.
ENGLISH_WORDS = set(
    (
        "the of and in for from with on at "
        "to is has men man last night day club "
        "girl boy road way no dead kill out "
        "black good old new one two fallen "
        "international redemption revenge spirit"
    ).split()
)
def _is_known_cinema(title):
    """Return True if *title* looks like a cinema film.

    Two heuristics are tried in order:

    1. The lower-cased, stripped title equals a ``KNOWN_CINEMA`` entry, or
       starts with one that is followed by a non-alphanumeric character.
       "stirb langsam 2" therefore matches "stirb langsam", while "salto"
       no longer matches "salt".
    2. The title consists only of ASCII letters, digits, spaces and the
       punctuation ``: , ' - . !`` and contains at least two distinct words
       from ``ENGLISH_WORDS``.

    Args:
        title: Title string of the archive entry.

    Returns:
        bool: True when either heuristic matches, otherwise False.
    """
    t = title.lower().strip()

    for known in KNOWN_CINEMA:
        if not t.startswith(known):
            continue
        # Accept an exact hit or a hit that ends on a word boundary; a bare
        # prefix test would also accept unrelated longer words.
        rest = t[len(known):]
        if not rest or not rest[0].isalnum():
            return True

    # Fallback: only titles written purely in ASCII can count as English.
    if re.match(r'^[A-Za-z0-9:,\'\-\.\! ]+$', title) is None:
        return False
    words = set(re.findall(r'[a-z]+', t))
    return len(words & ENGLISH_WORDS) >= 2
# NOTE(review): this span comes from a rendered diff without +/- markers or
# indentation, so statements of two functions appear to be interleaved: the
# boolean predicate _is_excluded(title) and a scorer that returns -1 or an
# integer score. Which line belongs to which version is an assumption —
# confirm against the checked-out file before changing anything here.
def _is_excluded(title):
"""Filtert Programmänderungen und Dokus."""
# (German docstring: "Filters out programme changes and documentaries.")
t = title.lower()
# `station` is not a parameter of _is_excluded — presumably a scorer leftover.
s = station.lower()
# Titles containing "programmänderung" (schedule change) are rejected.
if "programmänderung" in t:
return -1
return True
# Any DOKU_KEYWORDS entry found as a substring of the lower-cased title
# rejects the entry as well.
for kw in DOKU_KEYWORDS:
if kw in t:
return -1
# Presumably scorer logic from here down to `return score`: start at 50,
# +5 / +3 depending on the station group, +15 when `highlight` is truthy,
# +5 for a description longer than 30 characters, +10 once for the first
# quality hint found, +8 for an umlaut-free "Word Word" style title.
score = 50
premium_stations = {"arte", "zdf", "das erste", "mdr", "swr", "ndr", "wdr", "br"}
action_stations = {"prosieben", "sat.1", "kabel 1", "vox", "rtl", "tele 5", "zdf_neo"}
if s in premium_stations:
score += 5
elif s in action_stations:
score += 3
if highlight:
score += 15
# The description is `thema` if truthy, else `subtitle`, else "".
desc = (thema or subtitle or "").lower()
if len(desc) > 30:
score += 5
quality_hints = [
"oscar", "golden globe", "cannes", "berlinale", "venedig",
"preisgekrönt", "meisterwerk", "bestseller", "basiert auf",
]
# Only the first matching hint counts (note the break).
for hint in quality_hints:
if hint in desc or hint in t:
score += 10
break
# Titles mixing ASCII and non-ASCII letters get no bonus (the `pass` branch).
if any(c.isascii() and c.isalpha() for c in title) and not all(c.isascii() for c in title if c.isalpha()):
pass
elif re.search(r'[A-Z][a-z]+ [A-Z][a-z]+', title) and not re.search(r'[äöüÄÖÜß]', title):
score += 8
return score
# Presumably the tail of the predicate: True inside the keyword loop,
# False when nothing matched.
return True
return False
# Builds the text report of archive films: reads the entries returned by
# _get_full_archive(), drops excluded titles, de-duplicates by title and
# returns the report lines joined with newlines.
# NOTE(review): this span comes from a rendered diff without +/- markers or
# indentation; statements of two versions of the function appear interleaved
# and a hunk header hides part of the loop body. Which line belongs to which
# version is an assumption — confirm against the checked-out file.
def handle_get_savetv_archive_filme(**kw):
"""Alle fertigen Archiv-Filme holen, bewerten, deduplizieren, sortiert ausgeben."""
"""Alle fertigen Archiv-Filme holen, in Kino vs. TV trennen, sortiert ausgeben."""
# (German docstrings: "Fetch all finished archive films, score, de-duplicate,
# output sorted." / "... split into cinema vs. TV, output sorted.")
entries = _get_full_archive()
# Empty or missing archive: answer with a fixed message.
if not entries:
return "Keine Archiv-Eintraege gefunden."
# NOTE(review): `films` is only read by the score-sorted statements below;
# if those are removed lines, this initialisation is presumably unused.
films = []
# Maps the lower-cased, stripped title to the record kept for that title.
seen_titles = {}
series_count = 0
excluded_count = 0
for e in entries:
tc = e.get("STRTELECASTENTRY", {})
# The hunk header below hides some lines of the loop body; the `continue`
# after it belongs to a condition that is not visible here.
@ -470,63 +486,74 @@ def handle_get_savetv_archive_filme(**kw):
continue
title = tc.get("STITLE", "?")
# Excluded titles are counted and skipped.
if _is_excluded(title):
excluded_count += 1
continue
station = tc.get("STVSTATIONNAME", "?")
highlight = tc.get("BISHIGHLIGHT", False)
subtitle = tc.get("SSUBTITLE", "")
thema = tc.get("STHEMA", "")
date = tc.get("DSTARTDATE", "?")[:10]
# int() raises if the field holds a non-numeric value — TODO confirm the
# API always delivers numbers here.
days_left = int(tc.get("IDAYSLEFTBEFOREDELETE", 0))
tid = int(tc.get("ITELECASTID", 0))
# Presumably the score-based path: negative scores are skipped.
score = _score_archive_film(title, station, highlight, subtitle, thema)
if score < 0:
continue
is_cinema = _is_known_cinema(title)
key = title.lower().strip()
# Duplicate title: the stored record is updated only when this entry has
# more days left, then the entry is skipped.
if key in seen_titles:
if days_left > seen_titles[key]["days_left"]:
seen_titles[key]["days_left"] = days_left
seen_titles[key]["date"] = date
seen_titles[key]["tid"] = tid
seen_titles[key].update(days_left=days_left, tid=tid)
continue
# First occurrence of the title: store its record. Two key sets are visible
# (with "score"/"highlight"/"date" and with "cinema"), presumably old vs. new.
seen_titles[key] = {
"title": title, "station": station, "date": date,
"days_left": days_left, "score": score, "tid": tid,
"highlight": highlight,
"title": title, "station": station,
"days_left": days_left, "tid": tid,
"cinema": is_cinema,
}
# Presumably the score-based ordering: highest score first, then days_left.
films = sorted(seen_titles.values(), key=lambda x: (-x["score"], x["days_left"]))
total_archive = len(entries)
urgent = sorted(
[f for f in films if f["days_left"] <= 7],
key=lambda x: (x["days_left"], -x["score"]),
# Split the de-duplicated records by the "cinema" flag; each group is sorted
# by days_left ascending, so entries expiring soonest come first.
all_films = list(seen_titles.values())
cinema = sorted(
[f for f in all_films if f["cinema"]],
key=lambda x: x["days_left"],
)
tv_films = sorted(
[f for f in all_films if not f["cinema"]],
key=lambda x: x["days_left"],
)
# Report header; two variants of the header text are visible.
lines = [
f"Save.TV Archiv-Bewertung: {len(films)} Filme "
f"(von {total_archive} Aufnahmen, {series_count} Serien-Episoden gefiltert)\n"
f"Save.TV Archiv: {len(all_films)} Filme "
f"({series_count} Serien, {excluded_count} Dokus/Spam gefiltert)\n"
]
if urgent:
lines.append(f"DRINGEND — {len(urgent)} Filme laufen in <=7 Tagen ab:")
for f in urgent:
# Entries with at most 7 days left count as urgent, the rest as safe.
cinema_urgent = [f for f in cinema if f["days_left"] <= 7]
cinema_safe = [f for f in cinema if f["days_left"] > 7]
if cinema_urgent:
lines.append(
f" [{f['days_left']}d] {f['title'][:50]} | {f['station']} | TID {f['tid']}"
# NOTE(review): the f-string below has no placeholders; the f prefix is unneeded.
f"KINO-HIGHLIGHTS DRINGEND — laufen bald ab, JETZT sichern:"
)
for f in cinema_urgent:
lines.append(
f" [{f['days_left']}d] {f['title']} ({f['station']})"
)
lines.append("")
safe = [f for f in films if f["days_left"] > 7]
if safe:
if cinema_safe:
lines.append(f"KINO-HIGHLIGHTS ({len(cinema_safe)} Filme):")
for f in cinema_safe:
lines.append(
f"ALLE FILME IM ARCHIV ({len(safe)}) — nutze dein Filmwissen "
f"um die besten zu identifizieren:"
f" {f['title']} ({f['station']}, {f['days_left']}d)"
)
for f in safe:
lines.append("")
# TV films: only the urgent ones are listed individually; the remaining ones
# are summarised as a count.
if tv_films:
tv_urgent = [f for f in tv_films if f["days_left"] <= 7]
tv_safe = [f for f in tv_films if f["days_left"] > 7]
lines.append(
f" {f['title'][:50]} | {f['station']} | {f['days_left']}d"
f"DEUTSCHE TV-FILME ({len(tv_films)}, "
f"davon {len(tv_urgent)} bald ablaufend):"
)
for f in tv_urgent:
lines.append(f" [{f['days_left']}d] {f['title']} ({f['station']})")
if tv_safe:
lines.append(f" ... und {len(tv_safe)} weitere mit >7 Tagen")
return "\n".join(lines)