feat: max_flugzeit_h (22h), max_stops (2), echtes PE, Kayak Flugzeit/Stops-Filter, db_migrate.py
Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
parent
36d671c301
commit
f85c049aca
5 changed files with 168 additions and 27 deletions
|
|
@ -40,12 +40,13 @@ def init_db():
|
|||
airline_filter TEXT DEFAULT '',
|
||||
layover_min INTEGER DEFAULT 120,
|
||||
layover_max INTEGER DEFAULT 300,
|
||||
max_flugzeit_h INTEGER DEFAULT 22,
|
||||
max_stops INTEGER DEFAULT 2,
|
||||
intervall TEXT DEFAULT 'daily',
|
||||
aktiv INTEGER DEFAULT 1,
|
||||
created_at TEXT DEFAULT (datetime('now'))
|
||||
)
|
||||
""")
|
||||
# Spalten nachrüsten falls DB schon existiert
|
||||
for col_sql in [
|
||||
"ALTER TABLE jobs ADD COLUMN gepaeck TEXT DEFAULT '1koffer+handgepaeck'",
|
||||
"ALTER TABLE jobs ADD COLUMN aufenthalt_tage INTEGER DEFAULT 60",
|
||||
|
|
@ -54,6 +55,8 @@ def init_db():
|
|||
"ALTER TABLE jobs ADD COLUMN airline_filter TEXT DEFAULT ''",
|
||||
"ALTER TABLE jobs ADD COLUMN layover_min INTEGER DEFAULT 120",
|
||||
"ALTER TABLE jobs ADD COLUMN layover_max INTEGER DEFAULT 300",
|
||||
"ALTER TABLE jobs ADD COLUMN max_flugzeit_h INTEGER DEFAULT 22",
|
||||
"ALTER TABLE jobs ADD COLUMN max_stops INTEGER DEFAULT 2",
|
||||
]:
|
||||
try:
|
||||
c.execute(col_sql)
|
||||
|
|
@ -166,16 +169,14 @@ PLAUSI_CHECK: [Preise unter 1000 EUR einzeln einordnen - was stimmt da nicht]"""
|
|||
c.execute("""
|
||||
INSERT INTO jobs
|
||||
(scanner, von, nach, tage, aufenthalt_tage, trip_type, kabine, gepaeck,
|
||||
airline_filter, layover_min, layover_max, intervall)
|
||||
airline_filter, layover_min, layover_max, max_flugzeit_h, max_stops, intervall)
|
||||
VALUES
|
||||
-- Allgemeine Suche (alle Airlines, Umstieg 2-5h)
|
||||
('kayak', 'FRA','KTI',30,60,'roundtrip','premium_economy','1koffer+handgepaeck','', 120,300,'daily'),
|
||||
('trip', 'FRA','KTI',30,60,'roundtrip','premium_economy','1koffer+handgepaeck','', 120,300,'daily'),
|
||||
-- Airline-spezifisch via Kayak (Umstieg 2-5h erzwingen)
|
||||
('kayak', 'FRA','KTI',30,60,'roundtrip','premium_economy','1koffer+handgepaeck','CZ', 120,300,'daily'),
|
||||
('kayak', 'FRA','KTI',30,60,'roundtrip','premium_economy','1koffer+handgepaeck','CX', 120,300,'daily'),
|
||||
('kayak', 'FRA','KTI',30,60,'roundtrip','premium_economy','1koffer+handgepaeck','SQ', 120,300,'daily'),
|
||||
('kayak', 'FRA','KTI',30,60,'roundtrip','premium_economy','1koffer+handgepaeck','TG', 120,300,'daily')
|
||||
('kayak','FRA','KTI',30,60,'roundtrip','premium_economy','1koffer+handgepaeck','', 120,300,22,2,'daily'),
|
||||
('trip', 'FRA','KTI',30,60,'roundtrip','premium_economy','1koffer+handgepaeck','', 120,300,22,2,'daily'),
|
||||
('kayak','FRA','KTI',30,60,'roundtrip','premium_economy','1koffer+handgepaeck','CZ',120,300,22,2,'daily'),
|
||||
('kayak','FRA','KTI',30,60,'roundtrip','premium_economy','1koffer+handgepaeck','CX',120,300,22,2,'daily'),
|
||||
('kayak','FRA','KTI',30,60,'roundtrip','premium_economy','1koffer+handgepaeck','SQ',120,300,22,2,'daily'),
|
||||
('kayak','FRA','KTI',30,60,'roundtrip','premium_economy','1koffer+handgepaeck','TG',120,300,22,2,'daily')
|
||||
""")
|
||||
|
||||
conn.commit()
|
||||
|
|
|
|||
120
hub/src/db_migrate.py
Normal file
120
hub/src/db_migrate.py
Normal file
|
|
@ -0,0 +1,120 @@
|
|||
"""
|
||||
Einmalig ausführen um laufende DB zu migrieren.
|
||||
docker exec flugscanner-web python3 /app/src/db_migrate.py
|
||||
"""
|
||||
import sqlite3, os
|
||||
|
||||
DB_PATH = os.environ.get("DB_PATH", "/data/flugscanner.db")
|
||||
conn = sqlite3.connect(DB_PATH)
|
||||
|
||||
# 1. Neue Spalten nachrüsten
|
||||
for sql, desc in [
|
||||
("ALTER TABLE jobs ADD COLUMN airline_filter TEXT DEFAULT ''", "airline_filter"),
|
||||
("ALTER TABLE jobs ADD COLUMN layover_min INTEGER DEFAULT 120", "layover_min"),
|
||||
("ALTER TABLE jobs ADD COLUMN layover_max INTEGER DEFAULT 300", "layover_max"),
|
||||
("ALTER TABLE jobs ADD COLUMN max_flugzeit_h INTEGER DEFAULT 22","max_flugzeit_h"),
|
||||
("ALTER TABLE jobs ADD COLUMN max_stops INTEGER DEFAULT 2", "max_stops"),
|
||||
]:
|
||||
try:
|
||||
conn.execute(sql)
|
||||
print(f" ✓ Spalte hinzugefügt: {desc}")
|
||||
except Exception:
|
||||
print(f" — Spalte existiert: {desc}")
|
||||
|
||||
# 2. Bestehende Jobs mit vernünftigen Standardwerten befüllen
|
||||
conn.execute("""
|
||||
UPDATE jobs SET
|
||||
layover_min = 120,
|
||||
layover_max = 300,
|
||||
max_flugzeit_h = 22,
|
||||
max_stops = 2
|
||||
WHERE layover_min IS NULL OR layover_min = 0
|
||||
""")
|
||||
conn.execute("UPDATE jobs SET airline_filter = '' WHERE airline_filter IS NULL")
|
||||
print(" ✓ Bestehende Jobs aktualisiert")
|
||||
|
||||
# 3. Airline-spezifische Jobs anlegen (nur wenn noch nicht vorhanden)
|
||||
airlines = [
|
||||
("CZ", "China Southern"),
|
||||
("CX", "Cathay Pacific"),
|
||||
("SQ", "Singapore Airlines"),
|
||||
("TG", "Thai Airways"),
|
||||
]
|
||||
for code, name in airlines:
|
||||
exists = conn.execute(
|
||||
"SELECT id FROM jobs WHERE scanner='kayak' AND airline_filter=?", (code,)
|
||||
).fetchone()
|
||||
if not exists:
|
||||
conn.execute("""
|
||||
INSERT INTO jobs
|
||||
(scanner, von, nach, tage, aufenthalt_tage, trip_type, kabine, gepaeck,
|
||||
airline_filter, layover_min, layover_max, max_flugzeit_h, max_stops, intervall)
|
||||
VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?)
|
||||
""", ("kayak","FRA","KTI",30,60,"roundtrip","premium_economy",
|
||||
"1koffer+handgepaeck", code, 120, 300, 22, 2, "daily"))
|
||||
print(f" ✓ Job angelegt: Kayak [{code}] {name}")
|
||||
else:
|
||||
print(f" — Job existiert: [{code}] {name}")
|
||||
|
||||
# 4. Prompt aktualisieren
|
||||
PROMPT = """Du bist ein Flugpreis-Analyst. Analysiere Preisdaten für diesen Flug:
|
||||
|
||||
STRECKE: ROUNDTRIP Frankfurt (FRA) → Phnom Penh Techo Airport (KTI)
|
||||
KABINE: Premium Economy (echte PE-Sitze mit extra Beinfreiheit, NICHT Economy mit anderem Namen!)
|
||||
GEPÄCK: 1 großer Aufgabekoffer + Handgepäck (zwingend inklusive)
|
||||
AUFENTHALT: ca. 2 Monate
|
||||
|
||||
BEVORZUGTE AIRLINES (nach Preis-Leistung):
|
||||
- China Southern (CZ): Umstieg Guangzhou (CAN) — meist günstigste Option
|
||||
- Cathay Pacific (CX): Umstieg Hongkong (HKG) — sehr gutes PE-Produkt
|
||||
- Singapore Airlines (SQ): Umstieg Singapur (SIN) — bestes PE-Produkt
|
||||
- Thai Airways (TG): Umstieg Bangkok (BKK) — gutes Netz nach KTI
|
||||
- Vietnam Airlines (VN): Umstieg Hanoi (HAN) — direktester Weg nach KTI
|
||||
|
||||
HARTE FILTER (Flüge außerhalb dieser Grenzen ablehnen):
|
||||
- Umstiegszeit an asiatischen Hubs: MUSS 2–5 Stunden sein (120–300 Min)
|
||||
→ Unter 2h = Gepäck-Transfer-Risiko / Über 5h = Hotelübernachtung nötig
|
||||
- Gesamtreisezeit: MAX 22 Stunden (FRA→KTI oder KTI→FRA)
|
||||
→ Flüge mit 30+ Stunden (z.B. mehrere Stopps mit langen Wartezeiten) ausschließen
|
||||
- Maximale Stopps: 2 (idealerweise 1)
|
||||
|
||||
WICHTIG: Preise unter 1000 EUR für diesen Roundtrip sind fast immer unplausibel.
|
||||
Mögliche Gründe: Economy statt PE, nur Hinflug, kein Freigepäck, falsche Route.
|
||||
|
||||
Aktuelle Preise (Anbieter | Node | Airline | Preis):
|
||||
{preise_heute}
|
||||
|
||||
Preisverlauf letzte 30 Tage:
|
||||
{preisverlauf}
|
||||
|
||||
Statistik: Ø {avg} EUR | Min {min} EUR | Max {max} EUR
|
||||
|
||||
Antworte auf Deutsch:
|
||||
EMPFEHLUNG: [JETZT BUCHEN / WARTEN / NEUTRAL]
|
||||
BEGRUENDUNG: [1-2 Sätze]
|
||||
BESTER_PREIS: [Anbieter + Airline + Preis + Node]
|
||||
BESTE_AIRLINE: [welche der Airlines aktuell am günstigsten und warum]
|
||||
TREND: [STEIGEND / FALLEND / STABIL]
|
||||
GEO_UNTERSCHIED: [DE-Scanner vs. KH-Scanner Preisdifferenz und Erklärung]
|
||||
FILTER_WARNUNG: [Welche gefundenen Preise gegen Flugzeit/Stopps/Umstieg-Regeln verstoßen]
|
||||
PLAUSI_CHECK: [Preise unter 1000 EUR einzeln einordnen was da nicht stimmt]"""
|
||||
|
||||
conn.execute("UPDATE prompts SET inhalt=?, updated_at=datetime('now') WHERE name='ki_auswertung'",
|
||||
(PROMPT,))
|
||||
print(" ✓ KI-Prompt aktualisiert")
|
||||
|
||||
conn.commit()
|
||||
|
||||
# 5. Status anzeigen
|
||||
print("\n=== Aktuelle Jobs ===")
|
||||
jobs = conn.execute("""
|
||||
SELECT id, scanner, airline_filter, layover_min, layover_max,
|
||||
max_flugzeit_h, max_stops, aktiv
|
||||
FROM jobs ORDER BY id
|
||||
""").fetchall()
|
||||
for j in jobs:
|
||||
al = f" [{j[2]}]" if j[2] else ""
|
||||
print(f" #{j[0]} {j[1]}{al} | Umstieg {j[3]}-{j[4]}min | max {j[5]}h | {j[6]} Stopps | {'✓' if j[7] else '✗'}")
|
||||
|
||||
conn.close()
|
||||
print("\n✅ Migration abgeschlossen")
|
||||
|
|
@ -57,9 +57,11 @@ def dispatch_job(node, job):
|
|||
"trip_type": job.get("trip_type", "roundtrip"),
|
||||
"kabine": job.get("kabine", "premium_economy"),
|
||||
"gepaeck": job.get("gepaeck", "1koffer+handgepaeck"),
|
||||
"airline_filter": job.get("airline_filter", ""),
|
||||
"layover_min": job.get("layover_min", 120),
|
||||
"layover_max": job.get("layover_max", 300),
|
||||
"airline_filter": job.get("airline_filter", ""),
|
||||
"layover_min": job.get("layover_min", 120),
|
||||
"layover_max": job.get("layover_max", 300),
|
||||
"max_flugzeit_h": job.get("max_flugzeit_h", 22),
|
||||
"max_stops": job.get("max_stops", 2),
|
||||
}
|
||||
log(f"Job an {node['name']} ({node['tailscale_ip']}): {payload}")
|
||||
try:
|
||||
|
|
|
|||
|
|
@ -25,13 +25,17 @@ def job():
|
|||
airline_filter = data.get("airline_filter", "")
|
||||
layover_min = data.get("layover_min", 120)
|
||||
layover_max = data.get("layover_max", 300)
|
||||
max_flugzeit_h = data.get("max_flugzeit_h", 22)
|
||||
max_stops = data.get("max_stops", 2)
|
||||
|
||||
airline_label = f" [{airline_filter}]" if airline_filter else ""
|
||||
print(f"[{NODE_NAME}] Job: {scanner}{airline_label} {von}→{nach} | {kabine} | Umstieg {layover_min}-{layover_max}min | +{tage}/{aufenthalt}T")
|
||||
print(f"[{NODE_NAME}] Job: {scanner}{airline_label} {von}→{nach} | {kabine} | "
|
||||
f"Umstieg {layover_min}-{layover_max}min | max {max_flugzeit_h}h/{max_stops} Stopps")
|
||||
|
||||
try:
|
||||
results = scrape(scanner, von, nach, tage, aufenthalt, trip_type, kabine,
|
||||
gepaeck, airline_filter, layover_min, layover_max)
|
||||
gepaeck, airline_filter, layover_min, layover_max,
|
||||
max_flugzeit_h, max_stops)
|
||||
print(f"[{NODE_NAME}] {len(results)} Preise gefunden")
|
||||
return jsonify({
|
||||
"results": results,
|
||||
|
|
|
|||
|
|
@ -6,7 +6,8 @@ import re
|
|||
def scrape(scanner, von, nach, tage=30, aufenthalt_tage=60,
|
||||
trip_type="roundtrip", kabine="premium_economy",
|
||||
gepaeck="1koffer+handgepaeck", airline_filter="",
|
||||
layover_min=120, layover_max=300):
|
||||
layover_min=120, layover_max=300,
|
||||
max_flugzeit_h=22, max_stops=2):
|
||||
dispatcher = {
|
||||
"google_flights": scrape_google_flights,
|
||||
"kayak": scrape_kayak,
|
||||
|
|
@ -17,7 +18,7 @@ def scrape(scanner, von, nach, tage=30, aufenthalt_tage=60,
|
|||
if not fn:
|
||||
raise ValueError(f"Unbekannter Scanner: {scanner}")
|
||||
return fn(von, nach, tage, aufenthalt_tage, trip_type, kabine, gepaeck,
|
||||
airline_filter, layover_min, layover_max)
|
||||
airline_filter, layover_min, layover_max, max_flugzeit_h, max_stops)
|
||||
|
||||
|
||||
def _booking_url_google(von, nach, abflug, rueck, kc):
|
||||
|
|
@ -30,18 +31,25 @@ def _booking_url_google(von, nach, abflug, rueck, kc):
|
|||
|
||||
|
||||
def _booking_url_kayak(von, nach, abflug, rueck, kc, bags=1,
|
||||
layover_min=120, layover_max=300, airline=""):
|
||||
layover_min=120, layover_max=300, airline="",
|
||||
max_flugzeit_h=22, max_stops=2):
|
||||
"""
|
||||
Kayak fs-Filter:
|
||||
bfc=1 → min. 1 Freigepäck inklusive
|
||||
ctr=120,300 → Umstiegszeit 2–5 Stunden (in Minuten)
|
||||
airlines=XX → Airline-Code (CZ, CX, SQ, TG …)
|
||||
bfc=1 → min. 1 Freigepäck inklusive
|
||||
ctr=120,300 → Umstiegszeit 2–5 Stunden (Minuten)
|
||||
duration=-1320 → Max. Gesamtflugzeit (Minuten, hier 22h)
|
||||
s=2 → Max. 2 Stopps
|
||||
airlines=XX → Airline-Code (CZ, CX, SQ, TG …)
|
||||
"""
|
||||
filters = []
|
||||
if bags:
|
||||
filters.append(f"bfc%3D{bags}")
|
||||
if layover_min and layover_max:
|
||||
filters.append(f"ctr%3D{layover_min}%2C{layover_max}")
|
||||
if max_flugzeit_h:
|
||||
filters.append(f"duration%3D-{max_flugzeit_h * 60}")
|
||||
if max_stops is not None and max_stops < 10:
|
||||
filters.append(f"s%3D{max_stops}")
|
||||
if airline:
|
||||
filters.append(f"airlines%3D{airline}")
|
||||
fs = ("&fs=" + "%3B".join(filters)) if filters else ""
|
||||
|
|
@ -122,7 +130,8 @@ def _consent_google(sb):
|
|||
def scrape_google_flights(von, nach, tage=30, aufenthalt_tage=60,
|
||||
trip_type="roundtrip", kabine="premium_economy",
|
||||
gepaeck="1koffer+handgepaeck", airline_filter="",
|
||||
layover_min=120, layover_max=300):
|
||||
layover_min=120, layover_max=300,
|
||||
max_flugzeit_h=22, max_stops=2):
|
||||
abflug = (datetime.now() + timedelta(days=tage)).strftime("%Y-%m-%d")
|
||||
abflug_de = (datetime.now() + timedelta(days=tage)).strftime("%d.%m.%Y")
|
||||
rueck = (datetime.now() + timedelta(days=tage + aufenthalt_tage)).strftime("%Y-%m-%d") if trip_type == "roundtrip" else ""
|
||||
|
|
@ -246,15 +255,18 @@ def scrape_google_flights(von, nach, tage=30, aufenthalt_tage=60,
|
|||
def scrape_kayak(von, nach, tage=30, aufenthalt_tage=60,
|
||||
trip_type="roundtrip", kabine="premium_economy",
|
||||
gepaeck="1koffer+handgepaeck", airline_filter="",
|
||||
layover_min=120, layover_max=300):
|
||||
layover_min=120, layover_max=300,
|
||||
max_flugzeit_h=22, max_stops=2):
|
||||
abflug = (datetime.now() + timedelta(days=tage)).strftime("%Y-%m-%d")
|
||||
rueck = (datetime.now() + timedelta(days=tage + aufenthalt_tage)).strftime("%Y-%m-%d") if trip_type == "roundtrip" else ""
|
||||
kc = KABINE_KAYAK.get(kabine, "w")
|
||||
bags = 1 if "koffer" in gepaeck else 0
|
||||
booking_url = _booking_url_kayak(von, nach, abflug, rueck, kc, bags,
|
||||
layover_min, layover_max, airline_filter)
|
||||
layover_min, layover_max, airline_filter,
|
||||
max_flugzeit_h, max_stops)
|
||||
booking_url_raw = _booking_url_kayak(von, nach, abflug, rueck, kc, 0,
|
||||
layover_min, layover_max, airline_filter)
|
||||
layover_min, layover_max, airline_filter,
|
||||
max_flugzeit_h, max_stops)
|
||||
airline_label = f" [{airline_filter}]" if airline_filter else ""
|
||||
print(f"[KY{airline_label}] URL: {booking_url}")
|
||||
|
||||
|
|
@ -313,7 +325,8 @@ def scrape_kayak(von, nach, tage=30, aufenthalt_tage=60,
|
|||
def scrape_trip(von, nach, tage=30, aufenthalt_tage=60,
|
||||
trip_type="roundtrip", kabine="premium_economy",
|
||||
gepaeck="1koffer+handgepaeck", airline_filter="",
|
||||
layover_min=120, layover_max=300):
|
||||
layover_min=120, layover_max=300,
|
||||
max_flugzeit_h=22, max_stops=2):
|
||||
abflug_fmt = (datetime.now() + timedelta(days=tage)).strftime("%Y%m%d")
|
||||
rueck_fmt = (datetime.now() + timedelta(days=tage + aufenthalt_tage)).strftime("%Y%m%d") if trip_type == "roundtrip" else ""
|
||||
abflug_iso = (datetime.now() + timedelta(days=tage)).strftime("%Y-%m-%d")
|
||||
|
|
@ -376,7 +389,8 @@ def scrape_trip(von, nach, tage=30, aufenthalt_tage=60,
|
|||
def scrape_skyscanner(von, nach, tage=30, aufenthalt_tage=60,
|
||||
trip_type="roundtrip", kabine="premium_economy",
|
||||
gepaeck="1koffer+handgepaeck", airline_filter="",
|
||||
layover_min=120, layover_max=300):
|
||||
layover_min=120, layover_max=300,
|
||||
max_flugzeit_h=22, max_stops=2):
|
||||
"""Skyscanner hat starken Bot-Schutz — übersprungen."""
|
||||
print("[SS] Skyscanner übersprungen (Bot-Detection)")
|
||||
return []
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue