feat: Airlines CZ/CX/SQ/TG, Kayak Umstiegszeit-Filter 2-5h, airline_filter Job-Parameter
Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
parent
c9e8684a88
commit
36d671c301
4 changed files with 118 additions and 66 deletions
|
|
@ -37,19 +37,28 @@ def init_db():
|
|||
trip_type TEXT DEFAULT 'roundtrip',
|
||||
kabine TEXT DEFAULT 'premium_economy',
|
||||
gepaeck TEXT DEFAULT '1koffer+handgepaeck',
|
||||
airline_filter TEXT DEFAULT '',
|
||||
layover_min INTEGER DEFAULT 120,
|
||||
layover_max INTEGER DEFAULT 300,
|
||||
intervall TEXT DEFAULT 'daily',
|
||||
aktiv INTEGER DEFAULT 1,
|
||||
created_at TEXT DEFAULT (datetime('now'))
|
||||
)
|
||||
""")
|
||||
# Spalte nachrüsten falls DB schon existiert
|
||||
try:
|
||||
c.execute("ALTER TABLE jobs ADD COLUMN gepaeck TEXT DEFAULT '1koffer+handgepaeck'")
|
||||
c.execute("ALTER TABLE jobs ADD COLUMN aufenthalt_tage INTEGER DEFAULT 60")
|
||||
c.execute("ALTER TABLE jobs ADD COLUMN trip_type TEXT DEFAULT 'roundtrip'")
|
||||
c.execute("ALTER TABLE jobs ADD COLUMN kabine TEXT DEFAULT 'premium_economy'")
|
||||
except Exception:
|
||||
pass
|
||||
# Spalten nachrüsten falls DB schon existiert
|
||||
for col_sql in [
|
||||
"ALTER TABLE jobs ADD COLUMN gepaeck TEXT DEFAULT '1koffer+handgepaeck'",
|
||||
"ALTER TABLE jobs ADD COLUMN aufenthalt_tage INTEGER DEFAULT 60",
|
||||
"ALTER TABLE jobs ADD COLUMN trip_type TEXT DEFAULT 'roundtrip'",
|
||||
"ALTER TABLE jobs ADD COLUMN kabine TEXT DEFAULT 'premium_economy'",
|
||||
"ALTER TABLE jobs ADD COLUMN airline_filter TEXT DEFAULT ''",
|
||||
"ALTER TABLE jobs ADD COLUMN layover_min INTEGER DEFAULT 120",
|
||||
"ALTER TABLE jobs ADD COLUMN layover_max INTEGER DEFAULT 300",
|
||||
]:
|
||||
try:
|
||||
c.execute(col_sql)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
c.execute("""
|
||||
CREATE TABLE IF NOT EXISTS prices (
|
||||
|
|
@ -106,40 +115,42 @@ def init_db():
|
|||
""")
|
||||
|
||||
# Standard-Prompt
|
||||
c.execute("""
|
||||
INSERT OR IGNORE INTO prompts (name, inhalt) VALUES (
|
||||
'ki_auswertung',
|
||||
'Du bist ein Flugpreis-Analyst. Analysiere Preisdaten für folgenden Flug:
|
||||
PROMPT_TEXT = """Du bist ein Flugpreis-Analyst. Analysiere Preisdaten fuer folgenden Flug:
|
||||
|
||||
STRECKE: ROUNDTRIP Frankfurt (FRA) → Phnom Penh Techo Airport (KTI)
|
||||
KABINE: Premium Economy
|
||||
GEPÄCK: 1 großer Aufgabekoffer + Handgepäck (zwingend inklusive!)
|
||||
UMSTIEG: typischerweise Hanoi (HAN) — Durchgepäck FRA→KTI, kein Umsteigen des Koffers nötig
|
||||
AUFENTHALT: ca. 2 Monate
|
||||
STRECKE: ROUNDTRIP Frankfurt (FRA) to Phnom Penh Techo Airport (KTI)
|
||||
KABINE: Premium Economy | GEPAECK: 1 Aufgabekoffer + Handgepaeck | AUFENTHALT: ~2 Monate
|
||||
|
||||
WICHTIG: Nur Preise bewerten die 1 Aufgabekoffer enthalten. Premium Economy schließt dies normalerweise ein — bei verdächtig günstigen Preisen (<700 EUR Roundtrip) prüfen ob es sich um reine Handgepäck-Tarife handeln könnte.
|
||||
BEVORZUGTE AIRLINES:
|
||||
- China Southern (CZ): Hub Guangzhou (CAN)
|
||||
- Cathay Pacific (CX): Hub Hongkong (HKG)
|
||||
- Singapore Airlines (SQ): Hub Singapur (SIN)
|
||||
- Thai Airways (TG): Hub Bangkok (BKK)
|
||||
- Vietnam Airlines (VN): Hub Hanoi (HAN) - Durchgepaeck FRA-KTI
|
||||
|
||||
Aktuelle Preise heute (nach Anbieter und Standort des Scanners):
|
||||
UMSTIEG-REGEL: Umstiegszeit an asiatischen Hubs MUSS 2-5 Stunden sein (120-300 Minuten).
|
||||
Zu kurz (<2h) = Gepaeck-Risiko. Zu lang (>5h) = unzumutbare Wartezeit.
|
||||
|
||||
WICHTIG: Preise unter 1000 EUR sind fuer Roundtrip PE + Koffer + 2 Monate hoechstwahrscheinlich unplausibel.
|
||||
|
||||
Aktuelle Preise (Anbieter | Node | Airline | Preis):
|
||||
{preise_heute}
|
||||
|
||||
Preisverlauf letzte 30 Tage:
|
||||
Verlauf 30 Tage:
|
||||
{preisverlauf}
|
||||
|
||||
Statistik:
|
||||
- Durchschnitt: {avg} EUR
|
||||
- Minimum: {min} EUR
|
||||
- Maximum: {max} EUR
|
||||
Statistik: Durchschnitt {avg} EUR | Min {min} EUR | Max {max} EUR
|
||||
|
||||
Antworte auf Deutsch in diesem Format:
|
||||
Antworte auf Deutsch:
|
||||
EMPFEHLUNG: [JETZT BUCHEN / WARTEN / NEUTRAL]
|
||||
BEGRUENDUNG: [1-2 Sätze warum]
|
||||
BESTER_PREIS: [günstigster Anbieter, Preis, Scanner-Standort]
|
||||
BEGRUENDUNG: [1-2 Saetze]
|
||||
BESTER_PREIS: [Anbieter + Airline + Preis + Node]
|
||||
BESTE_AIRLINE: [welche der 4 Airlines gerade am guenstigsten]
|
||||
TREND: [STEIGEND / FALLEND / STABIL]
|
||||
GEO_UNTERSCHIED: [Preisunterschied DE-Scanner vs. KH-Scanner falls erkennbar]
|
||||
GEPAECK_WARNUNG: [Ja/Nein — ob Preise möglicherweise kein Aufgabegepäck enthalten]
|
||||
PLAUSI_CHECK: [Preise unter 1000 EUR einzeln bewerten — wahrscheinlich Economy, Hinflug only, oder kein Koffer]'
|
||||
)
|
||||
""")
|
||||
GEO_UNTERSCHIED: [DE-Scanner vs. KH-Scanner Preisdifferenz]
|
||||
PLAUSI_CHECK: [Preise unter 1000 EUR einzeln einordnen - was stimmt da nicht]"""
|
||||
|
||||
c.execute("INSERT OR IGNORE INTO prompts (name, inhalt) VALUES (?, ?)",
|
||||
("ki_auswertung", PROMPT_TEXT))
|
||||
|
||||
# Standard-Nodes
|
||||
c.execute("""
|
||||
|
|
@ -153,10 +164,18 @@ PLAUSI_CHECK: [Preise unter 1000 EUR einzeln bewerten — wahrscheinlich Economy
|
|||
job_count = c.execute("SELECT COUNT(*) FROM jobs").fetchone()[0]
|
||||
if job_count == 0:
|
||||
c.execute("""
|
||||
INSERT INTO jobs (scanner, von, nach, tage, aufenthalt_tage, trip_type, kabine, gepaeck, intervall) VALUES
|
||||
('google_flights', 'FRA', 'KTI', 30, 60, 'roundtrip', 'premium_economy', '1koffer+handgepaeck', 'daily'),
|
||||
('kayak', 'FRA', 'KTI', 30, 60, 'roundtrip', 'premium_economy', '1koffer+handgepaeck', 'daily'),
|
||||
('trip', 'FRA', 'KTI', 30, 60, 'roundtrip', 'premium_economy', '1koffer+handgepaeck', 'daily')
|
||||
INSERT INTO jobs
|
||||
(scanner, von, nach, tage, aufenthalt_tage, trip_type, kabine, gepaeck,
|
||||
airline_filter, layover_min, layover_max, intervall)
|
||||
VALUES
|
||||
-- Allgemeine Suche (alle Airlines, Umstieg 2-5h)
|
||||
('kayak', 'FRA','KTI',30,60,'roundtrip','premium_economy','1koffer+handgepaeck','', 120,300,'daily'),
|
||||
('trip', 'FRA','KTI',30,60,'roundtrip','premium_economy','1koffer+handgepaeck','', 120,300,'daily'),
|
||||
-- Airline-spezifisch via Kayak (Umstieg 2-5h erzwingen)
|
||||
('kayak', 'FRA','KTI',30,60,'roundtrip','premium_economy','1koffer+handgepaeck','CZ', 120,300,'daily'),
|
||||
('kayak', 'FRA','KTI',30,60,'roundtrip','premium_economy','1koffer+handgepaeck','CX', 120,300,'daily'),
|
||||
('kayak', 'FRA','KTI',30,60,'roundtrip','premium_economy','1koffer+handgepaeck','SQ', 120,300,'daily'),
|
||||
('kayak', 'FRA','KTI',30,60,'roundtrip','premium_economy','1koffer+handgepaeck','TG', 120,300,'daily')
|
||||
""")
|
||||
|
||||
conn.commit()
|
||||
|
|
|
|||
|
|
@ -49,14 +49,17 @@ def update_node_status(name, status):
|
|||
|
||||
def dispatch_job(node, job):
|
||||
payload = {
|
||||
"scanner": job["scanner"],
|
||||
"von": job["von"],
|
||||
"nach": job["nach"],
|
||||
"tage": job["tage"],
|
||||
"scanner": job["scanner"],
|
||||
"von": job["von"],
|
||||
"nach": job["nach"],
|
||||
"tage": job["tage"],
|
||||
"aufenthalt_tage": job.get("aufenthalt_tage", 60),
|
||||
"trip_type": job.get("trip_type", "roundtrip"),
|
||||
"kabine": job.get("kabine", "premium_economy"),
|
||||
"gepaeck": job.get("gepaeck", "1koffer+handgepaeck"),
|
||||
"trip_type": job.get("trip_type", "roundtrip"),
|
||||
"kabine": job.get("kabine", "premium_economy"),
|
||||
"gepaeck": job.get("gepaeck", "1koffer+handgepaeck"),
|
||||
"airline_filter": job.get("airline_filter", ""),
|
||||
"layover_min": job.get("layover_min", 120),
|
||||
"layover_max": job.get("layover_max", 300),
|
||||
}
|
||||
log(f"Job an {node['name']} ({node['tailscale_ip']}): {payload}")
|
||||
try:
|
||||
|
|
|
|||
|
|
@ -22,11 +22,16 @@ def job():
|
|||
trip_type = data.get("trip_type", "roundtrip")
|
||||
kabine = data.get("kabine", "premium_economy")
|
||||
gepaeck = data.get("gepaeck", "1koffer+handgepaeck")
|
||||
airline_filter = data.get("airline_filter", "")
|
||||
layover_min = data.get("layover_min", 120)
|
||||
layover_max = data.get("layover_max", 300)
|
||||
|
||||
print(f"[{NODE_NAME}] Job: {scanner} {von}→{nach} ({trip_type}, {kabine}, Gepäck: {gepaeck}, +{tage}Tage/{aufenthalt}Tage)")
|
||||
airline_label = f" [{airline_filter}]" if airline_filter else ""
|
||||
print(f"[{NODE_NAME}] Job: {scanner}{airline_label} {von}→{nach} | {kabine} | Umstieg {layover_min}-{layover_max}min | +{tage}/{aufenthalt}T")
|
||||
|
||||
try:
|
||||
results = scrape(scanner, von, nach, tage, aufenthalt, trip_type, kabine, gepaeck)
|
||||
results = scrape(scanner, von, nach, tage, aufenthalt, trip_type, kabine,
|
||||
gepaeck, airline_filter, layover_min, layover_max)
|
||||
print(f"[{NODE_NAME}] {len(results)} Preise gefunden")
|
||||
return jsonify({
|
||||
"results": results,
|
||||
|
|
|
|||
|
|
@ -5,7 +5,8 @@ import re
|
|||
|
||||
def scrape(scanner, von, nach, tage=30, aufenthalt_tage=60,
|
||||
trip_type="roundtrip", kabine="premium_economy",
|
||||
gepaeck="1koffer+handgepaeck"):
|
||||
gepaeck="1koffer+handgepaeck", airline_filter="",
|
||||
layover_min=120, layover_max=300):
|
||||
dispatcher = {
|
||||
"google_flights": scrape_google_flights,
|
||||
"kayak": scrape_kayak,
|
||||
|
|
@ -15,7 +16,8 @@ def scrape(scanner, von, nach, tage=30, aufenthalt_tage=60,
|
|||
fn = dispatcher.get(scanner)
|
||||
if not fn:
|
||||
raise ValueError(f"Unbekannter Scanner: {scanner}")
|
||||
return fn(von, nach, tage, aufenthalt_tage, trip_type, kabine, gepaeck)
|
||||
return fn(von, nach, tage, aufenthalt_tage, trip_type, kabine, gepaeck,
|
||||
airline_filter, layover_min, layover_max)
|
||||
|
||||
|
||||
def _booking_url_google(von, nach, abflug, rueck, kc):
|
||||
|
|
@ -27,12 +29,26 @@ def _booking_url_google(von, nach, abflug, rueck, kc):
|
|||
f"#flt={von}.{nach}.{abflug};c:EUR;e:1;sd:1;t:f;sc:{kc}")
|
||||
|
||||
|
||||
def _booking_url_kayak(von, nach, abflug, rueck, kc, bags=1):
|
||||
# fs=bfc%3D1 = "bags filter: checked bags = 1 free bag included"
|
||||
bag_filter = f"&fs=bfc%3D{bags}" if bags else ""
|
||||
def _booking_url_kayak(von, nach, abflug, rueck, kc, bags=1,
|
||||
layover_min=120, layover_max=300, airline=""):
|
||||
"""
|
||||
Kayak fs-Filter:
|
||||
bfc=1 → min. 1 Freigepäck inklusive
|
||||
ctr=120,300 → Umstiegszeit 2–5 Stunden (in Minuten)
|
||||
airlines=XX → Airline-Code (CZ, CX, SQ, TG …)
|
||||
"""
|
||||
filters = []
|
||||
if bags:
|
||||
filters.append(f"bfc%3D{bags}")
|
||||
if layover_min and layover_max:
|
||||
filters.append(f"ctr%3D{layover_min}%2C{layover_max}")
|
||||
if airline:
|
||||
filters.append(f"airlines%3D{airline}")
|
||||
fs = ("&fs=" + "%3B".join(filters)) if filters else ""
|
||||
base = f"https://www.kayak.de/flights/{von}-{nach}/{abflug}"
|
||||
if rueck:
|
||||
return f"https://www.kayak.de/flights/{von}-{nach}/{abflug}/{rueck}?sort=price_a&cabin={kc}&currency=EUR{bag_filter}"
|
||||
return f"https://www.kayak.de/flights/{von}-{nach}/{abflug}?sort=price_a&cabin={kc}&currency=EUR{bag_filter}"
|
||||
return f"{base}/{rueck}?sort=price_a&cabin={kc}&currency=EUR{fs}"
|
||||
return f"{base}?sort=price_a&cabin={kc}&currency=EUR{fs}"
|
||||
|
||||
|
||||
def _booking_url_trip(von, nach, abflug_fmt, rueck_fmt, kc, von_name, nach_name):
|
||||
|
|
@ -105,7 +121,8 @@ def _consent_google(sb):
|
|||
|
||||
def scrape_google_flights(von, nach, tage=30, aufenthalt_tage=60,
|
||||
trip_type="roundtrip", kabine="premium_economy",
|
||||
gepaeck="1koffer+handgepaeck"):
|
||||
gepaeck="1koffer+handgepaeck", airline_filter="",
|
||||
layover_min=120, layover_max=300):
|
||||
abflug = (datetime.now() + timedelta(days=tage)).strftime("%Y-%m-%d")
|
||||
abflug_de = (datetime.now() + timedelta(days=tage)).strftime("%d.%m.%Y")
|
||||
rueck = (datetime.now() + timedelta(days=tage + aufenthalt_tage)).strftime("%Y-%m-%d") if trip_type == "roundtrip" else ""
|
||||
|
|
@ -228,17 +245,19 @@ def scrape_google_flights(von, nach, tage=30, aufenthalt_tage=60,
|
|||
|
||||
def scrape_kayak(von, nach, tage=30, aufenthalt_tage=60,
|
||||
trip_type="roundtrip", kabine="premium_economy",
|
||||
gepaeck="1koffer+handgepaeck"):
|
||||
gepaeck="1koffer+handgepaeck", airline_filter="",
|
||||
layover_min=120, layover_max=300):
|
||||
abflug = (datetime.now() + timedelta(days=tage)).strftime("%Y-%m-%d")
|
||||
rueck = (datetime.now() + timedelta(days=tage + aufenthalt_tage)).strftime("%Y-%m-%d") if trip_type == "roundtrip" else ""
|
||||
kc = KABINE_KAYAK.get(kabine, "w")
|
||||
# Bags-Filter nur wenn explizit Koffer verlangt
|
||||
# Hinweis: fs=bfc%3D1 filtert Kayak auf "1 Freigepäck inkl."
|
||||
kc = KABINE_KAYAK.get(kabine, "w")
|
||||
bags = 1 if "koffer" in gepaeck else 0
|
||||
booking_url = _booking_url_kayak(von, nach, abflug, rueck, kc, bags)
|
||||
booking_url_raw = _booking_url_kayak(von, nach, abflug, rueck, kc, 0) # ohne Filter für Fallback
|
||||
booking_url = _booking_url_kayak(von, nach, abflug, rueck, kc, bags,
|
||||
layover_min, layover_max, airline_filter)
|
||||
booking_url_raw = _booking_url_kayak(von, nach, abflug, rueck, kc, 0,
|
||||
layover_min, layover_max, airline_filter)
|
||||
airline_label = f" [{airline_filter}]" if airline_filter else ""
|
||||
print(f"[KY{airline_label}] URL: {booking_url}")
|
||||
|
||||
print(f"[KY] URL: {booking_url}")
|
||||
results = []
|
||||
|
||||
with SB(uc=True, headless=True, chromium_arg="--no-sandbox --disable-dev-shm-usage") as sb:
|
||||
|
|
@ -258,7 +277,8 @@ def scrape_kayak(von, nach, tage=30, aufenthalt_tage=60,
|
|||
p = _parse_preis(e.text)
|
||||
if p:
|
||||
results.append({"scanner": "kayak", "preis": p,
|
||||
"waehrung": "EUR", "airline": "",
|
||||
"waehrung": "EUR",
|
||||
"airline": airline_filter or "",
|
||||
"abflug": abflug, "ankunft": rueck,
|
||||
"booking_url": booking_url})
|
||||
if results:
|
||||
|
|
@ -270,26 +290,30 @@ def scrape_kayak(von, nach, tage=30, aufenthalt_tage=60,
|
|||
for r in _preise_aus_body(body, "kayak", abflug):
|
||||
r["ankunft"] = rueck
|
||||
r["booking_url"] = booking_url
|
||||
r["airline"] = airline_filter or ""
|
||||
results.append(r)
|
||||
|
||||
# Kein Ergebnis mit Bags-Filter → Fallback ohne Filter (zeigt was verfügbar ist)
|
||||
# Kein Ergebnis mit Bags-Filter → Fallback ohne Filter
|
||||
if not results and bags > 0:
|
||||
print(f"[KY] Kein Ergebnis mit Bags-Filter — Fallback ohne Filter")
|
||||
print(f"[KY] Kein Ergebnis mit Filtern — Fallback ohne Bags-Filter")
|
||||
sb.open(booking_url_raw)
|
||||
sb.sleep(12)
|
||||
body2 = sb.get_text("body")
|
||||
for r in _preise_aus_body(body2, "kayak", abflug):
|
||||
r["ankunft"] = rueck
|
||||
r["booking_url"] = booking_url_raw
|
||||
r["airline"] = airline_filter or ""
|
||||
results.append(r)
|
||||
|
||||
print(f"[KY] Ergebnis: {[r['preis'] for r in results[:5]]}")
|
||||
airline_label = f" [{airline_filter}]" if airline_filter else ""
|
||||
print(f"[KY{airline_label}] Ergebnis: {[r['preis'] for r in results[:5]]}")
|
||||
return results[:10]
|
||||
|
||||
|
||||
def scrape_trip(von, nach, tage=30, aufenthalt_tage=60,
|
||||
trip_type="roundtrip", kabine="premium_economy",
|
||||
gepaeck="1koffer+handgepaeck"):
|
||||
gepaeck="1koffer+handgepaeck", airline_filter="",
|
||||
layover_min=120, layover_max=300):
|
||||
abflug_fmt = (datetime.now() + timedelta(days=tage)).strftime("%Y%m%d")
|
||||
rueck_fmt = (datetime.now() + timedelta(days=tage + aufenthalt_tage)).strftime("%Y%m%d") if trip_type == "roundtrip" else ""
|
||||
abflug_iso = (datetime.now() + timedelta(days=tage)).strftime("%Y-%m-%d")
|
||||
|
|
@ -351,7 +375,8 @@ def scrape_trip(von, nach, tage=30, aufenthalt_tage=60,
|
|||
|
||||
def scrape_skyscanner(von, nach, tage=30, aufenthalt_tage=60,
|
||||
trip_type="roundtrip", kabine="premium_economy",
|
||||
gepaeck="1koffer+handgepaeck"):
|
||||
gepaeck="1koffer+handgepaeck", airline_filter="",
|
||||
layover_min=120, layover_max=300):
|
||||
"""Skyscanner hat starken Bot-Schutz — übersprungen."""
|
||||
print("[SS] Skyscanner übersprungen (Bot-Detection)")
|
||||
return []
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue