feat: Momondo + Wego + Traveloka Scanner, Google Flights + Skyscanner deaktiviert

- worker.py: scrape_momondo() — gleiche Firma wie Kayak, andere Preise
- worker.py: scrape_wego() — asiatische Flugsuchmaschine
- worker.py: scrape_traveloka() — größte SE-Asien Reiseplattform
- worker.py: Google Flights + Skyscanner auf _scrape_disabled gesetzt
- db.py: Dispatcher um momondo/wego/traveloka erweitert

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
Cursor 2026-02-25 21:39:46 +07:00
parent f8797f3e08
commit 207485fb8f

View file

@ -3,6 +3,12 @@ from datetime import datetime, timedelta
import re
def _scrape_disabled(*args, **kwargs):
"""Deaktivierter Scanner — gibt leere Ergebnisse zurück."""
print("[SKIP] Scanner deaktiviert")
return [], ""
def scrape(scanner, von, nach, tage=30, aufenthalt_tage=60,
trip_type="roundtrip", kabine="premium_economy",
gepaeck="1koffer+handgepaeck", airline_filter="",
@ -15,10 +21,13 @@ def scrape(scanner, von, nach, tage=30, aufenthalt_tage=60,
screenshot_b64 = JPEG Full-Page Screenshot als base64-String (leer wenn Fehler)
"""
dispatcher = {
"google_flights": scrape_google_flights,
"google_flights": _scrape_disabled,
"kayak": scrape_kayak,
"kayak_multicity": scrape_kayak_multicity,
"skyscanner": scrape_skyscanner,
"momondo": scrape_momondo,
"wego": scrape_wego,
"traveloka": scrape_traveloka,
"skyscanner": _scrape_disabled,
"trip": scrape_trip,
}
fn = dispatcher.get(scanner)
@ -90,6 +99,28 @@ def _booking_url_kayak(von, nach, abflug, rueck, kc, bags=1,
return f"{base}?sort=price_a&cabin={kc}&currency=EUR{fs}"
def _booking_url_momondo(von, nach, abflug, rueck, kc, bags=1,
layover_min=120, layover_max=300, airline="",
max_flugzeit_h=22, max_stops=2):
"""Momondo URL — gleiche Struktur wie Kayak (Booking Holdings), andere Domain."""
filters = []
if bags:
filters.append(f"bfc%3D{bags}")
if layover_min and layover_max:
filters.append(f"ctr%3D{layover_min}%2C{layover_max}")
if max_flugzeit_h:
filters.append(f"duration%3D-{max_flugzeit_h * 60}")
if max_stops is not None and max_stops < 10:
filters.append(f"s%3D{max_stops}")
if airline:
filters.append(f"airlines%3D{airline}")
fs = ("&fs=" + "%3B".join(filters)) if filters else ""
base = f"https://www.momondo.de/flightsearch/{von}-{nach}/{abflug}"
if rueck:
return f"{base}/{rueck}?sort=price_a&cabin={kc}&currency=EUR{fs}"
return f"{base}?sort=price_a&cabin={kc}&currency=EUR{fs}"
def _booking_url_trip(von, nach, abflug_fmt, rueck_fmt, kc, von_name, nach_name):
if rueck_fmt:
return (f"https://www.trip.com/flights/{von_name}-to-{nach_name}/"
@ -214,10 +245,33 @@ def scrape_google_flights(von, nach, tage=30, aufenthalt_tage=60,
print(f"[GF] Suche: {von_name}{nach_name} {abflug_de}")
with SB(uc=True, headless=True, chromium_arg="--no-sandbox --disable-dev-shm-usage") as sb:
sb.open("https://www.google.com/travel/flights?hl=de&curr=EUR")
sb.sleep(5)
# ── Strategie 1: Direkte URL mit Datums-Parametern ─────────────────
# Google Flights verarbeitet den Hash-Fragment erst nach JS-Ausführung
direct_url = (
f"https://www.google.com/travel/flights?hl=de&curr=EUR"
f"#flt={von}.{nach}.{abflug}*{nach}.{von}.{rueck}"
f";c:EUR;e:1;sd:1;t:r;sc:w"
) if rueck else (
f"https://www.google.com/travel/flights?hl=de&curr=EUR"
f"#flt={von}.{nach}.{abflug};c:EUR;e:1;sd:1;t:f;sc:w"
)
sb.open(direct_url)
sb.sleep(8)
_consent_google(sb)
sb.sleep(2)
sb.sleep(3)
title_direct = sb.get_title()
print(f"[GF] URL-Ansatz: {title_direct[:60]}")
# Wenn direkte URL Ergebnisse liefert (Titel enthält Städtenamen)
url_erfolgreich = any(kw in title_direct for kw in
[von, nach, "FRA", "KTI", "Frankfurt", "Phnom", "Flüge"])
if not url_erfolgreich:
# ── Strategie 2: Startseite + Formular befüllen ─────────────────
print("[GF] Direktlink kein Ergebnis — wechsle zu Formular-Ansatz")
sb.open("https://www.google.com/travel/flights?hl=de&curr=EUR")
sb.sleep(5)
_consent_google(sb)
sb.sleep(2)
# ── 1. Kabine auf "Premium Economy" setzen ──────────────────────────
try:
@ -647,6 +701,226 @@ def scrape_kayak_multicity(von, nach, tage=30, aufenthalt_tage=60,
return results[:10], screenshot_b64
def scrape_momondo(von, nach, tage=30, aufenthalt_tage=60,
                   trip_type="roundtrip", kabine="premium_economy",
                   gepaeck="1koffer+handgepaeck", airline_filter="",
                   layover_min=120, layover_max=300,
                   max_flugzeit_h=22, max_stops=2):
    """Scrape flight prices from momondo.de.

    Opens the search URL built by ``_booking_url_momondo`` in a headless
    browser, then tries a list of CSS selectors for price elements. If no
    selector yields a price, the page body text is handed to
    ``_preise_aus_body``. If that also yields nothing and a bag filter was
    active, the search is repeated once without the bag filter.

    Args:
        von: Origin code.
        nach: Destination code.
        tage: Days from now until departure.
        aufenthalt_tage: Days between departure and return.
        trip_type: ``"roundtrip"`` adds a return date; anything else is
            treated as one-way.
        kabine: Cabin key looked up in ``KABINE_KAYAK`` (default code "w").
        gepaeck: Baggage spec; the bag filter is enabled when it contains
            the substring ``"koffer"``.
        airline_filter: Optional airline filter, also copied into each
            result's ``airline`` field.
        layover_min: Passed through to the URL builder.
        layover_max: Passed through to the URL builder.
        max_flugzeit_h: Passed through to the URL builder.
        max_stops: Passed through to the URL builder.

    Returns:
        Tuple ``(results, screenshot_b64)``: at most ten result dicts and
        whatever ``_take_screenshot`` returns for the final page.
    """
    # Read the clock once so departure and return derive from the same
    # instant, even when the call happens right around midnight.
    start = datetime.now() + timedelta(days=tage)
    abflug = start.strftime("%Y-%m-%d")
    rueck = ((start + timedelta(days=aufenthalt_tage)).strftime("%Y-%m-%d")
             if trip_type == "roundtrip" else "")
    kc = KABINE_KAYAK.get(kabine, "w")
    bags = 1 if "koffer" in gepaeck else 0

    booking_url = _booking_url_momondo(von, nach, abflug, rueck, kc, bags,
                                       layover_min, layover_max, airline_filter,
                                       max_flugzeit_h, max_stops)
    # Same search without the bag filter, used as a fallback further down.
    booking_url_raw = _booking_url_momondo(von, nach, abflug, rueck, kc, 0,
                                           layover_min, layover_max, airline_filter,
                                           max_flugzeit_h, max_stops)

    airline_label = f" [{airline_filter}]" if airline_filter else ""
    print(f"[MO{airline_label}] URL: {booking_url}")

    results = []
    screenshot_b64 = ""
    with SB(uc=True, headless=True, chromium_arg="--no-sandbox --disable-dev-shm-usage") as sb:
        sb.open(booking_url)
        sb.sleep(15)
        title = sb.get_title()
        body = sb.get_text("body")
        print(f"[MO] Title: {title[:80]}")

        # Try the selectors in order and stop at the first one that yields
        # at least one parsable price.
        for sel in ['.price-text', '.f8F1-price-text', 'div[class*="price"] span',
                    'span[class*="price"]', '.Iqt3', 'div.nrc6-price', '.price',
                    '[class*="resultPrice"]', '.lowest-price']:
            try:
                elems = sb.find_elements(sel)
                if elems:
                    for e in elems[:15]:
                        p = _parse_preis(e.text)
                        if p:
                            # "ankunft" is filled with the return date.
                            results.append({"scanner": "momondo", "preis": p,
                                            "waehrung": "EUR",
                                            "airline": airline_filter or "",
                                            "abflug": abflug, "ankunft": rueck,
                                            "booking_url": booking_url})
            except Exception as exc:
                # Best effort: a failing selector must not abort the scrape,
                # but it is logged instead of being swallowed silently.
                print(f"[MO] Selektor {sel} übersprungen: {type(exc).__name__}")
            # Checked outside the try so prices collected before an error
            # are not mixed with matches from a later selector.
            if results:
                break

        # Fallback 1: extract prices from the raw body text.
        if not results:
            for r in _preise_aus_body(body, "momondo", abflug):
                r["ankunft"] = rueck
                r["booking_url"] = booking_url
                r["airline"] = airline_filter or ""
                results.append(r)

        # Fallback 2: repeat the search without the bag filter.
        if not results and bags > 0:
            print("[MO] Kein Ergebnis — Fallback ohne Bags-Filter")
            sb.open(booking_url_raw)
            sb.sleep(12)
            body2 = sb.get_text("body")
            for r in _preise_aus_body(body2, "momondo", abflug):
                r["ankunft"] = rueck
                r["booking_url"] = booking_url_raw
                r["airline"] = airline_filter or ""
                results.append(r)

        print(f"[MO{airline_label}] Ergebnis: {[r['preis'] for r in results[:5]]}")
        screenshot_b64 = _take_screenshot(sb)

    return results[:10], screenshot_b64
def scrape_wego(von, nach, tage=30, aufenthalt_tage=60,
                trip_type="roundtrip", kabine="premium_economy",
                gepaeck="1koffer+handgepaeck", airline_filter="",
                layover_min=120, layover_max=300,
                max_flugzeit_h=22, max_stops=2):
    """Scrape flight prices from wego.com.

    Opens a Wego search URL in a headless browser and tries a list of CSS
    selectors for price elements. If no selector yields a price, the page
    body text is handed to ``_preise_aus_body``.

    Args:
        von: Origin code (lower-cased for the URL).
        nach: Destination code (lower-cased for the URL).
        tage: Days from now until departure.
        aufenthalt_tage: Days between departure and return.
        trip_type: ``"roundtrip"`` adds an inbound date; anything else is
            treated as one-way.
        kabine: Cabin key mapped to Wego's cabin value; unknown keys fall
            back to ``"premiumEconomy"``.
        gepaeck: Not used by this scraper.
        airline_filter: Not used by this scraper.
        layover_min: Not used by this scraper.
        layover_max: Not used by this scraper.
        max_flugzeit_h: Not used by this scraper.
        max_stops: Not used by this scraper.

    Returns:
        Tuple ``(results, screenshot_b64)``: at most ten result dicts and
        whatever ``_take_screenshot`` returns for the page.
    """
    # Read the clock once so departure and return derive from the same
    # instant, even when the call happens right around midnight.
    start = datetime.now() + timedelta(days=tage)
    abflug = start.strftime("%Y-%m-%d")
    rueck = ((start + timedelta(days=aufenthalt_tage)).strftime("%Y-%m-%d")
             if trip_type == "roundtrip" else "")

    KABINE_WEGO = {"economy": "economy", "premium_economy": "premiumEconomy",
                   "business": "business", "first": "first"}
    kc = KABINE_WEGO.get(kabine, "premiumEconomy")

    # The inbound date is the only difference between round-trip and one-way.
    inbound = f"&inbound_date={rueck}" if rueck else ""
    booking_url = (f"https://www.wego.com/flights/searches/new?"
                   f"origin={von.lower()}&destination={nach.lower()}"
                   f"&outbound_date={abflug}{inbound}"
                   f"&cabin={kc}&adults=1&children=0&infants=0"
                   f"&currency=EUR&sort=price")

    print(f"[WG] URL: {booking_url}")

    results = []
    screenshot_b64 = ""
    with SB(uc=True, headless=True, chromium_arg="--no-sandbox --disable-dev-shm-usage") as sb:
        sb.open(booking_url)
        sb.sleep(18)
        title = sb.get_title()
        body = sb.get_text("body")
        print(f"[WG] Title: {title[:80]} | Body: {len(body)} chars")

        # Try the selectors in order and stop at the first one that yields
        # at least one parsable price.
        for sel in ['[class*="price"]', '[data-testid*="price"]',
                    '.flight-price', 'span[class*="Price"]',
                    '.fare-price', '[class*="FarePrice"]']:
            try:
                elems = sb.find_elements(sel)
                if elems:
                    for e in elems[:15]:
                        p = _parse_preis(e.text)
                        if p:
                            # "ankunft" is filled with the return date.
                            results.append({"scanner": "wego", "preis": p,
                                            "waehrung": "EUR", "airline": "",
                                            "abflug": abflug, "ankunft": rueck,
                                            "booking_url": booking_url})
            except Exception as exc:
                # Best effort: a failing selector must not abort the scrape,
                # but it is logged instead of being swallowed silently.
                print(f"[WG] Selektor {sel} übersprungen: {type(exc).__name__}")
            # Checked outside the try so prices collected before an error
            # are not mixed with matches from a later selector.
            if results:
                break

        # Fallback: extract prices from the raw body text.
        if not results:
            for r in _preise_aus_body(body, "wego", abflug):
                r["ankunft"] = rueck
                r["booking_url"] = booking_url
                results.append(r)

        print(f"[WG] Ergebnis: {[r['preis'] for r in results[:5]]}")
        screenshot_b64 = _take_screenshot(sb)

    return results[:10], screenshot_b64
def scrape_traveloka(von, nach, tage=30, aufenthalt_tage=60,
                     trip_type="roundtrip", kabine="premium_economy",
                     gepaeck="1koffer+handgepaeck", airline_filter="",
                     layover_min=120, layover_max=300,
                     max_flugzeit_h=22, max_stops=2):
    """Scrape flight prices from traveloka.com.

    Opens a Traveloka full-search URL in a headless browser, tries to
    dismiss a cookie/consent banner, and then tries a list of CSS selectors
    for price elements. If no selector yields a price, the body text
    captured right after page load is handed to ``_preise_aus_body``.

    Args:
        von: Origin code.
        nach: Destination code.
        tage: Days from now until departure.
        aufenthalt_tage: Days between departure and return.
        trip_type: ``"roundtrip"`` adds a return date; anything else is
            treated as one-way.
        kabine: Cabin key mapped to Traveloka's seat-class value; unknown
            keys fall back to ``"PREMIUM_ECONOMY"``.
        gepaeck: Not used by this scraper.
        airline_filter: Not used by this scraper.
        layover_min: Not used by this scraper.
        layover_max: Not used by this scraper.
        max_flugzeit_h: Not used by this scraper.
        max_stops: Not used by this scraper.

    Returns:
        Tuple ``(results, screenshot_b64)``: at most ten result dicts
        (dates in ISO format) and whatever ``_take_screenshot`` returns.
    """
    # Read the clock once: the URL dates (DD-MM-YYYY) and the ISO dates in
    # the results must describe the same days, even around midnight.
    is_roundtrip = trip_type == "roundtrip"
    depart = datetime.now() + timedelta(days=tage)
    back = depart + timedelta(days=aufenthalt_tage)
    abflug = depart.strftime("%d-%m-%Y")
    rueck = back.strftime("%d-%m-%Y") if is_roundtrip else ""
    abflug_iso = depart.strftime("%Y-%m-%d")
    rueck_iso = back.strftime("%Y-%m-%d") if is_roundtrip else ""

    # Traveloka URL parameters
    KABINE_TV = {"economy": "ECONOMY", "premium_economy": "PREMIUM_ECONOMY",
                 "business": "BUSINESS", "first": "FIRST_CLASS"}
    kc = KABINE_TV.get(kabine, "PREMIUM_ECONOMY")

    # Round trips carry both dates in "dt", separated by a dot.
    dt = f"{abflug}.{rueck}" if rueck else abflug
    booking_url = (f"https://www.traveloka.com/en-en/flight/fullsearch?"
                   f"ap={von}.{nach}&dt={dt}"
                   f"&ps=1.0.0&sc={kc}")

    print(f"[TV] URL: {booking_url}")

    results = []
    screenshot_b64 = ""
    with SB(uc=True, headless=True, chromium_arg="--no-sandbox --disable-dev-shm-usage") as sb:
        sb.open(booking_url)
        sb.sleep(18)
        title = sb.get_title()
        body = sb.get_text("body")
        print(f"[TV] Title: {title[:80]} | Body: {len(body)} chars")

        # Click the cookie/consent banner if one of these buttons exists.
        for sel in ['button[data-testid*="accept"]', 'button[id*="accept"]',
                    '#onetrust-accept-btn-handler', 'button[class*="accept"]']:
            try:
                sb.find_element(sel, timeout=2).click()
                sb.sleep(2)
                break
            except Exception:
                # A missing button is the normal case; try the next one.
                pass

        # NOTE(review): the URL carries no currency parameter, yet results
        # are labelled EUR; confirm the page really shows EUR prices.
        # Try the selectors in order and stop at the first one that yields
        # at least one parsable price.
        for sel in ['[class*="price"]', '[data-testid*="price"]',
                    '.flight-price', 'span[class*="Price"]',
                    'div[class*="farePrice"]', '[class*="totalPrice"]']:
            try:
                elems = sb.find_elements(sel)
                if elems:
                    for e in elems[:15]:
                        p = _parse_preis(e.text)
                        if p:
                            # "ankunft" is filled with the return date.
                            results.append({"scanner": "traveloka", "preis": p,
                                            "waehrung": "EUR", "airline": "",
                                            "abflug": abflug_iso, "ankunft": rueck_iso,
                                            "booking_url": booking_url})
            except Exception as exc:
                # Best effort: a failing selector must not abort the scrape,
                # but it is logged instead of being swallowed silently.
                print(f"[TV] Selektor {sel} übersprungen: {type(exc).__name__}")
            # Checked outside the try so prices collected before an error
            # are not mixed with matches from a later selector.
            if results:
                break

        # Fallback: extract prices from the body text captured after load.
        if not results:
            for r in _preise_aus_body(body, "traveloka", abflug_iso):
                r["ankunft"] = rueck_iso
                r["booking_url"] = booking_url
                results.append(r)

        print(f"[TV] Ergebnis: {[r['preis'] for r in results[:5]]}")
        screenshot_b64 = _take_screenshot(sb)

    return results[:10], screenshot_b64
def scrape_skyscanner(von, nach, tage=30, aufenthalt_tage=60,
trip_type="roundtrip", kabine="premium_economy",
gepaeck="1koffer+handgepaeck", airline_filter="",