fix: Google Flights per Formular-Suche statt Hash-URL; gepaeck-Parameter in allen Scrapern korrigiert

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
Cursor 2026-02-25 15:40:44 +07:00
parent b70dbbcd13
commit c9e8684a88

View file

@ -106,42 +106,106 @@ def _consent_google(sb):
def scrape_google_flights(von, nach, tage=30, aufenthalt_tage=60,
trip_type="roundtrip", kabine="premium_economy",
gepaeck="1koffer+handgepaeck"):
abflug = (datetime.now() + timedelta(days=tage)).strftime("%Y-%m-%d")
rueck = (datetime.now() + timedelta(days=tage + aufenthalt_tage)).strftime("%Y-%m-%d") if trip_type == "roundtrip" else ""
abflug = (datetime.now() + timedelta(days=tage)).strftime("%Y-%m-%d")
abflug_de = (datetime.now() + timedelta(days=tage)).strftime("%d.%m.%Y")
rueck = (datetime.now() + timedelta(days=tage + aufenthalt_tage)).strftime("%Y-%m-%d") if trip_type == "roundtrip" else ""
rueck_de = (datetime.now() + timedelta(days=tage + aufenthalt_tage)).strftime("%d.%m.%Y") if trip_type == "roundtrip" else ""
kc = KABINE_GOOGLE.get(kabine, "w")
booking_url = _booking_url_google(von, nach, abflug, rueck, kc)
print(f"[GF] URL: {booking_url[:100]}")
# Stadtname-Mapping für die Suchfelder
stadtname = {"FRA": "Frankfurt", "HAN": "Hanoi", "KTI": "Phnom Penh",
"PNH": "Phnom Penh", "BKK": "Bangkok", "SGN": "Ho Chi Minh"}
von_name = stadtname.get(von, von)
nach_name = stadtname.get(nach, nach)
print(f"[GF] Formular-Suche: {von_name}{nach_name} {abflug_de}")
results = []
with SB(uc=True, headless=True, chromium_arg="--no-sandbox --disable-dev-shm-usage") as sb:
sb.open(booking_url)
sb.sleep(8)
# 1. Startseite laden
sb.open("https://www.google.com/travel/flights?hl=de&curr=EUR")
sb.sleep(6)
if _consent_google(sb):
sb.open(booking_url)
sb.sleep(10)
title = sb.get_title()
print(f"[GF] Title: {title[:80]}")
# Wenn Homepage geladen statt Suchergebnisse: JS-Navigation erzwingen
if "weltweit" in title or title.strip() == "" or "Google" == title.strip():
print("[GF] Homepage erkannt — erzwinge Navigation via JS")
sb.execute_script(f"window.location.href = '{booking_url}';")
sb.sleep(10)
title = sb.get_title()
print(f"[GF] Title nach JS-Nav: {title[:80]}")
body = sb.get_text("body")
print(f"[GF] Body-Länge: {len(body)}")
sb.sleep(4)
# 2. Kabinen-Typ setzen (Roundtrip / Economy dropdown)
try:
elems = sb.find_elements('[aria-label*=""]', timeout=3)
# Kabinen-Auswahl: nach "Premium Economy" suchen
kab_label = {"e": "Economy", "w": "Premium Economy", "b": "Business", "f": "First"}
print(f"[GF] Versuche Kabine zu setzen: {kab_label.get(kc, 'Premium Economy')}")
# Kabinen-Dropdown öffnen (2. Dropdown in der Suchleiste)
kab_btns = sb.find_elements('[class*="cabin"] button, [aria-label*="abine"], [data-value*="cabin"]', timeout=3)
if not kab_btns:
kab_btns = sb.find_elements('button[class*="VfPpkd"]', timeout=2)
if kab_btns:
kab_btns[1].click()
sb.sleep(1)
except Exception as e:
print(f"[GF] Kabinen-Klick fehlgeschlagen: {e}")
# 3. Abflugort eingeben
try:
von_field = None
for sel in ['input[placeholder*="Von"], input[aria-label*="Von"], input[aria-label*="Abflug"], '
'input[placeholder*="Where from"]']:
elems = sb.find_elements(sel, timeout=2)
if elems:
von_field = elems[0]
break
if not von_field:
# Fallback: erster Input im Suchformular
von_field = sb.find_element('div[class*="target"] input', timeout=3)
von_field.clear()
von_field.send_keys(von_name)
sb.sleep(2)
# Ersten Vorschlag auswählen
sb.find_element('[role="option"]', timeout=3).click()
sb.sleep(1)
print(f"[GF] Abflugort gesetzt: {von_name}")
except Exception as e:
print(f"[GF] Abflugort-Eingabe fehlgeschlagen: {e}")
# 4. Zielort eingeben
try:
nach_field = sb.find_element(
'input[placeholder*="Wohin"], input[aria-label*="Ziel"], input[aria-label*="Where to"]',
timeout=3)
nach_field.clear()
nach_field.send_keys(nach_name)
sb.sleep(2)
sb.find_element('[role="option"]', timeout=3).click()
sb.sleep(1)
print(f"[GF] Zielort gesetzt: {nach_name}")
except Exception as e:
print(f"[GF] Zielort-Eingabe fehlgeschlagen: {e}")
# 5. Suchen-Button klicken
try:
for sel in ['button[aria-label*="Suchen"], button[aria-label*="Search"]',
'button[jsname="vLv7Lb"]', 'button[class*="search"]']:
btns = sb.find_elements(sel, timeout=2)
if btns:
btns[0].click()
print(f"[GF] Suche gestartet via {sel}")
break
except Exception as e:
print(f"[GF] Such-Button fehlgeschlagen: {e}")
sb.sleep(12)
title = sb.get_title()
body = sb.get_text("body")
print(f"[GF] Title: {title[:80]} | Body: {len(body)} chars")
# 6. Preise extrahieren
try:
elems = sb.find_elements('[aria-label*=""], [aria-label*="EUR"]', timeout=3)
for elem in elems[:20]:
label = elem.get_attribute('aria-label') or elem.text
p = _parse_preis(label)
if p:
if p and p > 400:
results.append({"scanner": "google_flights", "preis": p,
"waehrung": "EUR", "airline": "",
"abflug": abflug, "ankunft": rueck,
@ -149,12 +213,15 @@ def scrape_google_flights(von, nach, tage=30, aufenthalt_tage=60,
except Exception:
pass
# Fallback: Regex über Body (nur plausible Preise > 400 EUR)
if not results:
for r in _preise_aus_body(body, "google_flights", abflug):
r["ankunft"] = rueck
r["booking_url"] = booking_url
results.append(r)
if r["preis"] > 400:
r["ankunft"] = rueck
r["booking_url"] = booking_url
results.append(r)
results = [r for r in results if r["preis"] > 400]
print(f"[GF] Ergebnis: {[r['preis'] for r in results[:5]]}")
return results[:10]