feat: Airlines CZ/CX/SQ/TG, Kayak Umstiegszeit-Filter 2-5h, airline_filter Job-Parameter
Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
parent
c9e8684a88
commit
36d671c301
4 changed files with 118 additions and 66 deletions
|
|
@ -37,19 +37,28 @@ def init_db():
|
||||||
trip_type TEXT DEFAULT 'roundtrip',
|
trip_type TEXT DEFAULT 'roundtrip',
|
||||||
kabine TEXT DEFAULT 'premium_economy',
|
kabine TEXT DEFAULT 'premium_economy',
|
||||||
gepaeck TEXT DEFAULT '1koffer+handgepaeck',
|
gepaeck TEXT DEFAULT '1koffer+handgepaeck',
|
||||||
|
airline_filter TEXT DEFAULT '',
|
||||||
|
layover_min INTEGER DEFAULT 120,
|
||||||
|
layover_max INTEGER DEFAULT 300,
|
||||||
intervall TEXT DEFAULT 'daily',
|
intervall TEXT DEFAULT 'daily',
|
||||||
aktiv INTEGER DEFAULT 1,
|
aktiv INTEGER DEFAULT 1,
|
||||||
created_at TEXT DEFAULT (datetime('now'))
|
created_at TEXT DEFAULT (datetime('now'))
|
||||||
)
|
)
|
||||||
""")
|
""")
|
||||||
# Spalte nachrüsten falls DB schon existiert
|
# Spalten nachrüsten falls DB schon existiert
|
||||||
try:
|
for col_sql in [
|
||||||
c.execute("ALTER TABLE jobs ADD COLUMN gepaeck TEXT DEFAULT '1koffer+handgepaeck'")
|
"ALTER TABLE jobs ADD COLUMN gepaeck TEXT DEFAULT '1koffer+handgepaeck'",
|
||||||
c.execute("ALTER TABLE jobs ADD COLUMN aufenthalt_tage INTEGER DEFAULT 60")
|
"ALTER TABLE jobs ADD COLUMN aufenthalt_tage INTEGER DEFAULT 60",
|
||||||
c.execute("ALTER TABLE jobs ADD COLUMN trip_type TEXT DEFAULT 'roundtrip'")
|
"ALTER TABLE jobs ADD COLUMN trip_type TEXT DEFAULT 'roundtrip'",
|
||||||
c.execute("ALTER TABLE jobs ADD COLUMN kabine TEXT DEFAULT 'premium_economy'")
|
"ALTER TABLE jobs ADD COLUMN kabine TEXT DEFAULT 'premium_economy'",
|
||||||
except Exception:
|
"ALTER TABLE jobs ADD COLUMN airline_filter TEXT DEFAULT ''",
|
||||||
pass
|
"ALTER TABLE jobs ADD COLUMN layover_min INTEGER DEFAULT 120",
|
||||||
|
"ALTER TABLE jobs ADD COLUMN layover_max INTEGER DEFAULT 300",
|
||||||
|
]:
|
||||||
|
try:
|
||||||
|
c.execute(col_sql)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
c.execute("""
|
c.execute("""
|
||||||
CREATE TABLE IF NOT EXISTS prices (
|
CREATE TABLE IF NOT EXISTS prices (
|
||||||
|
|
@ -106,40 +115,42 @@ def init_db():
|
||||||
""")
|
""")
|
||||||
|
|
||||||
# Standard-Prompt
|
# Standard-Prompt
|
||||||
c.execute("""
|
PROMPT_TEXT = """Du bist ein Flugpreis-Analyst. Analysiere Preisdaten fuer folgenden Flug:
|
||||||
INSERT OR IGNORE INTO prompts (name, inhalt) VALUES (
|
|
||||||
'ki_auswertung',
|
|
||||||
'Du bist ein Flugpreis-Analyst. Analysiere Preisdaten für folgenden Flug:
|
|
||||||
|
|
||||||
STRECKE: ROUNDTRIP Frankfurt (FRA) → Phnom Penh Techo Airport (KTI)
|
STRECKE: ROUNDTRIP Frankfurt (FRA) to Phnom Penh Techo Airport (KTI)
|
||||||
KABINE: Premium Economy
|
KABINE: Premium Economy | GEPAECK: 1 Aufgabekoffer + Handgepaeck | AUFENTHALT: ~2 Monate
|
||||||
GEPÄCK: 1 großer Aufgabekoffer + Handgepäck (zwingend inklusive!)
|
|
||||||
UMSTIEG: typischerweise Hanoi (HAN) — Durchgepäck FRA→KTI, kein Umsteigen des Koffers nötig
|
|
||||||
AUFENTHALT: ca. 2 Monate
|
|
||||||
|
|
||||||
WICHTIG: Nur Preise bewerten die 1 Aufgabekoffer enthalten. Premium Economy schließt dies normalerweise ein — bei verdächtig günstigen Preisen (<700 EUR Roundtrip) prüfen ob es sich um reine Handgepäck-Tarife handeln könnte.
|
BEVORZUGTE AIRLINES:
|
||||||
|
- China Southern (CZ): Hub Guangzhou (CAN)
|
||||||
|
- Cathay Pacific (CX): Hub Hongkong (HKG)
|
||||||
|
- Singapore Airlines (SQ): Hub Singapur (SIN)
|
||||||
|
- Thai Airways (TG): Hub Bangkok (BKK)
|
||||||
|
- Vietnam Airlines (VN): Hub Hanoi (HAN) - Durchgepaeck FRA-KTI
|
||||||
|
|
||||||
Aktuelle Preise heute (nach Anbieter und Standort des Scanners):
|
UMSTIEG-REGEL: Umstiegszeit an asiatischen Hubs MUSS 2-5 Stunden sein (120-300 Minuten).
|
||||||
|
Zu kurz (<2h) = Gepaeck-Risiko. Zu lang (>5h) = unzumutbare Wartezeit.
|
||||||
|
|
||||||
|
WICHTIG: Preise unter 1000 EUR sind fuer Roundtrip PE + Koffer + 2 Monate hoechstwahrscheinlich unplausibel.
|
||||||
|
|
||||||
|
Aktuelle Preise (Anbieter | Node | Airline | Preis):
|
||||||
{preise_heute}
|
{preise_heute}
|
||||||
|
|
||||||
Preisverlauf letzte 30 Tage:
|
Verlauf 30 Tage:
|
||||||
{preisverlauf}
|
{preisverlauf}
|
||||||
|
|
||||||
Statistik:
|
Statistik: Durchschnitt {avg} EUR | Min {min} EUR | Max {max} EUR
|
||||||
- Durchschnitt: {avg} EUR
|
|
||||||
- Minimum: {min} EUR
|
|
||||||
- Maximum: {max} EUR
|
|
||||||
|
|
||||||
Antworte auf Deutsch in diesem Format:
|
Antworte auf Deutsch:
|
||||||
EMPFEHLUNG: [JETZT BUCHEN / WARTEN / NEUTRAL]
|
EMPFEHLUNG: [JETZT BUCHEN / WARTEN / NEUTRAL]
|
||||||
BEGRUENDUNG: [1-2 Sätze warum]
|
BEGRUENDUNG: [1-2 Saetze]
|
||||||
BESTER_PREIS: [günstigster Anbieter, Preis, Scanner-Standort]
|
BESTER_PREIS: [Anbieter + Airline + Preis + Node]
|
||||||
|
BESTE_AIRLINE: [welche der 4 Airlines gerade am guenstigsten]
|
||||||
TREND: [STEIGEND / FALLEND / STABIL]
|
TREND: [STEIGEND / FALLEND / STABIL]
|
||||||
GEO_UNTERSCHIED: [Preisunterschied DE-Scanner vs. KH-Scanner falls erkennbar]
|
GEO_UNTERSCHIED: [DE-Scanner vs. KH-Scanner Preisdifferenz]
|
||||||
GEPAECK_WARNUNG: [Ja/Nein — ob Preise möglicherweise kein Aufgabegepäck enthalten]
|
PLAUSI_CHECK: [Preise unter 1000 EUR einzeln einordnen - was stimmt da nicht]"""
|
||||||
PLAUSI_CHECK: [Preise unter 1000 EUR einzeln bewerten — wahrscheinlich Economy, Hinflug only, oder kein Koffer]'
|
|
||||||
)
|
c.execute("INSERT OR IGNORE INTO prompts (name, inhalt) VALUES (?, ?)",
|
||||||
""")
|
("ki_auswertung", PROMPT_TEXT))
|
||||||
|
|
||||||
# Standard-Nodes
|
# Standard-Nodes
|
||||||
c.execute("""
|
c.execute("""
|
||||||
|
|
@ -153,10 +164,18 @@ PLAUSI_CHECK: [Preise unter 1000 EUR einzeln bewerten — wahrscheinlich Economy
|
||||||
job_count = c.execute("SELECT COUNT(*) FROM jobs").fetchone()[0]
|
job_count = c.execute("SELECT COUNT(*) FROM jobs").fetchone()[0]
|
||||||
if job_count == 0:
|
if job_count == 0:
|
||||||
c.execute("""
|
c.execute("""
|
||||||
INSERT INTO jobs (scanner, von, nach, tage, aufenthalt_tage, trip_type, kabine, gepaeck, intervall) VALUES
|
INSERT INTO jobs
|
||||||
('google_flights', 'FRA', 'KTI', 30, 60, 'roundtrip', 'premium_economy', '1koffer+handgepaeck', 'daily'),
|
(scanner, von, nach, tage, aufenthalt_tage, trip_type, kabine, gepaeck,
|
||||||
('kayak', 'FRA', 'KTI', 30, 60, 'roundtrip', 'premium_economy', '1koffer+handgepaeck', 'daily'),
|
airline_filter, layover_min, layover_max, intervall)
|
||||||
('trip', 'FRA', 'KTI', 30, 60, 'roundtrip', 'premium_economy', '1koffer+handgepaeck', 'daily')
|
VALUES
|
||||||
|
-- Allgemeine Suche (alle Airlines, Umstieg 2-5h)
|
||||||
|
('kayak', 'FRA','KTI',30,60,'roundtrip','premium_economy','1koffer+handgepaeck','', 120,300,'daily'),
|
||||||
|
('trip', 'FRA','KTI',30,60,'roundtrip','premium_economy','1koffer+handgepaeck','', 120,300,'daily'),
|
||||||
|
-- Airline-spezifisch via Kayak (Umstieg 2-5h erzwingen)
|
||||||
|
('kayak', 'FRA','KTI',30,60,'roundtrip','premium_economy','1koffer+handgepaeck','CZ', 120,300,'daily'),
|
||||||
|
('kayak', 'FRA','KTI',30,60,'roundtrip','premium_economy','1koffer+handgepaeck','CX', 120,300,'daily'),
|
||||||
|
('kayak', 'FRA','KTI',30,60,'roundtrip','premium_economy','1koffer+handgepaeck','SQ', 120,300,'daily'),
|
||||||
|
('kayak', 'FRA','KTI',30,60,'roundtrip','premium_economy','1koffer+handgepaeck','TG', 120,300,'daily')
|
||||||
""")
|
""")
|
||||||
|
|
||||||
conn.commit()
|
conn.commit()
|
||||||
|
|
|
||||||
|
|
@ -49,14 +49,17 @@ def update_node_status(name, status):
|
||||||
|
|
||||||
def dispatch_job(node, job):
|
def dispatch_job(node, job):
|
||||||
payload = {
|
payload = {
|
||||||
"scanner": job["scanner"],
|
"scanner": job["scanner"],
|
||||||
"von": job["von"],
|
"von": job["von"],
|
||||||
"nach": job["nach"],
|
"nach": job["nach"],
|
||||||
"tage": job["tage"],
|
"tage": job["tage"],
|
||||||
"aufenthalt_tage": job.get("aufenthalt_tage", 60),
|
"aufenthalt_tage": job.get("aufenthalt_tage", 60),
|
||||||
"trip_type": job.get("trip_type", "roundtrip"),
|
"trip_type": job.get("trip_type", "roundtrip"),
|
||||||
"kabine": job.get("kabine", "premium_economy"),
|
"kabine": job.get("kabine", "premium_economy"),
|
||||||
"gepaeck": job.get("gepaeck", "1koffer+handgepaeck"),
|
"gepaeck": job.get("gepaeck", "1koffer+handgepaeck"),
|
||||||
|
"airline_filter": job.get("airline_filter", ""),
|
||||||
|
"layover_min": job.get("layover_min", 120),
|
||||||
|
"layover_max": job.get("layover_max", 300),
|
||||||
}
|
}
|
||||||
log(f"Job an {node['name']} ({node['tailscale_ip']}): {payload}")
|
log(f"Job an {node['name']} ({node['tailscale_ip']}): {payload}")
|
||||||
try:
|
try:
|
||||||
|
|
|
||||||
|
|
@ -22,11 +22,16 @@ def job():
|
||||||
trip_type = data.get("trip_type", "roundtrip")
|
trip_type = data.get("trip_type", "roundtrip")
|
||||||
kabine = data.get("kabine", "premium_economy")
|
kabine = data.get("kabine", "premium_economy")
|
||||||
gepaeck = data.get("gepaeck", "1koffer+handgepaeck")
|
gepaeck = data.get("gepaeck", "1koffer+handgepaeck")
|
||||||
|
airline_filter = data.get("airline_filter", "")
|
||||||
|
layover_min = data.get("layover_min", 120)
|
||||||
|
layover_max = data.get("layover_max", 300)
|
||||||
|
|
||||||
print(f"[{NODE_NAME}] Job: {scanner} {von}→{nach} ({trip_type}, {kabine}, Gepäck: {gepaeck}, +{tage}Tage/{aufenthalt}Tage)")
|
airline_label = f" [{airline_filter}]" if airline_filter else ""
|
||||||
|
print(f"[{NODE_NAME}] Job: {scanner}{airline_label} {von}→{nach} | {kabine} | Umstieg {layover_min}-{layover_max}min | +{tage}/{aufenthalt}T")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
results = scrape(scanner, von, nach, tage, aufenthalt, trip_type, kabine, gepaeck)
|
results = scrape(scanner, von, nach, tage, aufenthalt, trip_type, kabine,
|
||||||
|
gepaeck, airline_filter, layover_min, layover_max)
|
||||||
print(f"[{NODE_NAME}] {len(results)} Preise gefunden")
|
print(f"[{NODE_NAME}] {len(results)} Preise gefunden")
|
||||||
return jsonify({
|
return jsonify({
|
||||||
"results": results,
|
"results": results,
|
||||||
|
|
|
||||||
|
|
@ -5,7 +5,8 @@ import re
|
||||||
|
|
||||||
def scrape(scanner, von, nach, tage=30, aufenthalt_tage=60,
|
def scrape(scanner, von, nach, tage=30, aufenthalt_tage=60,
|
||||||
trip_type="roundtrip", kabine="premium_economy",
|
trip_type="roundtrip", kabine="premium_economy",
|
||||||
gepaeck="1koffer+handgepaeck"):
|
gepaeck="1koffer+handgepaeck", airline_filter="",
|
||||||
|
layover_min=120, layover_max=300):
|
||||||
dispatcher = {
|
dispatcher = {
|
||||||
"google_flights": scrape_google_flights,
|
"google_flights": scrape_google_flights,
|
||||||
"kayak": scrape_kayak,
|
"kayak": scrape_kayak,
|
||||||
|
|
@ -15,7 +16,8 @@ def scrape(scanner, von, nach, tage=30, aufenthalt_tage=60,
|
||||||
fn = dispatcher.get(scanner)
|
fn = dispatcher.get(scanner)
|
||||||
if not fn:
|
if not fn:
|
||||||
raise ValueError(f"Unbekannter Scanner: {scanner}")
|
raise ValueError(f"Unbekannter Scanner: {scanner}")
|
||||||
return fn(von, nach, tage, aufenthalt_tage, trip_type, kabine, gepaeck)
|
return fn(von, nach, tage, aufenthalt_tage, trip_type, kabine, gepaeck,
|
||||||
|
airline_filter, layover_min, layover_max)
|
||||||
|
|
||||||
|
|
||||||
def _booking_url_google(von, nach, abflug, rueck, kc):
|
def _booking_url_google(von, nach, abflug, rueck, kc):
|
||||||
|
|
@ -27,12 +29,26 @@ def _booking_url_google(von, nach, abflug, rueck, kc):
|
||||||
f"#flt={von}.{nach}.{abflug};c:EUR;e:1;sd:1;t:f;sc:{kc}")
|
f"#flt={von}.{nach}.{abflug};c:EUR;e:1;sd:1;t:f;sc:{kc}")
|
||||||
|
|
||||||
|
|
||||||
def _booking_url_kayak(von, nach, abflug, rueck, kc, bags=1):
|
def _booking_url_kayak(von, nach, abflug, rueck, kc, bags=1,
|
||||||
# fs=bfc%3D1 = "bags filter: checked bags = 1 free bag included"
|
layover_min=120, layover_max=300, airline=""):
|
||||||
bag_filter = f"&fs=bfc%3D{bags}" if bags else ""
|
"""
|
||||||
|
Kayak fs-Filter:
|
||||||
|
bfc=1 → min. 1 Freigepäck inklusive
|
||||||
|
ctr=120,300 → Umstiegszeit 2–5 Stunden (in Minuten)
|
||||||
|
airlines=XX → Airline-Code (CZ, CX, SQ, TG …)
|
||||||
|
"""
|
||||||
|
filters = []
|
||||||
|
if bags:
|
||||||
|
filters.append(f"bfc%3D{bags}")
|
||||||
|
if layover_min and layover_max:
|
||||||
|
filters.append(f"ctr%3D{layover_min}%2C{layover_max}")
|
||||||
|
if airline:
|
||||||
|
filters.append(f"airlines%3D{airline}")
|
||||||
|
fs = ("&fs=" + "%3B".join(filters)) if filters else ""
|
||||||
|
base = f"https://www.kayak.de/flights/{von}-{nach}/{abflug}"
|
||||||
if rueck:
|
if rueck:
|
||||||
return f"https://www.kayak.de/flights/{von}-{nach}/{abflug}/{rueck}?sort=price_a&cabin={kc}&currency=EUR{bag_filter}"
|
return f"{base}/{rueck}?sort=price_a&cabin={kc}&currency=EUR{fs}"
|
||||||
return f"https://www.kayak.de/flights/{von}-{nach}/{abflug}?sort=price_a&cabin={kc}&currency=EUR{bag_filter}"
|
return f"{base}?sort=price_a&cabin={kc}&currency=EUR{fs}"
|
||||||
|
|
||||||
|
|
||||||
def _booking_url_trip(von, nach, abflug_fmt, rueck_fmt, kc, von_name, nach_name):
|
def _booking_url_trip(von, nach, abflug_fmt, rueck_fmt, kc, von_name, nach_name):
|
||||||
|
|
@ -105,7 +121,8 @@ def _consent_google(sb):
|
||||||
|
|
||||||
def scrape_google_flights(von, nach, tage=30, aufenthalt_tage=60,
|
def scrape_google_flights(von, nach, tage=30, aufenthalt_tage=60,
|
||||||
trip_type="roundtrip", kabine="premium_economy",
|
trip_type="roundtrip", kabine="premium_economy",
|
||||||
gepaeck="1koffer+handgepaeck"):
|
gepaeck="1koffer+handgepaeck", airline_filter="",
|
||||||
|
layover_min=120, layover_max=300):
|
||||||
abflug = (datetime.now() + timedelta(days=tage)).strftime("%Y-%m-%d")
|
abflug = (datetime.now() + timedelta(days=tage)).strftime("%Y-%m-%d")
|
||||||
abflug_de = (datetime.now() + timedelta(days=tage)).strftime("%d.%m.%Y")
|
abflug_de = (datetime.now() + timedelta(days=tage)).strftime("%d.%m.%Y")
|
||||||
rueck = (datetime.now() + timedelta(days=tage + aufenthalt_tage)).strftime("%Y-%m-%d") if trip_type == "roundtrip" else ""
|
rueck = (datetime.now() + timedelta(days=tage + aufenthalt_tage)).strftime("%Y-%m-%d") if trip_type == "roundtrip" else ""
|
||||||
|
|
@ -228,17 +245,19 @@ def scrape_google_flights(von, nach, tage=30, aufenthalt_tage=60,
|
||||||
|
|
||||||
def scrape_kayak(von, nach, tage=30, aufenthalt_tage=60,
|
def scrape_kayak(von, nach, tage=30, aufenthalt_tage=60,
|
||||||
trip_type="roundtrip", kabine="premium_economy",
|
trip_type="roundtrip", kabine="premium_economy",
|
||||||
gepaeck="1koffer+handgepaeck"):
|
gepaeck="1koffer+handgepaeck", airline_filter="",
|
||||||
|
layover_min=120, layover_max=300):
|
||||||
abflug = (datetime.now() + timedelta(days=tage)).strftime("%Y-%m-%d")
|
abflug = (datetime.now() + timedelta(days=tage)).strftime("%Y-%m-%d")
|
||||||
rueck = (datetime.now() + timedelta(days=tage + aufenthalt_tage)).strftime("%Y-%m-%d") if trip_type == "roundtrip" else ""
|
rueck = (datetime.now() + timedelta(days=tage + aufenthalt_tage)).strftime("%Y-%m-%d") if trip_type == "roundtrip" else ""
|
||||||
kc = KABINE_KAYAK.get(kabine, "w")
|
kc = KABINE_KAYAK.get(kabine, "w")
|
||||||
# Bags-Filter nur wenn explizit Koffer verlangt
|
|
||||||
# Hinweis: fs=bfc%3D1 filtert Kayak auf "1 Freigepäck inkl."
|
|
||||||
bags = 1 if "koffer" in gepaeck else 0
|
bags = 1 if "koffer" in gepaeck else 0
|
||||||
booking_url = _booking_url_kayak(von, nach, abflug, rueck, kc, bags)
|
booking_url = _booking_url_kayak(von, nach, abflug, rueck, kc, bags,
|
||||||
booking_url_raw = _booking_url_kayak(von, nach, abflug, rueck, kc, 0) # ohne Filter für Fallback
|
layover_min, layover_max, airline_filter)
|
||||||
|
booking_url_raw = _booking_url_kayak(von, nach, abflug, rueck, kc, 0,
|
||||||
|
layover_min, layover_max, airline_filter)
|
||||||
|
airline_label = f" [{airline_filter}]" if airline_filter else ""
|
||||||
|
print(f"[KY{airline_label}] URL: {booking_url}")
|
||||||
|
|
||||||
print(f"[KY] URL: {booking_url}")
|
|
||||||
results = []
|
results = []
|
||||||
|
|
||||||
with SB(uc=True, headless=True, chromium_arg="--no-sandbox --disable-dev-shm-usage") as sb:
|
with SB(uc=True, headless=True, chromium_arg="--no-sandbox --disable-dev-shm-usage") as sb:
|
||||||
|
|
@ -258,7 +277,8 @@ def scrape_kayak(von, nach, tage=30, aufenthalt_tage=60,
|
||||||
p = _parse_preis(e.text)
|
p = _parse_preis(e.text)
|
||||||
if p:
|
if p:
|
||||||
results.append({"scanner": "kayak", "preis": p,
|
results.append({"scanner": "kayak", "preis": p,
|
||||||
"waehrung": "EUR", "airline": "",
|
"waehrung": "EUR",
|
||||||
|
"airline": airline_filter or "",
|
||||||
"abflug": abflug, "ankunft": rueck,
|
"abflug": abflug, "ankunft": rueck,
|
||||||
"booking_url": booking_url})
|
"booking_url": booking_url})
|
||||||
if results:
|
if results:
|
||||||
|
|
@ -270,26 +290,30 @@ def scrape_kayak(von, nach, tage=30, aufenthalt_tage=60,
|
||||||
for r in _preise_aus_body(body, "kayak", abflug):
|
for r in _preise_aus_body(body, "kayak", abflug):
|
||||||
r["ankunft"] = rueck
|
r["ankunft"] = rueck
|
||||||
r["booking_url"] = booking_url
|
r["booking_url"] = booking_url
|
||||||
|
r["airline"] = airline_filter or ""
|
||||||
results.append(r)
|
results.append(r)
|
||||||
|
|
||||||
# Kein Ergebnis mit Bags-Filter → Fallback ohne Filter (zeigt was verfügbar ist)
|
# Kein Ergebnis mit Bags-Filter → Fallback ohne Filter
|
||||||
if not results and bags > 0:
|
if not results and bags > 0:
|
||||||
print(f"[KY] Kein Ergebnis mit Bags-Filter — Fallback ohne Filter")
|
print(f"[KY] Kein Ergebnis mit Filtern — Fallback ohne Bags-Filter")
|
||||||
sb.open(booking_url_raw)
|
sb.open(booking_url_raw)
|
||||||
sb.sleep(12)
|
sb.sleep(12)
|
||||||
body2 = sb.get_text("body")
|
body2 = sb.get_text("body")
|
||||||
for r in _preise_aus_body(body2, "kayak", abflug):
|
for r in _preise_aus_body(body2, "kayak", abflug):
|
||||||
r["ankunft"] = rueck
|
r["ankunft"] = rueck
|
||||||
r["booking_url"] = booking_url_raw
|
r["booking_url"] = booking_url_raw
|
||||||
|
r["airline"] = airline_filter or ""
|
||||||
results.append(r)
|
results.append(r)
|
||||||
|
|
||||||
print(f"[KY] Ergebnis: {[r['preis'] for r in results[:5]]}")
|
airline_label = f" [{airline_filter}]" if airline_filter else ""
|
||||||
|
print(f"[KY{airline_label}] Ergebnis: {[r['preis'] for r in results[:5]]}")
|
||||||
return results[:10]
|
return results[:10]
|
||||||
|
|
||||||
|
|
||||||
def scrape_trip(von, nach, tage=30, aufenthalt_tage=60,
|
def scrape_trip(von, nach, tage=30, aufenthalt_tage=60,
|
||||||
trip_type="roundtrip", kabine="premium_economy",
|
trip_type="roundtrip", kabine="premium_economy",
|
||||||
gepaeck="1koffer+handgepaeck"):
|
gepaeck="1koffer+handgepaeck", airline_filter="",
|
||||||
|
layover_min=120, layover_max=300):
|
||||||
abflug_fmt = (datetime.now() + timedelta(days=tage)).strftime("%Y%m%d")
|
abflug_fmt = (datetime.now() + timedelta(days=tage)).strftime("%Y%m%d")
|
||||||
rueck_fmt = (datetime.now() + timedelta(days=tage + aufenthalt_tage)).strftime("%Y%m%d") if trip_type == "roundtrip" else ""
|
rueck_fmt = (datetime.now() + timedelta(days=tage + aufenthalt_tage)).strftime("%Y%m%d") if trip_type == "roundtrip" else ""
|
||||||
abflug_iso = (datetime.now() + timedelta(days=tage)).strftime("%Y-%m-%d")
|
abflug_iso = (datetime.now() + timedelta(days=tage)).strftime("%Y-%m-%d")
|
||||||
|
|
@ -351,7 +375,8 @@ def scrape_trip(von, nach, tage=30, aufenthalt_tage=60,
|
||||||
|
|
||||||
def scrape_skyscanner(von, nach, tage=30, aufenthalt_tage=60,
|
def scrape_skyscanner(von, nach, tage=30, aufenthalt_tage=60,
|
||||||
trip_type="roundtrip", kabine="premium_economy",
|
trip_type="roundtrip", kabine="premium_economy",
|
||||||
gepaeck="1koffer+handgepaeck"):
|
gepaeck="1koffer+handgepaeck", airline_filter="",
|
||||||
|
layover_min=120, layover_max=300):
|
||||||
"""Skyscanner hat starken Bot-Schutz — übersprungen."""
|
"""Skyscanner hat starken Bot-Schutz — übersprungen."""
|
||||||
print("[SS] Skyscanner übersprungen (Bot-Detection)")
|
print("[SS] Skyscanner übersprungen (Bot-Detection)")
|
||||||
return []
|
return []
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue