feat: KI-Plausibilitätsprüfung für jeden Preis
- Jeder Preis wird nach dem Scan von der KI einzeln geprüft
- plausibel/verdächtig/ungeprüft Status in DB (prices.plausibel)
- Fallback auf Regelwerk wenn KI nicht erreichbar
- Dashboard: Farbcodierung (grün=PE bestätigt, rot=verdächtig)
- Nur plausible Preise in Stats/Vergleich/KI-Auswertung
- Scraper: Fallback ohne Gepäck komplett entfernt
- Scraper: Mindestpreis 700€ für PE (filtert Economy raus)
- Scraper: Kabinen-Verifikation auf jeder Seite

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
parent
28235c9eda
commit
4dde05ff01
5 changed files with 233 additions and 74 deletions
|
|
@ -90,6 +90,8 @@ def init_db():
|
|||
for col_sql in [
|
||||
"ALTER TABLE prices ADD COLUMN booking_url TEXT",
|
||||
"ALTER TABLE prices ADD COLUMN screenshot_id INTEGER",
|
||||
"ALTER TABLE prices ADD COLUMN plausibel INTEGER",
|
||||
"ALTER TABLE prices ADD COLUMN plausi_grund TEXT DEFAULT ''",
|
||||
]:
|
||||
try:
|
||||
c.execute(col_sql)
|
||||
|
|
|
|||
133
hub/src/ki.py
133
hub/src/ki.py
|
|
@ -10,6 +10,138 @@ client = OpenAI(
|
|||
|
||||
MODEL = os.environ.get("AI_MODEL", "openai/gpt-4o-mini")
|
||||
|
||||
PLAUSI_PROMPT = """Du bist ein Flugpreis-Experte. Pruefe jeden der folgenden Preise auf Plausibilitaet.
|
||||
|
||||
KONTEXT:
|
||||
- Strecke: Roundtrip Frankfurt (FRA) → Phnom Penh Techo (KTI), ca. 2 Monate Aufenthalt
|
||||
- Kabinenklasse: PREMIUM ECONOMY (nicht Economy!)
|
||||
- Gepaeck: 1 grosser Koffer + Handgepaeck MUSS inklusive sein
|
||||
- Bevorzugte Airlines: China Southern (CZ), Cathay Pacific (CX), Singapore Airlines (SQ), Thai Airways (TG), Vietnam Airlines (VN)
|
||||
|
||||
PREISREFERENZ fuer Premium Economy Roundtrip FRA-KTI mit Gepaeck:
|
||||
- Sehr guenstig: 900-1200 EUR (seltene Deals, plausibel wenn bekannte Airline)
|
||||
- Normal: 1200-1800 EUR
|
||||
- Teuer: 1800-2500 EUR
|
||||
- Ueber 2500 EUR: zu teuer oder Business Class
|
||||
- UNTER 700 EUR: fast sicher ECONOMY, nicht Premium Economy!
|
||||
- 700-900 EUR: sehr verdaechtig, wahrscheinlich Economy oder ohne Gepaeck
|
||||
|
||||
PRUEFREGELN:
|
||||
1. Preis unter 700 EUR → NICHT PLAUSIBEL (Economy ohne Gepaeck)
|
||||
2. Preis 700-900 EUR → VERDAECHTIG (pruefen ob Economy oder ohne Gepaeck)
|
||||
3. Preis 900-2500 EUR mit bekannter Airline → PLAUSIBEL
|
||||
4. Preis ueber 2500 EUR → VERDAECHTIG (eventuell Business Class)
|
||||
5. Scanner "kayak_multicity" (HKG Stopover): Preise 100-200 EUR hoeher als Direkt ist normal
|
||||
6. Wenn ein Scanner deutlich guenstigere Preise zeigt als alle anderen: VERDAECHTIG
|
||||
|
||||
PREISE ZU PRUEFEN:
|
||||
{preise_liste}
|
||||
|
||||
Antworte NUR mit gueltigem JSON-Array. Fuer jeden Preis:
|
||||
{{"id": <price_id>, "plausibel": true/false, "grund": "<kurze Begruendung auf Deutsch>"}}
|
||||
|
||||
Beispiel:
|
||||
[
|
||||
{{"id": 123, "plausibel": true, "grund": "1350 EUR fuer CX PE Roundtrip ist marktgerecht"}},
|
||||
{{"id": 124, "plausibel": false, "grund": "436 EUR ist Economy-Preis, nicht PE mit Gepaeck"}}
|
||||
]"""
|
||||
|
||||
|
||||
def plausibilitaetspruefung(von="FRA", nach="KTI"):
    """Check today's not-yet-reviewed prices for plausibility via the AI model.

    Loads every ``prices`` row for the route whose ``plausibel`` column is
    still NULL and whose ``scraped_at`` date equals SQLite's ``date('now')``,
    sends them to the chat model in a single prompt and stores the verdict
    per row (``plausibel`` = 1/0 plus ``plausi_grund``, cut to 200 chars).

    Fallback behaviour:
      * If the model call fails or its answer is not a JSON array, all
        loaded rows are marked by ``_regelbasierte_plausi`` instead.
      * Entries in the answer that reference an id that was not part of the
        request, or whose ``plausibel`` is not a real JSON boolean, are
        ignored; rows left without a verdict are then passed to
        ``_regelbasierte_plausi`` so none of them stays unchecked.

    Args:
        von: Value matched against ``prices.von``.
        nach: Value matched against ``prices.nach``.

    Returns:
        None. Verdicts are written to the database; progress goes to ``log``.
    """
    log("KI-Plausibilitätsprüfung gestartet")
    conn = get_conn()

    # One try/finally around everything so the connection is closed on every
    # path, including a failing SELECT and the early return below.
    try:
        ungepruefte = conn.execute("""
            SELECT id, scanner, node, preis, airline, abflug
            FROM prices
            WHERE von=? AND nach=?
            AND plausibel IS NULL
            AND date(scraped_at) = date('now')
            ORDER BY preis ASC
        """, (von, nach)).fetchall()

        if not ungepruefte:
            log("Keine ungeprüften Preise — Plausibilitätsprüfung übersprungen")
            return

        preise_liste = "\n".join(
            f" ID {p['id']}: {p['preis']:.0f} EUR — Scanner: {p['scanner']} — "
            f"Node: {p['node']} — Airline: {p['airline'] or 'k.A.'} — Abflug: {p['abflug']}"
            for p in ungepruefte
        )
        prompt = PLAUSI_PROMPT.format(preise_liste=preise_liste)

        # Rows still waiting for a verdict, keyed by the textual id so that a
        # model answer of 123 or "123" both match. The stored row keeps the
        # original id value for the UPDATE.
        offen = {str(p["id"]): p for p in ungepruefte}

        # Pre-set so the JSON error handler can always slice it.
        antwort = ""

        try:
            response = client.chat.completions.create(
                model=MODEL,
                messages=[{"role": "user", "content": prompt}],
                max_tokens=2000,
                temperature=0.1,
            )
            # content may be None; treat that as an empty (unparseable) answer.
            antwort = (response.choices[0].message.content or "").strip()

            # The model sometimes wraps the JSON in a Markdown code fence.
            if "```" in antwort:
                antwort = antwort.split("```")[1]
            if antwort.startswith("json"):
                antwort = antwort[4:]

            ergebnisse = json.loads(antwort)
            if not isinstance(ergebnisse, list):
                raise ValueError("KI-Antwort ist kein JSON-Array")

            plausibel_count = 0
            verdaechtig_count = 0

            for eintrag in ergebnisse:
                if not isinstance(eintrag, dict):
                    continue

                urteil = eintrag.get("plausibel")
                # Only a real boolean counts; a string like "false" would
                # otherwise be truthy and mark the price as plausible.
                if not isinstance(urteil, bool):
                    continue

                # Only touch rows that were actually sent to the model, so an
                # invented id can never overwrite an unrelated price.
                zeile = offen.pop(str(eintrag.get("id")), None)
                if zeile is None:
                    continue

                ist_plausibel = 1 if urteil else 0
                grund = str(eintrag.get("grund") or "")[:200]

                conn.execute(
                    "UPDATE prices SET plausibel=?, plausi_grund=? WHERE id=?",
                    (ist_plausibel, grund, zeile["id"])
                )
                if ist_plausibel:
                    plausibel_count += 1
                else:
                    verdaechtig_count += 1

            conn.commit()
            log(f"Plausibilitätsprüfung: {plausibel_count} plausibel, "
                f"{verdaechtig_count} verdächtig von {len(ungepruefte)} Preisen")

        except json.JSONDecodeError as fehler:
            log(f"KI-Plausi JSON-Fehler: {fehler} — Antwort: {antwort[:200]}", "ERROR")
            # Fallback: mark all rows by rule.
            _regelbasierte_plausi(conn, ungepruefte)
        except Exception as fehler:
            # Deliberate catch-all: any model/transport problem must not leave
            # the prices unchecked.
            log(f"KI-Plausi Fehler: {fehler}", "ERROR")
            _regelbasierte_plausi(conn, ungepruefte)
        else:
            # Model answered, but skipped some rows or gave unusable entries.
            if offen:
                log(f"{len(offen)} Preise ohne verwertbares KI-Urteil — Regelwerk wird angewendet")
                _regelbasierte_plausi(conn, list(offen.values()))
    finally:
        conn.close()
|
||||
|
||||
|
||||
def _regelbasierte_plausi(conn, preise):
    """Rule-based fallback used when the AI check could not be completed.

    Every entry of ``preise`` is classified purely by its ``"preis"`` value
    and the verdict plus a fixed reason text is written to the ``prices`` row
    with the entry's ``"id"``:

      * below 700        -> not plausible
      * 700 up to < 900  -> not plausible
      * above 3000       -> not plausible
      * everything else  -> plausible

    All updates are committed once at the end.

    Args:
        conn: Open database connection offering ``execute`` and ``commit``.
        preise: Iterable of rows/mappings with at least ``"id"`` and ``"preis"``.
    """
    sql_verdaechtig = "UPDATE prices SET plausibel=0, plausi_grund=? WHERE id=?"
    sql_plausibel = "UPDATE prices SET plausibel=1, plausi_grund=? WHERE id=?"

    log("Regelbasierte Plausibilitätsprüfung als Fallback")

    for eintrag in preise:
        betrag = eintrag["preis"]

        # Pick statement and reason first, then run a single execute.
        if betrag < 700:
            sql = sql_verdaechtig
            grund = "Preis unter 700€ — sehr wahrscheinlich Economy"
        elif betrag < 900:
            sql = sql_verdaechtig
            grund = "Preis 700-900€ — verdächtig, wahrscheinlich Economy oder ohne Gepäck"
        elif betrag > 3000:
            sql = sql_verdaechtig
            grund = "Preis über 3000€ — möglicherweise Business Class"
        else:
            sql = sql_plausibel
            grund = "Preis im erwarteten PE-Bereich"

        conn.execute(sql, (grund, eintrag["id"]))

    conn.commit()
|
||||
|
||||
|
||||
def get_prompt():
|
||||
conn = get_conn()
|
||||
|
|
@ -29,6 +161,7 @@ def auswerten(von="FRA", nach="KTI"):
|
|||
FROM prices
|
||||
WHERE von=? AND nach=?
|
||||
AND date(scraped_at) = date('now')
|
||||
AND (plausibel = 1 OR plausibel IS NULL)
|
||||
ORDER BY preis ASC
|
||||
""", (von, nach)).fetchall()
|
||||
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@ import requests
|
|||
import schedule
|
||||
from datetime import datetime, timedelta
|
||||
from db import init_db, get_conn, log
|
||||
from ki import auswerten
|
||||
from ki import auswerten, plausibilitaetspruefung
|
||||
|
||||
# Verhindert dass zwei Läufe gleichzeitig laufen
|
||||
_scan_lock = threading.Lock()
|
||||
|
|
@ -184,10 +184,15 @@ def scraping_lauf(label="Standard", flex_tage_liste=None):
|
|||
log(f"Scraping [{label}] fertig — {online}/{len(nodes)} Nodes | "
|
||||
f"{fehler} Fehler | {dauer}s Laufzeit")
|
||||
|
||||
try:
|
||||
plausibilitaetspruefung()
|
||||
except Exception as e:
|
||||
log(f"KI-Plausi-Fehler: {e}", "ERROR")
|
||||
|
||||
try:
|
||||
auswerten()
|
||||
except Exception as e:
|
||||
log(f"KI-Fehler: {e}", "ERROR")
|
||||
log(f"KI-Auswertung-Fehler: {e}", "ERROR")
|
||||
|
||||
log(f"=== Lauf [{label}] beendet ===")
|
||||
|
||||
|
|
|
|||
|
|
@ -89,8 +89,8 @@ OVERVIEW_HTML = BASE_HTML.replace("{% block content %}{% endblock %}", """
|
|||
<div class="grid-3" style="margin-bottom:1.5rem">
|
||||
<div class="card stat-box">
|
||||
<div class="value" id="min-preis">—</div>
|
||||
<div class="label">Günstigster Preis heute (EUR)</div>
|
||||
<div id="min-preis-warnung" style="display:none;margin-top:0.4rem;font-size:0.75rem;color:#fbbf24">⚠ unter 1.000 € — bitte prüfen</div>
|
||||
<div class="label">Günstigster PE-Preis heute (EUR)</div>
|
||||
<div id="min-preis-warnung" style="display:none;margin-top:0.4rem;font-size:0.75rem;color:#34d399">✓ KI-geprüft: nur plausible PE-Preise</div>
|
||||
</div>
|
||||
<div class="card stat-box">
|
||||
<div class="value" id="avg-preis">—</div>
|
||||
|
|
@ -199,10 +199,8 @@ async function ladeUebersicht() {
|
|||
|
||||
const minHeute = stats.min_heute;
|
||||
document.getElementById('min-preis').textContent = minHeute ? Math.round(minHeute) : '—';
|
||||
document.getElementById('min-preis').style.color = (minHeute && minHeute < PLAUSI_GRENZE) ? '#fbbf24' : '#38bdf8';
|
||||
if (minHeute && minHeute < PLAUSI_GRENZE) {
|
||||
document.getElementById('min-preis-warnung').style.display = 'block';
|
||||
}
|
||||
document.getElementById('min-preis').style.color = '#38bdf8';
|
||||
document.getElementById('min-preis-warnung').style.display = minHeute ? 'block' : 'none';
|
||||
document.getElementById('avg-preis').textContent = stats.avg_30d ? Math.round(stats.avg_30d) : '—';
|
||||
document.getElementById('node-count').textContent = nodes.filter(n=>n.status==='online').length;
|
||||
|
||||
|
|
@ -261,26 +259,38 @@ async function ladeUebersicht() {
|
|||
const HOTEL_HKG = 150; // geschätzte Hotel-Kosten HKG in EUR
|
||||
tbody.innerHTML = preise.map(p => {
|
||||
const isMulticity = p.scanner === 'kayak_multicity';
|
||||
const warn = p.preis < PLAUSI_GRENZE;
|
||||
const plausi = warn
|
||||
? '<span style="background:#451a03;color:#fbbf24;padding:0.15rem 0.5rem;border-radius:4px;font-size:0.75rem">⚠ bitte prüfen</span>'
|
||||
: '<span style="background:#064e3b;color:#34d399;padding:0.15rem 0.5rem;border-radius:4px;font-size:0.75rem">✓ plausibel</span>';
|
||||
// KI-Plausibilitätsstatus: 1=plausibel, 0=verdächtig, -1/null=ungeprüft
|
||||
const ps = p.plausi_status !== undefined ? p.plausi_status : (p.plausibel !== undefined ? p.plausibel : -1);
|
||||
const grund = p.plausi_info || p.plausi_grund || '';
|
||||
let plausi;
|
||||
if (ps === 1) {
|
||||
plausi = `<span title="${grund}" style="background:#064e3b;color:#34d399;padding:0.15rem 0.5rem;border-radius:4px;font-size:0.75rem;cursor:help">✓ PE bestätigt</span>`;
|
||||
} else if (ps === 0) {
|
||||
plausi = `<span title="${grund}" style="background:#7f1d1d;color:#fca5a5;padding:0.15rem 0.5rem;border-radius:4px;font-size:0.75rem;cursor:help">✗ ${grund.substring(0,40) || 'verdächtig'}</span>`;
|
||||
} else {
|
||||
plausi = '<span style="background:#451a03;color:#fbbf24;padding:0.15rem 0.5rem;border-radius:4px;font-size:0.75rem">⏳ prüfe...</span>';
|
||||
}
|
||||
const buchBtn = p.booking_url
|
||||
? `<a href="${p.booking_url}" target="_blank" class="btn btn-sm" style="text-decoration:none">Öffnen ↗</a>`
|
||||
: '—';
|
||||
const scannerLabel = isMulticity
|
||||
? `<strong style="color:#818cf8">🇭🇰 HKG Stopover</strong><br><span style="font-size:0.72rem;color:#64748b">+~${HOTEL_HKG}€ Hotel</span>`
|
||||
: p.scanner;
|
||||
const verdaechtig = (ps === 0);
|
||||
const preisFarbe = verdaechtig ? '#ef4444' : (isMulticity ? '#a78bfa' : '#34d399');
|
||||
const gesamtHtml = isMulticity
|
||||
? `<strong style="color:${warn?'#fbbf24':'#a78bfa'}">${p.preis} €</strong><br><span style="font-size:0.75rem;color:#64748b">∑ ~${Math.round(p.preis)+HOTEL_HKG} € inkl. Hotel</span>`
|
||||
: `<strong style="color:${warn?'#fbbf24':'#34d399'}">${p.preis} €</strong>`;
|
||||
? `<strong style="color:${preisFarbe}">${p.preis} €</strong><br><span style="font-size:0.75rem;color:#64748b">∑ ~${Math.round(p.preis)+HOTEL_HKG} € inkl. Hotel</span>`
|
||||
: `<strong style="color:${preisFarbe}">${p.preis} €</strong>`;
|
||||
const ssBtn = p.screenshot_id
|
||||
? `<button onclick="zeigeScreenshot(${p.screenshot_id},'${p.scanner} · ${p.node} · ${p.abflug||''}')"
|
||||
style="background:#1e3a5f;border:1px solid #2563eb;color:#93c5fd;padding:0.2rem 0.5rem;border-radius:5px;cursor:pointer;font-size:0.8rem">
|
||||
📷
|
||||
</button>`
|
||||
: '<span style="color:#334155;font-size:0.75rem">—</span>';
|
||||
return `<tr${isMulticity?' style="background:rgba(99,102,241,0.06);border-left:3px solid #6366f1"':''}>
|
||||
const rowStyle = verdaechtig
|
||||
? ' style="background:rgba(239,68,68,0.08);border-left:3px solid #ef4444;opacity:0.7"'
|
||||
: (isMulticity ? ' style="background:rgba(99,102,241,0.06);border-left:3px solid #6366f1"' : '');
|
||||
return `<tr${rowStyle}>
|
||||
<td>${scannerLabel}</td>
|
||||
<td style="font-size:0.8rem;color:#64748b">${p.node}</td>
|
||||
<td>${gesamtHtml}</td>
|
||||
|
|
@ -290,7 +300,7 @@ async function ladeUebersicht() {
|
|||
<td>${buchBtn}</td>
|
||||
<td>${ssBtn}</td>
|
||||
</tr>`;
|
||||
}).join('') || '<tr><td colspan="7" style="color:#475569;text-align:center">Noch keine Daten heute</td></tr>';
|
||||
}).join('') || '<tr><td colspan="8" style="color:#475569;text-align:center">Noch keine Daten heute</td></tr>';
|
||||
|
||||
const ntbody = document.getElementById('nodes-tbody');
|
||||
ntbody.innerHTML = nodes.map(n => `
|
||||
|
|
@ -369,10 +379,10 @@ ladeUebersicht();
|
|||
def api_stats():
|
||||
conn = get_conn()
|
||||
min_heute = conn.execute(
|
||||
"SELECT MIN(preis) as v FROM prices WHERE date(scraped_at)=date('now')"
|
||||
"SELECT MIN(preis) as v FROM prices WHERE date(scraped_at)=date('now') AND (plausibel=1 OR plausibel IS NULL)"
|
||||
).fetchone()["v"]
|
||||
avg_30d = conn.execute(
|
||||
"SELECT AVG(preis) as v FROM prices WHERE scraped_at >= datetime('now','-30 days')"
|
||||
"SELECT AVG(preis) as v FROM prices WHERE scraped_at >= datetime('now','-30 days') AND (plausibel=1 OR plausibel IS NULL)"
|
||||
).fetchone()["v"]
|
||||
conn.close()
|
||||
return jsonify({"min_heute": min_heute, "avg_30d": avg_30d})
|
||||
|
|
@ -399,17 +409,20 @@ def api_preise_heute():
|
|||
# Neuester Scan-Lauf: ab MAX(scraped_at) - 20 Minuten
|
||||
# Damit werden immer die Preise des letzten Laufs gezeigt — alle mit Screenshot
|
||||
rows = conn.execute("""
|
||||
SELECT * FROM prices
|
||||
SELECT *, COALESCE(plausibel, -1) as plausi_status,
|
||||
COALESCE(plausi_grund, '') as plausi_info
|
||||
FROM prices
|
||||
WHERE scraped_at >= datetime(
|
||||
(SELECT MAX(scraped_at) FROM prices WHERE date(scraped_at) = date('now')),
|
||||
'-20 minutes'
|
||||
)
|
||||
ORDER BY preis ASC LIMIT 100
|
||||
""").fetchall()
|
||||
# Fallback: ganzer Tag (z.B. erster Lauf des Tages noch nicht abgeschlossen)
|
||||
if not rows:
|
||||
rows = conn.execute("""
|
||||
SELECT * FROM prices
|
||||
SELECT *, COALESCE(plausibel, -1) as plausi_status,
|
||||
COALESCE(plausi_grund, '') as plausi_info
|
||||
FROM prices
|
||||
WHERE date(scraped_at) = date('now')
|
||||
ORDER BY preis ASC LIMIT 100
|
||||
""").fetchall()
|
||||
|
|
@ -422,7 +435,7 @@ def api_preise_vergleich():
|
|||
"""Pro Scanner: günstigster Preis je Node + Delta zum Vortag."""
|
||||
conn = get_conn()
|
||||
|
||||
# Heute: günstigster Preis pro Scanner+Node — aus letztem Scan-Lauf
|
||||
# Heute: günstigster PLAUSIBLER Preis pro Scanner+Node
|
||||
heute = conn.execute("""
|
||||
SELECT scanner, node, MIN(preis) as preis, booking_url, abflug, ankunft
|
||||
FROM prices
|
||||
|
|
@ -430,6 +443,7 @@ def api_preise_vergleich():
|
|||
(SELECT MAX(scraped_at) FROM prices WHERE date(scraped_at) = date('now')),
|
||||
'-20 minutes'
|
||||
)
|
||||
AND (plausibel = 1 OR plausibel IS NULL)
|
||||
GROUP BY scanner, node
|
||||
ORDER BY scanner, preis
|
||||
""").fetchall()
|
||||
|
|
@ -438,6 +452,7 @@ def api_preise_vergleich():
|
|||
SELECT scanner, node, MIN(preis) as preis, booking_url, abflug, ankunft
|
||||
FROM prices
|
||||
WHERE date(scraped_at) = date('now')
|
||||
AND (plausibel = 1 OR plausibel IS NULL)
|
||||
GROUP BY scanner, node
|
||||
ORDER BY scanner, preis
|
||||
""").fetchall()
|
||||
|
|
|
|||
|
|
@ -2,6 +2,11 @@ from seleniumbase import SB
|
|||
from datetime import datetime, timedelta
|
||||
import re
|
||||
|
||||
# ── Qualitätsschwellen ────────────────────────────────────────────────────────
|
||||
# PE Roundtrip FRA→KTI mit Gepäck: realistisch ab ~800€
|
||||
MIN_PREIS_PE_ROUNDTRIP = 700
|
||||
MAX_PREIS_PE_ROUNDTRIP = 12000
|
||||
|
||||
|
||||
def _scrape_disabled(*args, **kwargs):
|
||||
"""Deaktivierter Scanner — gibt leere Ergebnisse zurück."""
|
||||
|
|
@ -9,6 +14,33 @@ def _scrape_disabled(*args, **kwargs):
|
|||
return [], ""
|
||||
|
||||
|
||||
def _validate_results(results, scanner_name, kabine="premium_economy"):
    """Quality gate: drop prices outside the accepted premium-economy range.

    For ``kabine == "premium_economy"`` only entries whose ``"preis"`` lies
    within ``MIN_PREIS_PE_ROUNDTRIP``..``MAX_PREIS_PE_ROUNDTRIP`` (both
    inclusive) are kept; if anything was removed, the count is printed with
    the scanner name. For any other cabin the input is returned unchanged.

    Args:
        results: List of result dicts, each with a ``"preis"`` entry.
        scanner_name: Label used in the printed QC message.
        kabine: Cabin class the results were requested for.

    Returns:
        The filtered list (or the original object for other cabins).
    """
    if kabine != "premium_economy":
        return results

    total = len(results)
    kept = []
    for entry in results:
        if MIN_PREIS_PE_ROUNDTRIP <= entry["preis"] <= MAX_PREIS_PE_ROUNDTRIP:
            kept.append(entry)

    removed = total - len(kept)
    if removed:
        print(f"[QC/{scanner_name}] {removed} Preise außerhalb {MIN_PREIS_PE_ROUNDTRIP}-{MAX_PREIS_PE_ROUNDTRIP}€ entfernt (vermutlich Economy oder Fehler)")
    return kept
|
||||
|
||||
|
||||
def _check_cabin_on_page(body, title, kabine="premium_economy"):
    """Report whether the page text is compatible with the requested cabin.

    Only ``kabine == "premium_economy"`` is actually inspected; every other
    value yields True. The title plus the first 3000 characters of the body
    are searched case-insensitively.

    Args:
        body: Visible page text; ``None`` is treated as empty.
        title: Page title; ``None`` is treated as empty.
        kabine: Requested cabin class.

    Returns:
        True if a premium-economy keyword is present, or if the text gives no
        hint either way. False only when "economy" occurs while "premium"
        does not occur anywhere in the inspected text (a warning is printed).
    """
    if kabine != "premium_economy":
        return True

    # Tolerate missing title/body instead of raising TypeError on None.
    text = f"{title or ''} {(body or '')[:3000]}".lower()

    pe_keywords = (
        "premium economy", "premium eco", "premiumeconomy",
        "premium_economy", "kabine: premium", "cabin: premium",
        "prem eco", "w class",
    )
    if any(kw in text for kw in pe_keywords):
        return True

    # Plain "economy" without any mention of "premium" suggests wrong cabin.
    if "economy" in text and "premium" not in text:
        print("[QC] WARNUNG: Seite zeigt 'Economy' OHNE 'Premium' — möglicherweise falsche Kabine!")
        return False

    return True
|
||||
|
||||
|
||||
def scrape(scanner, von, nach, tage=30, aufenthalt_tage=60,
|
||||
trip_type="roundtrip", kabine="premium_economy",
|
||||
gepaeck="1koffer+handgepaeck", airline_filter="",
|
||||
|
|
@ -473,12 +505,9 @@ def scrape_kayak(von, nach, tage=30, aufenthalt_tage=60,
|
|||
rueck = (datetime.now() + timedelta(days=tage + aufenthalt_tage)).strftime("%Y-%m-%d") if trip_type == "roundtrip" else ""
|
||||
kc = KABINE_KAYAK.get(kabine, "w")
|
||||
bags = 1 if "koffer" in gepaeck else 0
|
||||
booking_url = _booking_url_kayak(von, nach, abflug, rueck, kc, bags,
|
||||
layover_min, layover_max, airline_filter,
|
||||
max_flugzeit_h, max_stops)
|
||||
booking_url_raw = _booking_url_kayak(von, nach, abflug, rueck, kc, 0,
|
||||
layover_min, layover_max, airline_filter,
|
||||
max_flugzeit_h, max_stops)
|
||||
booking_url = _booking_url_kayak(von, nach, abflug, rueck, kc, bags,
|
||||
layover_min, layover_max, airline_filter,
|
||||
max_flugzeit_h, max_stops)
|
||||
airline_label = f" [{airline_filter}]" if airline_filter else ""
|
||||
print(f"[KY{airline_label}] URL: {booking_url}")
|
||||
|
||||
|
|
@ -517,19 +546,12 @@ def scrape_kayak(von, nach, tage=30, aufenthalt_tage=60,
|
|||
r["airline"] = airline_filter or ""
|
||||
results.append(r)
|
||||
|
||||
# Kein Ergebnis mit Bags-Filter → Fallback ohne Filter
|
||||
if not results and bags > 0:
|
||||
print(f"[KY] Kein Ergebnis mit Filtern — Fallback ohne Bags-Filter")
|
||||
sb.open(booking_url_raw)
|
||||
sb.sleep(12)
|
||||
body2 = sb.get_text("body")
|
||||
for r in _preise_aus_body(body2, "kayak", abflug):
|
||||
r["ankunft"] = rueck
|
||||
r["booking_url"] = booking_url_raw
|
||||
r["airline"] = airline_filter or ""
|
||||
results.append(r)
|
||||
# Kabinen-Verifikation: prüfe ob "Premium Economy" in der Seite steht
|
||||
pe_confirmed = _check_cabin_on_page(body, title, "premium_economy")
|
||||
if not pe_confirmed:
|
||||
print(f"[KY{airline_label}] WARNUNG: Premium Economy nicht auf Seite bestätigt!")
|
||||
|
||||
airline_label = f" [{airline_filter}]" if airline_filter else ""
|
||||
results = _validate_results(results, f"kayak{airline_label}", "premium_economy")
|
||||
print(f"[KY{airline_label}] Ergebnis: {[r['preis'] for r in results[:5]]}")
|
||||
screenshot_b64 = _take_screenshot(sb)
|
||||
return results[:10], screenshot_b64
|
||||
|
|
@ -595,6 +617,11 @@ def scrape_trip(von, nach, tage=30, aufenthalt_tage=60,
|
|||
r["booking_url"] = booking_url
|
||||
results.append(r)
|
||||
|
||||
pe_confirmed = _check_cabin_on_page(body, title, "premium_economy")
|
||||
if not pe_confirmed:
|
||||
print("[TR] WARNUNG: Premium Economy nicht auf Seite bestätigt!")
|
||||
|
||||
results = _validate_results(results, "trip", "premium_economy")
|
||||
print(f"[TR] Ergebnis: {[r['preis'] for r in results[:5]]}")
|
||||
screenshot_b64 = _take_screenshot(sb)
|
||||
return results[:10], screenshot_b64
|
||||
|
|
@ -635,10 +662,8 @@ def scrape_kayak_multicity(von, nach, tage=30, aufenthalt_tage=60,
|
|||
bags = 1 if "koffer" in gepaeck else 0
|
||||
airline_label = f" [{airline_filter}]" if airline_filter else ""
|
||||
|
||||
booking_url = _booking_url_kayak_multicity(von, nach, via, abflug, via_datum, rueck,
|
||||
kc, bags, airline_filter)
|
||||
booking_url_raw = _booking_url_kayak_multicity(von, nach, via, abflug, via_datum, rueck,
|
||||
kc, 0, airline_filter)
|
||||
booking_url = _booking_url_kayak_multicity(von, nach, via, abflug, via_datum, rueck,
|
||||
kc, bags, airline_filter)
|
||||
|
||||
print(f"[MC{airline_label}] Multi-City via {via}: {abflug} → +1T → {rueck}")
|
||||
print(f"[MC{airline_label}] URL: {booking_url}")
|
||||
|
|
@ -683,19 +708,7 @@ def scrape_kayak_multicity(von, nach, tage=30, aufenthalt_tage=60,
|
|||
r["airline"] = airline_filter or via
|
||||
results.append(r)
|
||||
|
||||
# Fallback ohne Bags-Filter
|
||||
if not results and bags > 0:
|
||||
print(f"[MC] Kein Ergebnis mit Bags — Fallback ohne Bags-Filter")
|
||||
sb.open(booking_url_raw)
|
||||
sb.sleep(12)
|
||||
body2 = sb.get_text("body")
|
||||
for r in _preise_aus_body(body2, "kayak_multicity", abflug):
|
||||
if r["preis"] > 600:
|
||||
r["ankunft"] = rueck
|
||||
r["booking_url"] = booking_url_raw
|
||||
r["airline"] = airline_filter or via
|
||||
results.append(r)
|
||||
|
||||
results = _validate_results(results, f"multicity{airline_label}", "premium_economy")
|
||||
print(f"[MC{airline_label}] Ergebnis: {[r['preis'] for r in results[:5]]}")
|
||||
screenshot_b64 = _take_screenshot(sb)
|
||||
return results[:10], screenshot_b64
|
||||
|
|
@ -712,12 +725,9 @@ def scrape_momondo(von, nach, tage=30, aufenthalt_tage=60,
|
|||
if trip_type == "roundtrip" else ""
|
||||
kc = KABINE_KAYAK.get(kabine, "w")
|
||||
bags = 1 if "koffer" in gepaeck else 0
|
||||
booking_url = _booking_url_momondo(von, nach, abflug, rueck, kc, bags,
|
||||
layover_min, layover_max, airline_filter,
|
||||
max_flugzeit_h, max_stops)
|
||||
booking_url_raw = _booking_url_momondo(von, nach, abflug, rueck, kc, 0,
|
||||
layover_min, layover_max, airline_filter,
|
||||
max_flugzeit_h, max_stops)
|
||||
booking_url = _booking_url_momondo(von, nach, abflug, rueck, kc, bags,
|
||||
layover_min, layover_max, airline_filter,
|
||||
max_flugzeit_h, max_stops)
|
||||
airline_label = f" [{airline_filter}]" if airline_filter else ""
|
||||
print(f"[MO{airline_label}] URL: {booking_url}")
|
||||
|
||||
|
|
@ -772,18 +782,11 @@ def scrape_momondo(von, nach, tage=30, aufenthalt_tage=60,
|
|||
r["airline"] = airline_filter or ""
|
||||
results.append(r)
|
||||
|
||||
# Fallback ohne Bags-Filter
|
||||
if not results and bags > 0:
|
||||
print(f"[MO] Kein Ergebnis — Fallback ohne Bags-Filter")
|
||||
sb.open(booking_url_raw)
|
||||
sb.sleep(12)
|
||||
body2 = sb.get_text("body")
|
||||
for r in _preise_aus_body(body2, "momondo", abflug):
|
||||
r["ankunft"] = rueck
|
||||
r["booking_url"] = booking_url_raw
|
||||
r["airline"] = airline_filter or ""
|
||||
results.append(r)
|
||||
pe_confirmed = _check_cabin_on_page(body, title, "premium_economy")
|
||||
if not pe_confirmed:
|
||||
print(f"[MO{airline_label}] WARNUNG: Premium Economy nicht auf Seite bestätigt!")
|
||||
|
||||
results = _validate_results(results, f"momondo{airline_label}", "premium_economy")
|
||||
print(f"[MO{airline_label}] Ergebnis: {[r['preis'] for r in results[:5]]}")
|
||||
screenshot_b64 = _take_screenshot(sb)
|
||||
return results[:10], screenshot_b64
|
||||
|
|
@ -929,6 +932,7 @@ def scrape_traveloka(von, nach, tage=30, aufenthalt_tage=60,
|
|||
pass
|
||||
|
||||
results.sort(key=lambda x: x["preis"])
|
||||
results = _validate_results(results, "traveloka", "premium_economy")
|
||||
print(f"[TV] Ergebnis: {[r['preis'] for r in results[:5]]}")
|
||||
screenshot_b64 = _take_screenshot(sb)
|
||||
return results[:10], screenshot_b64
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue