feat: 1000 EUR Plausibilitaets-Schwelle, Vergleichstabelle MU vs Asia, Gepaeck-Filter Kayak
Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
parent
52b71d16f4
commit
838e8f4af8
5 changed files with 269 additions and 81 deletions
|
|
@ -33,14 +33,23 @@ def init_db():
|
|||
von TEXT NOT NULL,
|
||||
nach TEXT NOT NULL,
|
||||
tage INTEGER DEFAULT 30,
|
||||
aufenthalt_tage INTEGER DEFAULT 14,
|
||||
aufenthalt_tage INTEGER DEFAULT 60,
|
||||
trip_type TEXT DEFAULT 'roundtrip',
|
||||
kabine TEXT DEFAULT 'premium_economy',
|
||||
gepaeck TEXT DEFAULT '1koffer+handgepaeck',
|
||||
intervall TEXT DEFAULT 'daily',
|
||||
aktiv INTEGER DEFAULT 1,
|
||||
created_at TEXT DEFAULT (datetime('now'))
|
||||
)
|
||||
""")
|
||||
# Spalte nachrüsten falls DB schon existiert
|
||||
try:
|
||||
c.execute("ALTER TABLE jobs ADD COLUMN gepaeck TEXT DEFAULT '1koffer+handgepaeck'")
|
||||
c.execute("ALTER TABLE jobs ADD COLUMN aufenthalt_tage INTEGER DEFAULT 60")
|
||||
c.execute("ALTER TABLE jobs ADD COLUMN trip_type TEXT DEFAULT 'roundtrip'")
|
||||
c.execute("ALTER TABLE jobs ADD COLUMN kabine TEXT DEFAULT 'premium_economy'")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
c.execute("""
|
||||
CREATE TABLE IF NOT EXISTS prices (
|
||||
|
|
@ -55,9 +64,15 @@ def init_db():
|
|||
ankunft TEXT,
|
||||
von TEXT,
|
||||
nach TEXT,
|
||||
booking_url TEXT,
|
||||
scraped_at TEXT DEFAULT (datetime('now'))
|
||||
)
|
||||
""")
|
||||
# Spalte nachrüsten falls DB schon existiert
|
||||
try:
|
||||
c.execute("ALTER TABLE prices ADD COLUMN booking_url TEXT")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
c.execute("""
|
||||
CREATE TABLE IF NOT EXISTS analyses (
|
||||
|
|
@ -94,9 +109,17 @@ def init_db():
|
|||
c.execute("""
|
||||
INSERT OR IGNORE INTO prompts (name, inhalt) VALUES (
|
||||
'ki_auswertung',
|
||||
'Du bist ein Flugpreis-Analyst. Analysiere Preisdaten für ROUNDTRIP Frankfurt (FRA) → Phnom Penh Techo Airport (KTI), Premium Economy. Verbindung typischerweise über Hanoi (HAN). Koffer werden in FRA aufgegeben und in KTI abgeholt (Durchgepäck). Reisedauer ca. 14 Tage.
|
||||
'Du bist ein Flugpreis-Analyst. Analysiere Preisdaten für folgenden Flug:
|
||||
|
||||
Aktuelle Preise heute:
|
||||
STRECKE: ROUNDTRIP Frankfurt (FRA) → Phnom Penh Techo Airport (KTI)
|
||||
KABINE: Premium Economy
|
||||
GEPÄCK: 1 großer Aufgabekoffer + Handgepäck (zwingend inklusive!)
|
||||
UMSTIEG: typischerweise Hanoi (HAN) — Durchgepäck FRA→KTI, kein Umsteigen des Koffers nötig
|
||||
AUFENTHALT: ca. 2 Monate
|
||||
|
||||
WICHTIG: Nur Preise bewerten die 1 Aufgabekoffer enthalten. Premium Economy schließt dies normalerweise ein — bei verdächtig günstigen Preisen (<700 EUR Roundtrip) prüfen ob es sich um reine Handgepäck-Tarife handeln könnte.
|
||||
|
||||
Aktuelle Preise heute (nach Anbieter und Standort des Scanners):
|
||||
{preise_heute}
|
||||
|
||||
Preisverlauf letzte 30 Tage:
|
||||
|
|
@ -110,9 +133,11 @@ Statistik:
|
|||
Antworte auf Deutsch in diesem Format:
|
||||
EMPFEHLUNG: [JETZT BUCHEN / WARTEN / NEUTRAL]
|
||||
BEGRUENDUNG: [1-2 Sätze warum]
|
||||
BESTER_PREIS: [günstigster Anbieter und Preis]
|
||||
BESTER_PREIS: [günstigster Anbieter, Preis, Scanner-Standort]
|
||||
TREND: [STEIGEND / FALLEND / STABIL]
|
||||
HINWEIS: [Falls Preis unter 600 EUR verdächtig günstig, darauf hinweisen]'
|
||||
GEO_UNTERSCHIED: [Preisunterschied DE-Scanner vs. KH-Scanner falls erkennbar]
|
||||
GEPAECK_WARNUNG: [Ja/Nein — ob Preise möglicherweise kein Aufgabegepäck enthalten]
|
||||
PLAUSI_CHECK: [Preise unter 1000 EUR einzeln bewerten — wahrscheinlich Economy, Hinflug only, oder kein Koffer]'
|
||||
)
|
||||
""")
|
||||
|
||||
|
|
@ -128,10 +153,10 @@ HINWEIS: [Falls Preis unter 600 EUR verdächtig günstig, darauf hinweisen]'
|
|||
job_count = c.execute("SELECT COUNT(*) FROM jobs").fetchone()[0]
|
||||
if job_count == 0:
|
||||
c.execute("""
|
||||
INSERT INTO jobs (scanner, von, nach, tage, aufenthalt_tage, trip_type, kabine, intervall) VALUES
|
||||
('google_flights', 'FRA', 'KTI', 30, 14, 'roundtrip', 'premium_economy', 'daily'),
|
||||
('kayak', 'FRA', 'KTI', 30, 14, 'roundtrip', 'premium_economy', 'daily'),
|
||||
('trip', 'FRA', 'KTI', 30, 14, 'roundtrip', 'premium_economy', 'daily')
|
||||
INSERT INTO jobs (scanner, von, nach, tage, aufenthalt_tage, trip_type, kabine, gepaeck, intervall) VALUES
|
||||
('google_flights', 'FRA', 'KTI', 30, 60, 'roundtrip', 'premium_economy', '1koffer+handgepaeck', 'daily'),
|
||||
('kayak', 'FRA', 'KTI', 30, 60, 'roundtrip', 'premium_economy', '1koffer+handgepaeck', 'daily'),
|
||||
('trip', 'FRA', 'KTI', 30, 60, 'roundtrip', 'premium_economy', '1koffer+handgepaeck', 'daily')
|
||||
""")
|
||||
|
||||
conn.commit()
|
||||
|
|
|
|||
|
|
@ -53,9 +53,10 @@ def dispatch_job(node, job):
|
|||
"von": job["von"],
|
||||
"nach": job["nach"],
|
||||
"tage": job["tage"],
|
||||
"aufenthalt_tage": job.get("aufenthalt_tage", 14),
|
||||
"aufenthalt_tage": job.get("aufenthalt_tage", 60),
|
||||
"trip_type": job.get("trip_type", "roundtrip"),
|
||||
"kabine": job.get("kabine", "premium_economy"),
|
||||
"gepaeck": job.get("gepaeck", "1koffer+handgepaeck"),
|
||||
}
|
||||
log(f"Job an {node['name']} ({node['tailscale_ip']}): {payload}")
|
||||
try:
|
||||
|
|
@ -83,13 +84,14 @@ def speichere_preise(results, node_name, job):
|
|||
for r in results:
|
||||
conn.execute("""
|
||||
INSERT INTO prices
|
||||
(job_id, scanner, node, preis, waehrung, airline, abflug, ankunft, von, nach)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
(job_id, scanner, node, preis, waehrung, airline, abflug, ankunft, von, nach, booking_url)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
""", (
|
||||
job["id"], r.get("scanner", job["scanner"]), node_name,
|
||||
r["preis"], r.get("waehrung", "EUR"), r.get("airline", ""),
|
||||
r.get("abflug", ""), r.get("ankunft", ""),
|
||||
job["von"], job["nach"]
|
||||
job["von"], job["nach"],
|
||||
r.get("booking_url", "")
|
||||
))
|
||||
conn.commit()
|
||||
conn.close()
|
||||
|
|
|
|||
162
hub/src/web.py
162
hub/src/web.py
|
|
@ -78,10 +78,16 @@ BASE_HTML = """<!DOCTYPE html>
|
|||
</html>"""
|
||||
|
||||
OVERVIEW_HTML = BASE_HTML.replace("{% block content %}{% endblock %}", """
|
||||
<div style="background:#451a03;border:1px solid#92400e;border-radius:8px;padding:0.6rem 1rem;margin-bottom:1.2rem;font-size:0.85rem;color:#fcd34d">
|
||||
✈️ <strong>FRA → KTI</strong> · Roundtrip · Premium Economy · 1 Aufgabekoffer + Handgepäck · ~2 Monate Aufenthalt
|
||||
· <span style="color:#f87171;font-weight:600">⚠ Preise unter 1.000 € bitte manuell prüfen</span>
|
||||
</div>
|
||||
|
||||
<div class="grid-3" style="margin-bottom:1.5rem">
|
||||
<div class="card stat-box">
|
||||
<div class="value" id="min-preis">—</div>
|
||||
<div class="label">Günstigster Preis heute (EUR)</div>
|
||||
<div id="min-preis-warnung" style="display:none;margin-top:0.4rem;font-size:0.75rem;color:#fbbf24">⚠ unter 1.000 € — bitte prüfen</div>
|
||||
</div>
|
||||
<div class="card stat-box">
|
||||
<div class="value" id="avg-preis">—</div>
|
||||
|
|
@ -112,9 +118,30 @@ OVERVIEW_HTML = BASE_HTML.replace("{% block content %}{% endblock %}", """
|
|||
</div>
|
||||
|
||||
<div class="card">
|
||||
<h2>Heutige Preise</h2>
|
||||
<h2>Heutige Preise — Vergleich nach Scanner-Standort</h2>
|
||||
<p style="font-size:0.8rem;color:#64748b;margin-bottom:1rem">
|
||||
Spalten = Scraping-Standort (DE-IP vs. KH-IP) · Preisunterschiede durch geo-basiertes Airline-Pricing möglich
|
||||
</p>
|
||||
<table id="vergleich-table">
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Scanner</th>
|
||||
<th>🇩🇪 Muldenstein (DE)</th>
|
||||
<th>🇰🇭 Kambodscha (KH)</th>
|
||||
<th>Differenz</th>
|
||||
<th>Abflug</th>
|
||||
<th>Rückflug</th>
|
||||
<th>Buchen</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody id="vergleich-tbody"></tbody>
|
||||
</table>
|
||||
</div>
|
||||
|
||||
<div class="card">
|
||||
<h2>Alle Preise heute (Detail)</h2>
|
||||
<table>
|
||||
<thead><tr><th>Anbieter</th><th>Node</th><th>Preis</th><th>Airline</th><th>Zeit</th></tr></thead>
|
||||
<thead><tr><th>Anbieter</th><th>Node</th><th>Preis</th><th>Plausibilität</th><th>Abflug</th><th>Rückflug</th><th>Buchen</th></tr></thead>
|
||||
<tbody id="preise-tbody"></tbody>
|
||||
</table>
|
||||
</div>
|
||||
|
|
@ -128,15 +155,32 @@ OVERVIEW_HTML = BASE_HTML.replace("{% block content %}{% endblock %}", """
|
|||
</div>
|
||||
|
||||
<script>
|
||||
const PLAUSI_GRENZE = 1000;
|
||||
|
||||
// Render one price as a complete <td>…</td> cell.
//   preis       – numeric price, or null/undefined when no price is available
//   booking_url – optional URL; when truthy a small "↗" link is appended
// Prices below PLAUSI_GRENZE are shown in amber with a "⚠ prüfen" badge,
// all other prices in green. Callers strip the outer <td>/</td> via
// String.replace, so a priced cell must keep starting with a bare "<td>".
function preisZelle(preis, booking_url) {
  // `== null` deliberately matches both null and undefined.
  if (preis == null) {
    return '<td style="color:#475569">—</td>';
  }

  const warn = preis < PLAUSI_GRENZE;
  const farbe = warn ? '#fbbf24' : '#34d399';
  const badge = warn
    ? ' <span style="background:#451a03;color:#fbbf24;font-size:0.7rem;padding:0.1rem 0.4rem;border-radius:4px;margin-left:4px" title="Bitte manuell prüfen — möglicherweise falsche Klasse oder kein Aufgabegepäck">⚠ prüfen</span>'
    : '';
  // rel="noopener noreferrer": the opened booking site must not get a
  // handle on this dashboard through window.opener.
  const link = booking_url
    ? `<a href="${booking_url}" target="_blank" rel="noopener noreferrer" style="font-size:0.75rem;color:#38bdf8;margin-left:6px">↗</a>`
    : '';

  return `<td><strong style="color:${farbe}">${Math.round(preis)} €</strong>${badge}${link}</td>`;
}
|
||||
|
||||
async function ladeUebersicht() {
|
||||
const [stats, ki, preise, nodes] = await Promise.all([
|
||||
const [stats, ki, preise, nodes, vergleich] = await Promise.all([
|
||||
fetch('/api/stats').then(r=>r.json()),
|
||||
fetch('/api/ki/latest').then(r=>r.json()),
|
||||
fetch('/api/preise/heute').then(r=>r.json()),
|
||||
fetch('/api/nodes').then(r=>r.json())
|
||||
fetch('/api/nodes').then(r=>r.json()),
|
||||
fetch('/api/preise/vergleich').then(r=>r.json())
|
||||
]);
|
||||
|
||||
document.getElementById('min-preis').textContent = stats.min_heute ? Math.round(stats.min_heute) : '—';
|
||||
const minHeute = stats.min_heute;
|
||||
document.getElementById('min-preis').textContent = minHeute ? Math.round(minHeute) : '—';
|
||||
document.getElementById('min-preis').style.color = (minHeute && minHeute < PLAUSI_GRENZE) ? '#fbbf24' : '#38bdf8';
|
||||
if (minHeute && minHeute < PLAUSI_GRENZE) {
|
||||
document.getElementById('min-preis-warnung').style.display = 'block';
|
||||
}
|
||||
document.getElementById('avg-preis').textContent = stats.avg_30d ? Math.round(stats.avg_30d) : '—';
|
||||
document.getElementById('node-count').textContent = nodes.filter(n=>n.status==='online').length;
|
||||
|
||||
|
|
@ -150,12 +194,66 @@ async function ladeUebersicht() {
|
|||
document.getElementById('ki-empfehlung').textContent = 'Noch keine Auswertung';
|
||||
}
|
||||
|
||||
// Vergleichstabelle MU vs Asia
|
||||
const vtbody = document.getElementById('vergleich-tbody');
|
||||
if (vergleich.length === 0) {
|
||||
vtbody.innerHTML = '<tr><td colspan="7" style="color:#475569;text-align:center">Noch keine Daten heute</td></tr>';
|
||||
} else {
|
||||
vtbody.innerHTML = vergleich.map(v => {
|
||||
const mu = v.nodes['flugscanner-mu'];
|
||||
const asia = v.nodes['flugscanner-asia'];
|
||||
const pMu = mu ? mu.preis : null;
|
||||
const pAsia = asia ? asia.preis : null;
|
||||
let diffHtml = '<td style="color:#475569">—</td>';
|
||||
if (pMu != null && pAsia != null) {
|
||||
const diff = Math.round(pMu - pAsia);
|
||||
const col = diff > 0 ? '#f87171' : diff < 0 ? '#34d399' : '#94a3b8';
|
||||
const sign = diff > 0 ? '+' : '';
|
||||
diffHtml = `<td style="color:${col};font-weight:600">${sign}${diff} €</td>`;
|
||||
}
|
||||
const buchungsUrl = (mu?.booking_url || asia?.booking_url || '');
|
||||
const buchBtn = buchungsUrl
|
||||
? `<td><a href="${buchungsUrl}" target="_blank" class="btn btn-sm" style="text-decoration:none">Öffnen ↗</a></td>`
|
||||
: '<td style="color:#475569">—</td>';
|
||||
const deltaHtml = (node) => {
|
||||
if (!node || node.delta == null) return '';
|
||||
const d = node.delta;
|
||||
return d === 0 ? '' : `<br><span style="font-size:0.7rem;color:${d>0?'#f87171':'#34d399'}">${d>0?'+':''}${d}€ ggü. gestern</span>`;
|
||||
};
|
||||
const muHtml = pMu != null ? `${preisZelle(pMu, mu?.booking_url).replace('<td>','').replace('</td>','')}${deltaHtml(mu)}` : '—';
|
||||
const asiaHtml = pAsia != null ? `${preisZelle(pAsia, asia?.booking_url).replace('<td>','').replace('</td>','')}${deltaHtml(asia)}` : '—';
|
||||
return `<tr>
|
||||
<td><strong>${v.scanner}</strong></td>
|
||||
<td>${muHtml}</td>
|
||||
<td>${asiaHtml}</td>
|
||||
${diffHtml}
|
||||
<td style="font-size:0.85rem">${v.abflug||'—'}</td>
|
||||
<td style="font-size:0.85rem">${v.ankunft||'—'}</td>
|
||||
${buchBtn}
|
||||
</tr>`;
|
||||
}).join('');
|
||||
}
|
||||
|
||||
// Detail-Tabelle
|
||||
const tbody = document.getElementById('preise-tbody');
|
||||
tbody.innerHTML = preise.map(p => `
|
||||
<tr><td>${p.scanner}</td><td>${p.node}</td>
|
||||
<td><strong style="color:#38bdf8">${p.preis} EUR</strong></td>
|
||||
<td>${p.airline||'—'}</td><td>${p.scraped_at}</td></tr>
|
||||
`).join('') || '<tr><td colspan="5" style="color:#475569;text-align:center">Noch keine Daten heute</td></tr>';
|
||||
tbody.innerHTML = preise.map(p => {
|
||||
const warn = p.preis < PLAUSI_GRENZE;
|
||||
const plausi = warn
|
||||
? '<span style="background:#451a03;color:#fbbf24;padding:0.15rem 0.5rem;border-radius:4px;font-size:0.75rem">⚠ bitte prüfen</span>'
|
||||
: '<span style="background:#064e3b;color:#34d399;padding:0.15rem 0.5rem;border-radius:4px;font-size:0.75rem">✓ plausibel</span>';
|
||||
const buchBtn = p.booking_url
|
||||
? `<a href="${p.booking_url}" target="_blank" class="btn btn-sm" style="text-decoration:none">Öffnen ↗</a>`
|
||||
: '—';
|
||||
return `<tr>
|
||||
<td>${p.scanner}</td>
|
||||
<td style="font-size:0.8rem;color:#64748b">${p.node}</td>
|
||||
<td><strong style="color:${warn?'#fbbf24':'#34d399'}">${p.preis} €</strong></td>
|
||||
<td>${plausi}</td>
|
||||
<td style="font-size:0.85rem">${p.abflug||'—'}</td>
|
||||
<td style="font-size:0.85rem">${p.ankunft||'—'}</td>
|
||||
<td>${buchBtn}</td>
|
||||
</tr>`;
|
||||
}).join('') || '<tr><td colspan="7" style="color:#475569;text-align:center">Noch keine Daten heute</td></tr>';
|
||||
|
||||
const ntbody = document.getElementById('nodes-tbody');
|
||||
ntbody.innerHTML = nodes.map(n => `
|
||||
|
|
@ -230,6 +328,50 @@ def api_preise_heute():
|
|||
return jsonify([dict(r) for r in rows])
|
||||
|
||||
|
||||
@app.route("/api/preise/vergleich")
def api_preise_vergleich():
    """Return today's cheapest price per scanner and node, with the delta to yesterday.

    The JSON response is a list with one object per scanner::

        {"scanner": ..., "abflug": ..., "ankunft": ...,
         "nodes": {<node name>: {"preis": ..., "delta": ..., "booking_url": ...}}}

    ``abflug``/``ankunft`` come from the first row seen for a scanner; because
    the query orders by ``scanner, preis`` that is the cheapest node's row.
    ``delta`` is today's minimum minus yesterday's minimum for the same
    (scanner, node) pair, or ``None`` when there is no price from yesterday.
    """
    conn = get_conn()
    try:
        # Today: cheapest price per scanner+node.
        # NOTE(review): booking_url/abflug/ankunft are bare columns in an
        # aggregate query; this relies on SQLite taking them from the row
        # that holds MIN(preis) — confirm the backing DB is SQLite.
        heute = conn.execute("""
            SELECT scanner, node, MIN(preis) as preis, booking_url, abflug, ankunft
            FROM prices
            WHERE date(scraped_at) = date('now')
            GROUP BY scanner, node
            ORDER BY scanner, preis
        """).fetchall()

        # Yesterday: cheapest price per scanner+node.
        gestern = conn.execute("""
            SELECT scanner, node, MIN(preis) as preis
            FROM prices
            WHERE date(scraped_at) = date('now', '-1 day')
            GROUP BY scanner, node
        """).fetchall()
    finally:
        # Close the connection even when one of the queries raises.
        conn.close()

    gestern_map = {(r["scanner"], r["node"]): r["preis"] for r in gestern}

    # Aggregate: collect all nodes of a scanner under one entry.
    scanner_map = {}
    for r in heute:
        s = r["scanner"]
        if s not in scanner_map:
            scanner_map[s] = {"scanner": s, "abflug": r["abflug"],
                              "ankunft": r["ankunft"], "nodes": {}}
        preis_gestern = gestern_map.get((s, r["node"]))
        # Compare against None explicitly so that a stored price of 0 still
        # yields a delta instead of being treated as "no data".
        delta = (round(r["preis"] - preis_gestern, 0)
                 if preis_gestern is not None else None)
        scanner_map[s]["nodes"][r["node"]] = {
            "preis": r["preis"],
            "delta": delta,
            "booking_url": r["booking_url"] or "",
        }

    return jsonify(list(scanner_map.values()))
|
||||
|
||||
|
||||
@app.route("/api/preise/verlauf")
|
||||
def api_preise_verlauf():
|
||||
conn = get_conn()
|
||||
|
|
|
|||
|
|
@ -18,14 +18,15 @@ def job():
|
|||
von = data.get("von", "FRA")
|
||||
nach = data.get("nach", "KTI")
|
||||
tage = data.get("tage", 30)
|
||||
aufenthalt = data.get("aufenthalt_tage", 14)
|
||||
aufenthalt = data.get("aufenthalt_tage", 60)
|
||||
trip_type = data.get("trip_type", "roundtrip")
|
||||
kabine = data.get("kabine", "premium_economy")
|
||||
gepaeck = data.get("gepaeck", "1koffer+handgepaeck")
|
||||
|
||||
print(f"[{NODE_NAME}] Job: {scanner} {von}→{nach} ({trip_type}, {kabine}, +{tage}Tage/{aufenthalt}Tage)")
|
||||
print(f"[{NODE_NAME}] Job: {scanner} {von}→{nach} ({trip_type}, {kabine}, Gepäck: {gepaeck}, +{tage}Tage/{aufenthalt}Tage)")
|
||||
|
||||
try:
|
||||
results = scrape(scanner, von, nach, tage, aufenthalt, trip_type, kabine)
|
||||
results = scrape(scanner, von, nach, tage, aufenthalt, trip_type, kabine, gepaeck)
|
||||
print(f"[{NODE_NAME}] {len(results)} Preise gefunden")
|
||||
return jsonify({
|
||||
"results": results,
|
||||
|
|
|
|||
|
|
@ -3,8 +3,9 @@ from datetime import datetime, timedelta
|
|||
import re
|
||||
|
||||
|
||||
def scrape(scanner, von, nach, tage=30, aufenthalt_tage=14,
|
||||
trip_type="roundtrip", kabine="premium_economy"):
|
||||
def scrape(scanner, von, nach, tage=30, aufenthalt_tage=60,
|
||||
trip_type="roundtrip", kabine="premium_economy",
|
||||
gepaeck="1koffer+handgepaeck"):
|
||||
dispatcher = {
|
||||
"google_flights": scrape_google_flights,
|
||||
"kayak": scrape_kayak,
|
||||
|
|
@ -14,7 +15,33 @@ def scrape(scanner, von, nach, tage=30, aufenthalt_tage=14,
|
|||
fn = dispatcher.get(scanner)
|
||||
if not fn:
|
||||
raise ValueError(f"Unbekannter Scanner: {scanner}")
|
||||
return fn(von, nach, tage, aufenthalt_tage, trip_type, kabine)
|
||||
return fn(von, nach, tage, aufenthalt_tage, trip_type, kabine, gepaeck)
|
||||
|
||||
|
||||
def _booking_url_google(von, nach, abflug, rueck, kc):
|
||||
if rueck:
|
||||
return (f"https://www.google.com/travel/flights/search?hl=de&curr=EUR"
|
||||
f"#flt={von}.{nach}.{abflug}*{nach}.{von}.{rueck};c:EUR;e:1;sd:1;t:r;sc:{kc}")
|
||||
return (f"https://www.google.com/travel/flights/search?hl=de&curr=EUR"
|
||||
f"#flt={von}.{nach}.{abflug};c:EUR;e:1;sd:1;t:f;sc:{kc}")
|
||||
|
||||
|
||||
def _booking_url_kayak(von, nach, abflug, rueck, kc, bags=1):
|
||||
# fs=bfc%3D1 = "bags filter: checked bags = 1 free bag included"
|
||||
bag_filter = f"&fs=bfc%3D{bags}" if bags else ""
|
||||
if rueck:
|
||||
return f"https://www.kayak.de/flights/{von}-{nach}/{abflug}/{rueck}?sort=price_a&cabin={kc}¤cy=EUR{bag_filter}"
|
||||
return f"https://www.kayak.de/flights/{von}-{nach}/{abflug}?sort=price_a&cabin={kc}¤cy=EUR{bag_filter}"
|
||||
|
||||
|
||||
def _booking_url_trip(von, nach, abflug_fmt, rueck_fmt, kc, von_name, nach_name):
|
||||
if rueck_fmt:
|
||||
return (f"https://www.trip.com/flights/{von_name}-to-{nach_name}/"
|
||||
f"tickets-{von.lower()}-{nach.lower()}/"
|
||||
f"?DDate1={abflug_fmt}&DDate2={rueck_fmt}&class={kc}&curr=EUR")
|
||||
return (f"https://www.trip.com/flights/{von_name}-to-{nach_name}/"
|
||||
f"tickets-{von.lower()}-{nach.lower()}/"
|
||||
f"?DDate1={abflug_fmt}&class={kc}&curr=EUR")
|
||||
|
||||
|
||||
# ── Kabinen-Codes ──────────────────────────────────────────────────────────────
|
||||
|
|
@ -75,28 +102,23 @@ def _consent_google(sb):
|
|||
return False
|
||||
|
||||
|
||||
def scrape_google_flights(von, nach, tage=30, aufenthalt_tage=14,
|
||||
trip_type="roundtrip", kabine="premium_economy"):
|
||||
def scrape_google_flights(von, nach, tage=30, aufenthalt_tage=60,
|
||||
trip_type="roundtrip", kabine="premium_economy",
|
||||
gepaeck="1koffer+handgepaeck"):
|
||||
abflug = (datetime.now() + timedelta(days=tage)).strftime("%Y-%m-%d")
|
||||
rueck = (datetime.now() + timedelta(days=tage + aufenthalt_tage)).strftime("%Y-%m-%d")
|
||||
rueck = (datetime.now() + timedelta(days=tage + aufenthalt_tage)).strftime("%Y-%m-%d") if trip_type == "roundtrip" else ""
|
||||
kc = KABINE_GOOGLE.get(kabine, "w")
|
||||
booking_url = _booking_url_google(von, nach, abflug, rueck, kc)
|
||||
|
||||
if trip_type == "roundtrip":
|
||||
url = (f"https://www.google.com/travel/flights/search?hl=de&curr=EUR"
|
||||
f"#flt={von}.{nach}.{abflug}*{nach}.{von}.{rueck};c:EUR;e:1;sd:1;t:r;sc:{kc}")
|
||||
else:
|
||||
url = (f"https://www.google.com/travel/flights/search?hl=de&curr=EUR"
|
||||
f"#flt={von}.{nach}.{abflug};c:EUR;e:1;sd:1;t:f;sc:{kc}")
|
||||
|
||||
print(f"[GF] URL: {url[:100]}")
|
||||
print(f"[GF] URL: {booking_url[:100]}")
|
||||
results = []
|
||||
|
||||
with SB(uc=True, headless=True, chromium_arg="--no-sandbox --disable-dev-shm-usage") as sb:
|
||||
sb.open(url)
|
||||
sb.open(booking_url)
|
||||
sb.sleep(7)
|
||||
|
||||
if _consent_google(sb):
|
||||
sb.open(url)
|
||||
sb.open(booking_url)
|
||||
sb.sleep(8)
|
||||
|
||||
title = sb.get_title()
|
||||
|
|
@ -105,7 +127,6 @@ def scrape_google_flights(von, nach, tage=30, aufenthalt_tage=14,
|
|||
body = sb.get_text("body")
|
||||
print(f"[GF] Body-Länge: {len(body)}")
|
||||
|
||||
# Aria-Labels mit €
|
||||
try:
|
||||
elems = sb.find_elements('[aria-label*="€"]', timeout=3)
|
||||
for elem in elems[:20]:
|
||||
|
|
@ -114,40 +135,41 @@ def scrape_google_flights(von, nach, tage=30, aufenthalt_tage=14,
|
|||
if p:
|
||||
results.append({"scanner": "google_flights", "preis": p,
|
||||
"waehrung": "EUR", "airline": "",
|
||||
"abflug": abflug, "ankunft": rueck if trip_type == "roundtrip" else ""})
|
||||
"abflug": abflug, "ankunft": rueck,
|
||||
"booking_url": booking_url})
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Fallback Regex
|
||||
if not results:
|
||||
results = _preise_aus_body(body, "google_flights", abflug)
|
||||
for r in _preise_aus_body(body, "google_flights", abflug):
|
||||
r["ankunft"] = rueck
|
||||
r["booking_url"] = booking_url
|
||||
results.append(r)
|
||||
|
||||
print(f"[GF] Ergebnis: {[r['preis'] for r in results[:5]]}")
|
||||
return results[:10]
|
||||
|
||||
|
||||
def scrape_kayak(von, nach, tage=30, aufenthalt_tage=14,
|
||||
trip_type="roundtrip", kabine="premium_economy"):
|
||||
def scrape_kayak(von, nach, tage=30, aufenthalt_tage=60,
|
||||
trip_type="roundtrip", kabine="premium_economy",
|
||||
gepaeck="1koffer+handgepaeck"):
|
||||
abflug = (datetime.now() + timedelta(days=tage)).strftime("%Y-%m-%d")
|
||||
rueck = (datetime.now() + timedelta(days=tage + aufenthalt_tage)).strftime("%Y-%m-%d")
|
||||
rueck = (datetime.now() + timedelta(days=tage + aufenthalt_tage)).strftime("%Y-%m-%d") if trip_type == "roundtrip" else ""
|
||||
kc = KABINE_KAYAK.get(kabine, "w")
|
||||
# Gepäck-Filter: 1 = mindestens 1 Aufgabekoffer inklusive
|
||||
bags = 1 if "koffer" in gepaeck else 0
|
||||
booking_url = _booking_url_kayak(von, nach, abflug, rueck, kc, bags)
|
||||
|
||||
if trip_type == "roundtrip":
|
||||
url = f"https://www.kayak.de/flights/{von}-{nach}/{abflug}/{rueck}?sort=price_a&cabin={kc}¤cy=EUR"
|
||||
else:
|
||||
url = f"https://www.kayak.de/flights/{von}-{nach}/{abflug}?sort=price_a&cabin={kc}¤cy=EUR"
|
||||
|
||||
print(f"[KY] URL: {url}")
|
||||
print(f"[KY] URL: {booking_url}")
|
||||
results = []
|
||||
|
||||
with SB(uc=True, headless=True, chromium_arg="--no-sandbox --disable-dev-shm-usage") as sb:
|
||||
sb.open(url)
|
||||
sb.open(booking_url)
|
||||
sb.sleep(15)
|
||||
|
||||
title = sb.get_title()
|
||||
body = sb.get_text("body")
|
||||
print(f"[KY] Title: {title[:80]}")
|
||||
print(f"[KY] Body-500: {body[:300]}")
|
||||
|
||||
for sel in ['.price-text', '.f8F1-price-text', 'div[class*="price"] span',
|
||||
'span[class*="price"]', '.Iqt3', 'div.nrc6-price', '.price']:
|
||||
|
|
@ -159,57 +181,50 @@ def scrape_kayak(von, nach, tage=30, aufenthalt_tage=14,
|
|||
if p:
|
||||
results.append({"scanner": "kayak", "preis": p,
|
||||
"waehrung": "EUR", "airline": "",
|
||||
"abflug": abflug,
|
||||
"ankunft": rueck if trip_type == "roundtrip" else ""})
|
||||
"abflug": abflug, "ankunft": rueck,
|
||||
"booking_url": booking_url})
|
||||
if results:
|
||||
break
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if not results:
|
||||
results = _preise_aus_body(body, "kayak", abflug)
|
||||
for r in _preise_aus_body(body, "kayak", abflug):
|
||||
r["ankunft"] = rueck
|
||||
r["booking_url"] = booking_url
|
||||
results.append(r)
|
||||
|
||||
print(f"[KY] Ergebnis: {[r['preis'] for r in results[:5]]}")
|
||||
return results[:10]
|
||||
|
||||
|
||||
def scrape_trip(von, nach, tage=30, aufenthalt_tage=14,
|
||||
def scrape_trip(von, nach, tage=30, aufenthalt_tage=60,
|
||||
trip_type="roundtrip", kabine="premium_economy"):
|
||||
abflug = (datetime.now() + timedelta(days=tage)).strftime("%Y%m%d")
|
||||
rueck = (datetime.now() + timedelta(days=tage + aufenthalt_tage)).strftime("%Y%m%d")
|
||||
abflug_fmt = (datetime.now() + timedelta(days=tage)).strftime("%Y%m%d")
|
||||
rueck_fmt = (datetime.now() + timedelta(days=tage + aufenthalt_tage)).strftime("%Y%m%d") if trip_type == "roundtrip" else ""
|
||||
abflug_iso = (datetime.now() + timedelta(days=tage)).strftime("%Y-%m-%d")
|
||||
rueck_iso = (datetime.now() + timedelta(days=tage + aufenthalt_tage)).strftime("%Y-%m-%d")
|
||||
rueck_iso = (datetime.now() + timedelta(days=tage + aufenthalt_tage)).strftime("%Y-%m-%d") if trip_type == "roundtrip" else ""
|
||||
kc = KABINE_TRIP.get(kabine, "W")
|
||||
|
||||
# trip.com Stadtname-Mapping für URL
|
||||
stadtname = {"FRA": "frankfurt", "HAN": "hanoi", "KTI": "phnom-penh",
|
||||
"PNH": "phnom-penh", "BKK": "bangkok", "SGN": "ho-chi-minh-city"}
|
||||
von_name = stadtname.get(von, von.lower())
|
||||
nach_name = stadtname.get(nach, nach.lower())
|
||||
|
||||
if trip_type == "roundtrip":
|
||||
url = (f"https://www.trip.com/flights/{von_name}-to-{nach_name}/"
|
||||
f"tickets-{von.lower()}-{nach.lower()}/"
|
||||
f"?DDate1={abflug}&DDate2={rueck}&class={kc}&curr=EUR")
|
||||
else:
|
||||
url = (f"https://www.trip.com/flights/{von_name}-to-{nach_name}/"
|
||||
f"tickets-{von.lower()}-{nach.lower()}/"
|
||||
f"?DDate1={abflug}&class={kc}&curr=EUR")
|
||||
|
||||
print(f"[TR] URL: {url}")
|
||||
booking_url = _booking_url_trip(von, nach, abflug_fmt, rueck_fmt, kc, von_name, nach_name)
|
||||
print(f"[TR] URL: {booking_url}")
|
||||
results = []
|
||||
|
||||
with SB(uc=True, headless=True, chromium_arg="--no-sandbox --disable-dev-shm-usage") as sb:
|
||||
sb.open(url)
|
||||
sb.open(booking_url)
|
||||
sb.sleep(12)
|
||||
|
||||
title = sb.get_title()
|
||||
body = sb.get_text("body")
|
||||
print(f"[TR] Title: {title[:80]}")
|
||||
|
||||
# Cookie-Banner
|
||||
for sel in ['button[id*="accept"]', 'button[class*="accept"]',
|
||||
'button[aria-label*="Accept"]', '.cookie-accept', '#onetrust-accept-btn-handler']:
|
||||
'button[aria-label*="Accept"]', '#onetrust-accept-btn-handler']:
|
||||
try:
|
||||
sb.click(sel, timeout=2)
|
||||
sb.sleep(2)
|
||||
|
|
@ -227,15 +242,18 @@ def scrape_trip(von, nach, tage=30, aufenthalt_tage=14,
|
|||
if p:
|
||||
results.append({"scanner": "trip", "preis": p,
|
||||
"waehrung": "EUR", "airline": "",
|
||||
"abflug": abflug_iso,
|
||||
"ankunft": rueck_iso if trip_type == "roundtrip" else ""})
|
||||
"abflug": abflug_iso, "ankunft": rueck_iso,
|
||||
"booking_url": booking_url})
|
||||
if results:
|
||||
break
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if not results:
|
||||
results = _preise_aus_body(body, "trip", abflug_iso)
|
||||
for r in _preise_aus_body(body, "trip", abflug_iso):
|
||||
r["ankunft"] = rueck_iso
|
||||
r["booking_url"] = booking_url
|
||||
results.append(r)
|
||||
|
||||
print(f"[TR] Ergebnis: {[r['preis'] for r in results[:5]]}")
|
||||
return results[:10]
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue