feat: Screenshot-Spalte im Dashboard — Full-Page CDP Screenshots

- worker.py: _take_screenshot() via Chrome CDP (JPEG 55%, max 3000px)
- worker.py: alle Scraper geben (results, screenshot_b64) Tuple zurück
- agent.py: screenshot_b64 in API-Response enthalten
- scheduler.py: speichere_screenshot() Funktion + Verknüpfung mit prices
- db.py: screenshots-Tabelle + screenshot_id FK in prices
- web.py: /api/screenshot/<id> Endpoint (base64→JPEG Response)
- web.py: 📷 Button in Preistabelle → Lightbox mit Full-Page Screenshot

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
Cursor 2026-02-25 20:51:10 +07:00
parent 2ce7d02bc5
commit a03a58d01e
5 changed files with 142 additions and 25 deletions

View file

@ -83,14 +83,29 @@ def init_db():
von TEXT,
nach TEXT,
booking_url TEXT,
screenshot_id INTEGER,
scraped_at TEXT DEFAULT (datetime('now'))
)
""")
for col_sql in [
"ALTER TABLE prices ADD COLUMN booking_url TEXT",
"ALTER TABLE prices ADD COLUMN screenshot_id INTEGER",
]:
try:
c.execute(col_sql)
except Exception:
pass
c.execute("""
CREATE TABLE IF NOT EXISTS screenshots (
id INTEGER PRIMARY KEY AUTOINCREMENT,
job_id INTEGER,
node TEXT,
scanner TEXT,
screenshot_b64 TEXT,
scraped_at TEXT DEFAULT (datetime('now'))
)
""")
# Spalte nachrüsten falls DB schon existiert
try:
c.execute("ALTER TABLE prices ADD COLUMN booking_url TEXT")
except Exception:
pass
c.execute("""
CREATE TABLE IF NOT EXISTS analyses (

View file

@ -76,13 +76,16 @@ def dispatch_job(node, job, tage_override=None):
timeout=300
)
if r.status_code == 200:
results = r.json().get("results", [])
data = r.json()
results = data.get("results", [])
screenshot_b64 = data.get("screenshot_b64", "")
via_label = f" via {job.get('via','')}" if job.get('via') else ""
log(f"{node['name']}: {len(results)} Preise ← {job['scanner']}"
f"{' ['+job.get('airline_filter','')+']' if job.get('airline_filter') else ''}"
f"{via_label}"
f"{' +'+str(tage_override)+'T' if tage_override else ''}")
speichere_preise(results, node["name"], job)
screenshot_id = speichere_screenshot(screenshot_b64, node["name"], job)
speichere_preise(results, node["name"], job, screenshot_id)
return True
else:
log(f"{node['name']}: Fehler {r.status_code} bei {job['scanner']}", "ERROR")
@ -93,19 +96,39 @@ def dispatch_job(node, job, tage_override=None):
return False
def speichere_preise(results, node_name, job):
def speichere_screenshot(screenshot_b64, node_name, job):
    """Store a screenshot in the ``screenshots`` table and return its row id.

    Storing is best-effort: any failure is logged at WARN level and ``None``
    is returned.

    Args:
        screenshot_b64: Base64-encoded screenshot; a falsy value (empty
            string, ``None``) means "no screenshot" and nothing is stored.
        node_name: Value written to the ``node`` column.
        job: Mapping providing at least ``"id"`` and ``"scanner"``.

    Returns:
        The ``lastrowid`` of the inserted row, or ``None`` when there was
        nothing to store or the insert failed.
    """
    if not screenshot_b64:
        return None
    conn = None
    try:
        conn = get_conn()
        cur = conn.execute("""
            INSERT INTO screenshots (job_id, node, scanner, screenshot_b64)
            VALUES (?, ?, ?, ?)
        """, (job["id"], node_name, job["scanner"], screenshot_b64))
        screenshot_id = cur.lastrowid
        conn.commit()
        return screenshot_id
    except Exception as e:
        log(f"Screenshot-Speicher-Fehler: {e}", "WARN")
        return None
    finally:
        # Close on every path; previously a failing INSERT/commit skipped
        # close() and leaked the connection.
        if conn is not None:
            conn.close()
def speichere_preise(results, node_name, job, screenshot_id=None):
    """Insert one ``prices`` row per scraped result and commit them together.

    Args:
        results: Iterable of price dicts. ``"preis"`` is required in each
            dict; ``"scanner"``, ``"waehrung"``, ``"airline"``, ``"abflug"``,
            ``"ankunft"`` and ``"booking_url"`` fall back to defaults.
        node_name: Value written to the ``node`` column.
        job: Mapping providing ``"id"``, ``"scanner"``, ``"von"`` and
            ``"nach"``.
        screenshot_id: Id written to the ``screenshot_id`` column of every
            row, or ``None`` when no screenshot was stored.

    Raises:
        KeyError: If a result has no ``"preis"`` key. Nothing is committed
            in that case.
    """
    conn = get_conn()
    try:
        for r in results:
            conn.execute("""
                INSERT INTO prices
                (job_id, scanner, node, preis, waehrung, airline, abflug, ankunft, von, nach, booking_url, screenshot_id)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            """, (
                job["id"], r.get("scanner", job["scanner"]), node_name,
                r["preis"], r.get("waehrung", "EUR"), r.get("airline", ""),
                r.get("abflug", ""), r.get("ankunft", ""),
                job["von"], job["nach"],
                r.get("booking_url", ""),
                screenshot_id,
            ))
        conn.commit()
    finally:
        # Always release the connection, even when an insert raises.
        conn.close()

View file

@ -1,7 +1,8 @@
import os
import base64
import threading
from datetime import datetime
from flask import Flask, jsonify, request, render_template_string
from flask import Flask, jsonify, request, render_template_string, Response, abort
from flask_cors import CORS
from db import init_db, get_conn, log
from scheduler import scraping_lauf
@ -143,11 +144,24 @@ OVERVIEW_HTML = BASE_HTML.replace("{% block content %}{% endblock %}", """
<div class="card">
<h2>Alle Preise heute (Detail)</h2>
<table>
<thead><tr><th>Anbieter</th><th>Node</th><th>Preis</th><th>Plausibilität</th><th>Abflug</th><th>Rückflug</th><th>Buchen</th></tr></thead>
<thead><tr><th>Anbieter</th><th>Node</th><th>Preis</th><th>Plausibilität</th><th>Abflug</th><th>Rückflug</th><th>Buchen</th><th>Screenshot</th></tr></thead>
<tbody id="preise-tbody"></tbody>
</table>
</div>
<!-- Screenshot Lightbox -->
<div id="ss-modal" onclick="document.getElementById('ss-modal').style.display='none'"
style="display:none;position:fixed;top:0;left:0;width:100%;height:100%;background:rgba(0,0,0,0.92);z-index:9999;overflow:auto;cursor:zoom-out">
<div style="text-align:center;padding:1rem">
<button onclick="document.getElementById('ss-modal').style.display='none'"
style="background:#ef4444;color:white;border:none;padding:0.4rem 1rem;border-radius:6px;cursor:pointer;margin-bottom:0.5rem;font-size:0.9rem">
Schließen
</button>
<div id="ss-info" style="color:#94a3b8;font-size:0.8rem;margin-bottom:0.5rem"></div>
<img id="ss-img" src="" style="max-width:100%;border-radius:8px;box-shadow:0 0 40px rgba(0,0,0,0.8)" onclick="event.stopPropagation()">
</div>
</div>
<div class="card">
<h2>Nodes</h2>
<table>
@ -260,6 +274,12 @@ async function ladeUebersicht() {
const gesamtHtml = isMulticity
? `<strong style="color:${warn?'#fbbf24':'#a78bfa'}">${p.preis} </strong><br><span style="font-size:0.75rem;color:#64748b"> ~${Math.round(p.preis)+HOTEL_HKG} inkl. Hotel</span>`
: `<strong style="color:${warn?'#fbbf24':'#34d399'}">${p.preis} </strong>`;
const ssBtn = p.screenshot_id
? `<button onclick="zeigeScreenshot(${p.screenshot_id},'${p.scanner} · ${p.node} · ${p.abflug||''}')"
style="background:#1e3a5f;border:1px solid #2563eb;color:#93c5fd;padding:0.2rem 0.5rem;border-radius:5px;cursor:pointer;font-size:0.8rem">
📷
</button>`
: '<span style="color:#334155;font-size:0.75rem">—</span>';
return `<tr${isMulticity?' style="background:rgba(99,102,241,0.06);border-left:3px solid #6366f1"':''}>
<td>${scannerLabel}</td>
<td style="font-size:0.8rem;color:#64748b">${p.node}</td>
@ -268,6 +288,7 @@ async function ladeUebersicht() {
<td style="font-size:0.85rem">${p.abflug||''}</td>
<td style="font-size:0.85rem">${p.ankunft||''}</td>
<td>${buchBtn}</td>
<td>${ssBtn}</td>
</tr>`;
}).join('') || '<tr><td colspan="7" style="color:#475569;text-align:center">Noch keine Daten heute</td></tr>';
@ -312,6 +333,18 @@ async function pruefeScanStatus() {
}
}
function zeigeScreenshot(id, label) {
    // Open the screenshot lightbox and load the image for screenshot `id`.
    // `label` is shown as the caption once the image has loaded; an error
    // notice is shown instead if loading fails.
    const [overlay, picture, caption] = ['ss-modal', 'ss-img', 'ss-info']
        .map((elementId) => document.getElementById(elementId));

    // Reset the image before showing the overlay (presumably so a previously
    // viewed screenshot is not displayed while the new one loads).
    picture.src = '';
    caption.textContent = 'Lade Screenshot...';
    overlay.style.display = 'block';

    // Handlers are attached before the real URL is assigned.
    picture.onload = function () {
        caption.textContent = label || '';
    };
    picture.onerror = function () {
        caption.textContent = '⚠ Screenshot nicht verfügbar';
    };
    picture.src = `/api/screenshot/${id}`;
}
async function manuellScrapen() {
const r = await fetch('/api/scrape/now', {method:'POST'});
const d = await r.json();
@ -537,6 +570,20 @@ def api_scrape_status():
return jsonify({"running": _lauf_aktiv})
@app.route("/api/screenshot/<int:screenshot_id>")
def api_screenshot(screenshot_id):
    """Serve a stored screenshot as an ``image/jpeg`` response.

    Looks up ``screenshot_b64`` in the ``screenshots`` table by id, decodes
    it from base64 and returns the raw bytes with a one-hour public cache
    header.

    Responds with 404 when the row does not exist, the stored value is
    empty, or the stored value cannot be base64-decoded.
    """
    conn = get_conn()
    try:
        row = conn.execute(
            "SELECT screenshot_b64 FROM screenshots WHERE id=?", (screenshot_id,)
        ).fetchone()
    finally:
        # Release the connection even if the query raises.
        conn.close()
    if not row or not row["screenshot_b64"]:
        abort(404)
    try:
        img_data = base64.b64decode(row["screenshot_b64"])
    except ValueError as e:
        # binascii.Error (bad padding / truncated data) is a ValueError.
        # Treat a corrupt entry like a missing screenshot instead of a 500.
        log(f"Screenshot {screenshot_id} nicht dekodierbar: {e}", "WARN")
        abort(404)
    return Response(img_data, mimetype="image/jpeg",
                    headers={"Cache-Control": "public, max-age=3600"})
# ─── Seiten ────────────────────────────────────────────────────────────────────
@app.route("/")

View file

@ -37,14 +37,16 @@ def job():
f"Umstieg {layover_min}-{layover_max}min | max {max_flugzeit_h}h/{max_stops} Stopps")
try:
results = scrape(scanner, von, nach, tage, aufenthalt, trip_type, kabine,
gepaeck, airline_filter, layover_min, layover_max,
max_flugzeit_h, max_stops, via, stopover_min_h, stopover_max_h)
print(f"[{NODE_NAME}] {len(results)} Preise gefunden")
results, screenshot_b64 = scrape(
scanner, von, nach, tage, aufenthalt, trip_type, kabine,
gepaeck, airline_filter, layover_min, layover_max,
max_flugzeit_h, max_stops, via, stopover_min_h, stopover_max_h)
print(f"[{NODE_NAME}] {len(results)} Preise | Screenshot: {len(screenshot_b64)//1024} KB")
return jsonify({
"results": results,
"node": NODE_NAME,
"count": len(results)
"results": results,
"node": NODE_NAME,
"count": len(results),
"screenshot_b64": screenshot_b64,
})
except Exception as e:
print(f"[{NODE_NAME}] Fehler: {e}")

View file

@ -9,6 +9,11 @@ def scrape(scanner, von, nach, tage=30, aufenthalt_tage=60,
layover_min=120, layover_max=300,
max_flugzeit_h=22, max_stops=2,
via="", stopover_min_h=20, stopover_max_h=30):
"""
Gibt (results, screenshot_b64) zurück.
results = Liste von Preis-Dicts
screenshot_b64 = JPEG Full-Page Screenshot als base64-String (leer wenn Fehler)
"""
dispatcher = {
"google_flights": scrape_google_flights,
"kayak": scrape_kayak,
@ -26,6 +31,27 @@ def scrape(scanner, von, nach, tage=30, aufenthalt_tage=60,
airline_filter, layover_min, layover_max, max_flugzeit_h, max_stops)
def _take_screenshot(sb):
"""Full-Page Screenshot via CDP (JPEG 55%, max 3000px). Gibt base64-String zurück."""
try:
result = sb.driver.execute_cdp_cmd("Page.captureScreenshot", {
"format": "jpeg",
"quality": 55,
"captureBeyondViewport": True,
"clip": {"x": 0, "y": 0, "width": 1280, "height": 3000, "scale": 0.75},
})
data = result.get("data", "")
if data:
print(f"[Screenshot] OK — {len(data)//1024} KB base64")
return data
except Exception as e:
print(f"[Screenshot] CDP-Fehler: {e}")
try:
return sb.driver.get_screenshot_as_base64()
except Exception:
return ""
def _booking_url_google(von, nach, abflug, rueck, kc):
# Hash-Fragment wird von headless Chrome ignoriert → tfs-Parameter nutzen
if rueck:
@ -254,7 +280,8 @@ def scrape_google_flights(von, nach, tage=30, aufenthalt_tage=60,
results = [r for r in results if r["preis"] > 400]
print(f"[GF] Ergebnis: {[r['preis'] for r in results[:5]]}")
return results[:10]
screenshot_b64 = _take_screenshot(sb)
return results[:10], screenshot_b64
def scrape_kayak(von, nach, tage=30, aufenthalt_tage=60,
@ -324,7 +351,8 @@ def scrape_kayak(von, nach, tage=30, aufenthalt_tage=60,
airline_label = f" [{airline_filter}]" if airline_filter else ""
print(f"[KY{airline_label}] Ergebnis: {[r['preis'] for r in results[:5]]}")
return results[:10]
screenshot_b64 = _take_screenshot(sb)
return results[:10], screenshot_b64
def scrape_trip(von, nach, tage=30, aufenthalt_tage=60,
@ -388,7 +416,8 @@ def scrape_trip(von, nach, tage=30, aufenthalt_tage=60,
results.append(r)
print(f"[TR] Ergebnis: {[r['preis'] for r in results[:5]]}")
return results[:10]
screenshot_b64 = _take_screenshot(sb)
return results[:10], screenshot_b64
def _booking_url_kayak_multicity(von, nach, via, abflug, via_datum, rueck, kc, bags=1, airline=""):
@ -488,7 +517,8 @@ def scrape_kayak_multicity(von, nach, tage=30, aufenthalt_tage=60,
results.append(r)
print(f"[MC{airline_label}] Ergebnis: {[r['preis'] for r in results[:5]]}")
return results[:10]
screenshot_b64 = _take_screenshot(sb)
return results[:10], screenshot_b64
def scrape_skyscanner(von, nach, tage=30, aufenthalt_tage=60,
@ -498,4 +528,4 @@ def scrape_skyscanner(von, nach, tage=30, aufenthalt_tage=60,
max_flugzeit_h=22, max_stops=2):
"""Skyscanner hat starken Bot-Schutz — übersprungen."""
print("[SS] Skyscanner übersprungen (Bot-Detection)")
return []
return [], ""