feat: Screenshot-Spalte im Dashboard — Full-Page CDP Screenshots
- worker.py: _take_screenshot() via Chrome CDP (JPEG 55%, max 3000px)
- worker.py: alle Scraper geben (results, screenshot_b64) Tuple zurück
- agent.py: screenshot_b64 in API-Response enthalten
- scheduler.py: speichere_screenshot() Funktion + Verknüpfung mit prices
- db.py: screenshots-Tabelle + screenshot_id FK in prices
- web.py: /api/screenshot/<id> Endpoint (base64→JPEG Response)
- web.py: 📷 Button in Preistabelle → Lightbox mit Full-Page Screenshot
Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
parent
2ce7d02bc5
commit
a03a58d01e
5 changed files with 142 additions and 25 deletions
|
|
@ -83,14 +83,29 @@ def init_db():
|
||||||
von TEXT,
|
von TEXT,
|
||||||
nach TEXT,
|
nach TEXT,
|
||||||
booking_url TEXT,
|
booking_url TEXT,
|
||||||
|
screenshot_id INTEGER,
|
||||||
|
scraped_at TEXT DEFAULT (datetime('now'))
|
||||||
|
)
|
||||||
|
""")
|
||||||
|
for col_sql in [
|
||||||
|
"ALTER TABLE prices ADD COLUMN booking_url TEXT",
|
||||||
|
"ALTER TABLE prices ADD COLUMN screenshot_id INTEGER",
|
||||||
|
]:
|
||||||
|
try:
|
||||||
|
c.execute(col_sql)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
c.execute("""
|
||||||
|
CREATE TABLE IF NOT EXISTS screenshots (
|
||||||
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||||
|
job_id INTEGER,
|
||||||
|
node TEXT,
|
||||||
|
scanner TEXT,
|
||||||
|
screenshot_b64 TEXT,
|
||||||
scraped_at TEXT DEFAULT (datetime('now'))
|
scraped_at TEXT DEFAULT (datetime('now'))
|
||||||
)
|
)
|
||||||
""")
|
""")
|
||||||
# Spalte nachrüsten falls DB schon existiert
|
|
||||||
try:
|
|
||||||
c.execute("ALTER TABLE prices ADD COLUMN booking_url TEXT")
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
|
|
||||||
c.execute("""
|
c.execute("""
|
||||||
CREATE TABLE IF NOT EXISTS analyses (
|
CREATE TABLE IF NOT EXISTS analyses (
|
||||||
|
|
|
||||||
|
|
@ -76,13 +76,16 @@ def dispatch_job(node, job, tage_override=None):
|
||||||
timeout=300
|
timeout=300
|
||||||
)
|
)
|
||||||
if r.status_code == 200:
|
if r.status_code == 200:
|
||||||
results = r.json().get("results", [])
|
data = r.json()
|
||||||
|
results = data.get("results", [])
|
||||||
|
screenshot_b64 = data.get("screenshot_b64", "")
|
||||||
via_label = f" via {job.get('via','')}" if job.get('via') else ""
|
via_label = f" via {job.get('via','')}" if job.get('via') else ""
|
||||||
log(f"{node['name']}: {len(results)} Preise ← {job['scanner']}"
|
log(f"{node['name']}: {len(results)} Preise ← {job['scanner']}"
|
||||||
f"{' ['+job.get('airline_filter','')+']' if job.get('airline_filter') else ''}"
|
f"{' ['+job.get('airline_filter','')+']' if job.get('airline_filter') else ''}"
|
||||||
f"{via_label}"
|
f"{via_label}"
|
||||||
f"{' +'+str(tage_override)+'T' if tage_override else ''}")
|
f"{' +'+str(tage_override)+'T' if tage_override else ''}")
|
||||||
speichere_preise(results, node["name"], job)
|
screenshot_id = speichere_screenshot(screenshot_b64, node["name"], job)
|
||||||
|
speichere_preise(results, node["name"], job, screenshot_id)
|
||||||
return True
|
return True
|
||||||
else:
|
else:
|
||||||
log(f"{node['name']}: Fehler {r.status_code} bei {job['scanner']}", "ERROR")
|
log(f"{node['name']}: Fehler {r.status_code} bei {job['scanner']}", "ERROR")
|
||||||
|
|
@ -93,19 +96,39 @@ def dispatch_job(node, job, tage_override=None):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
def speichere_preise(results, node_name, job):
|
def speichere_screenshot(screenshot_b64, node_name, job):
    """Persist a screenshot row and return its id.

    Storing a screenshot is best-effort: any failure is logged with level
    "WARN" and reported to the caller as ``None`` so that saving the prices
    can still proceed without a screenshot reference.

    Args:
        screenshot_b64: Base64-encoded screenshot; a falsy value (empty
            string or None) means "no screenshot" and nothing is written.
        node_name: Name of the node that produced the screenshot.
        job: Mapping that provides at least the keys "id" and "scanner".

    Returns:
        The ``lastrowid`` of the inserted ``screenshots`` row, or ``None``
        when there was nothing to store or storing failed.
    """
    if not screenshot_b64:
        return None
    try:
        conn = get_conn()
        try:
            cur = conn.execute("""
                INSERT INTO screenshots (job_id, node, scanner, screenshot_b64)
                VALUES (?, ?, ?, ?)
            """, (job["id"], node_name, job["scanner"], screenshot_b64))
            screenshot_id = cur.lastrowid
            conn.commit()
            return screenshot_id
        finally:
            # Close the connection on the error path too; previously a
            # failing execute/commit left it open.
            conn.close()
    except Exception as e:
        log(f"Screenshot-Speicher-Fehler: {e}", "WARN")
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def speichere_preise(results, node_name, job, screenshot_id=None):
|
||||||
conn = get_conn()
|
conn = get_conn()
|
||||||
for r in results:
|
for r in results:
|
||||||
conn.execute("""
|
conn.execute("""
|
||||||
INSERT INTO prices
|
INSERT INTO prices
|
||||||
(job_id, scanner, node, preis, waehrung, airline, abflug, ankunft, von, nach, booking_url)
|
(job_id, scanner, node, preis, waehrung, airline, abflug, ankunft, von, nach, booking_url, screenshot_id)
|
||||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||||
""", (
|
""", (
|
||||||
job["id"], r.get("scanner", job["scanner"]), node_name,
|
job["id"], r.get("scanner", job["scanner"]), node_name,
|
||||||
r["preis"], r.get("waehrung", "EUR"), r.get("airline", ""),
|
r["preis"], r.get("waehrung", "EUR"), r.get("airline", ""),
|
||||||
r.get("abflug", ""), r.get("ankunft", ""),
|
r.get("abflug", ""), r.get("ankunft", ""),
|
||||||
job["von"], job["nach"],
|
job["von"], job["nach"],
|
||||||
r.get("booking_url", "")
|
r.get("booking_url", ""),
|
||||||
|
screenshot_id,
|
||||||
))
|
))
|
||||||
conn.commit()
|
conn.commit()
|
||||||
conn.close()
|
conn.close()
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,8 @@
|
||||||
import os
|
import os
|
||||||
|
import base64
|
||||||
import threading
|
import threading
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from flask import Flask, jsonify, request, render_template_string
|
from flask import Flask, jsonify, request, render_template_string, Response, abort
|
||||||
from flask_cors import CORS
|
from flask_cors import CORS
|
||||||
from db import init_db, get_conn, log
|
from db import init_db, get_conn, log
|
||||||
from scheduler import scraping_lauf
|
from scheduler import scraping_lauf
|
||||||
|
|
@ -143,11 +144,24 @@ OVERVIEW_HTML = BASE_HTML.replace("{% block content %}{% endblock %}", """
|
||||||
<div class="card">
|
<div class="card">
|
||||||
<h2>Alle Preise heute (Detail)</h2>
|
<h2>Alle Preise heute (Detail)</h2>
|
||||||
<table>
|
<table>
|
||||||
<thead><tr><th>Anbieter</th><th>Node</th><th>Preis</th><th>Plausibilität</th><th>Abflug</th><th>Rückflug</th><th>Buchen</th></tr></thead>
|
<thead><tr><th>Anbieter</th><th>Node</th><th>Preis</th><th>Plausibilität</th><th>Abflug</th><th>Rückflug</th><th>Buchen</th><th>Screenshot</th></tr></thead>
|
||||||
<tbody id="preise-tbody"></tbody>
|
<tbody id="preise-tbody"></tbody>
|
||||||
</table>
|
</table>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<!-- Screenshot Lightbox -->
|
||||||
|
<div id="ss-modal" onclick="document.getElementById('ss-modal').style.display='none'"
|
||||||
|
style="display:none;position:fixed;top:0;left:0;width:100%;height:100%;background:rgba(0,0,0,0.92);z-index:9999;overflow:auto;cursor:zoom-out">
|
||||||
|
<div style="text-align:center;padding:1rem">
|
||||||
|
<button onclick="document.getElementById('ss-modal').style.display='none'"
|
||||||
|
style="background:#ef4444;color:white;border:none;padding:0.4rem 1rem;border-radius:6px;cursor:pointer;margin-bottom:0.5rem;font-size:0.9rem">
|
||||||
|
✕ Schließen
|
||||||
|
</button>
|
||||||
|
<div id="ss-info" style="color:#94a3b8;font-size:0.8rem;margin-bottom:0.5rem"></div>
|
||||||
|
<img id="ss-img" src="" style="max-width:100%;border-radius:8px;box-shadow:0 0 40px rgba(0,0,0,0.8)" onclick="event.stopPropagation()">
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
<div class="card">
|
<div class="card">
|
||||||
<h2>Nodes</h2>
|
<h2>Nodes</h2>
|
||||||
<table>
|
<table>
|
||||||
|
|
@ -260,6 +274,12 @@ async function ladeUebersicht() {
|
||||||
const gesamtHtml = isMulticity
|
const gesamtHtml = isMulticity
|
||||||
? `<strong style="color:${warn?'#fbbf24':'#a78bfa'}">${p.preis} €</strong><br><span style="font-size:0.75rem;color:#64748b">∑ ~${Math.round(p.preis)+HOTEL_HKG} € inkl. Hotel</span>`
|
? `<strong style="color:${warn?'#fbbf24':'#a78bfa'}">${p.preis} €</strong><br><span style="font-size:0.75rem;color:#64748b">∑ ~${Math.round(p.preis)+HOTEL_HKG} € inkl. Hotel</span>`
|
||||||
: `<strong style="color:${warn?'#fbbf24':'#34d399'}">${p.preis} €</strong>`;
|
: `<strong style="color:${warn?'#fbbf24':'#34d399'}">${p.preis} €</strong>`;
|
||||||
|
const ssBtn = p.screenshot_id
|
||||||
|
? `<button onclick="zeigeScreenshot(${p.screenshot_id},'${p.scanner} · ${p.node} · ${p.abflug||''}')"
|
||||||
|
style="background:#1e3a5f;border:1px solid #2563eb;color:#93c5fd;padding:0.2rem 0.5rem;border-radius:5px;cursor:pointer;font-size:0.8rem">
|
||||||
|
📷
|
||||||
|
</button>`
|
||||||
|
: '<span style="color:#334155;font-size:0.75rem">—</span>';
|
||||||
return `<tr${isMulticity?' style="background:rgba(99,102,241,0.06);border-left:3px solid #6366f1"':''}>
|
return `<tr${isMulticity?' style="background:rgba(99,102,241,0.06);border-left:3px solid #6366f1"':''}>
|
||||||
<td>${scannerLabel}</td>
|
<td>${scannerLabel}</td>
|
||||||
<td style="font-size:0.8rem;color:#64748b">${p.node}</td>
|
<td style="font-size:0.8rem;color:#64748b">${p.node}</td>
|
||||||
|
|
@ -268,6 +288,7 @@ async function ladeUebersicht() {
|
||||||
<td style="font-size:0.85rem">${p.abflug||'—'}</td>
|
<td style="font-size:0.85rem">${p.abflug||'—'}</td>
|
||||||
<td style="font-size:0.85rem">${p.ankunft||'—'}</td>
|
<td style="font-size:0.85rem">${p.ankunft||'—'}</td>
|
||||||
<td>${buchBtn}</td>
|
<td>${buchBtn}</td>
|
||||||
|
<td>${ssBtn}</td>
|
||||||
</tr>`;
|
</tr>`;
|
||||||
}).join('') || '<tr><td colspan="7" style="color:#475569;text-align:center">Noch keine Daten heute</td></tr>';
|
}).join('') || '<tr><td colspan="7" style="color:#475569;text-align:center">Noch keine Daten heute</td></tr>';
|
||||||
|
|
||||||
|
|
@ -312,6 +333,18 @@ async function pruefeScanStatus() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function zeigeScreenshot(id, label) {
  // Open the screenshot lightbox and load the image for the given id.
  // While loading, the info line shows a placeholder; afterwards it shows
  // the label (or nothing), or a warning if the image cannot be loaded.
  const overlay = document.getElementById('ss-modal');
  const bild = document.getElementById('ss-img');
  const hinweis = document.getElementById('ss-info');

  // Clear the previously shown image before the overlay becomes visible.
  bild.src = '';
  hinweis.textContent = 'Lade Screenshot...';
  overlay.style.display = 'block';

  bild.onload = function () {
    hinweis.textContent = label || '';
  };
  bild.onerror = function () {
    hinweis.textContent = '⚠ Screenshot nicht verfügbar';
  };

  // Assigning the real URL last starts the request with both handlers set.
  bild.src = `/api/screenshot/${id}`;
}
|
||||||
|
|
||||||
async function manuellScrapen() {
|
async function manuellScrapen() {
|
||||||
const r = await fetch('/api/scrape/now', {method:'POST'});
|
const r = await fetch('/api/scrape/now', {method:'POST'});
|
||||||
const d = await r.json();
|
const d = await r.json();
|
||||||
|
|
@ -537,6 +570,20 @@ def api_scrape_status():
|
||||||
return jsonify({"running": _lauf_aktiv})
|
return jsonify({"running": _lauf_aktiv})
|
||||||
|
|
||||||
|
|
||||||
|
@app.route("/api/screenshot/<int:screenshot_id>")
def api_screenshot(screenshot_id):
    """Serve a stored screenshot as a binary image response.

    Looks up ``screenshot_b64`` in the ``screenshots`` table, decodes it and
    returns the raw bytes with a one-hour public cache header.

    Args:
        screenshot_id: Primary key of the ``screenshots`` row (from the URL).

    Returns:
        A ``Response`` carrying the decoded image bytes.

    Aborts with 404 when the row does not exist, holds no data, or holds
    data that is not valid base64.
    """
    conn = get_conn()
    try:
        row = conn.execute(
            "SELECT screenshot_b64 FROM screenshots WHERE id=?", (screenshot_id,)
        ).fetchone()
    finally:
        # Release the connection even when the query itself raises.
        conn.close()
    if not row or not row["screenshot_b64"]:
        abort(404)
    try:
        img_data = base64.b64decode(row["screenshot_b64"])
    except ValueError:
        # binascii.Error is a ValueError subclass; treat a corrupt payload
        # as "not available" instead of an unhandled server error.
        abort(404)
    # Screenshots are normally JPEG, but a payload starting with the PNG
    # signature (presumably from the worker's Selenium fallback capture)
    # must be labelled as PNG.
    is_png = img_data.startswith(b"\x89PNG\r\n\x1a\n")
    return Response(img_data, mimetype="image/png" if is_png else "image/jpeg",
                    headers={"Cache-Control": "public, max-age=3600"})
|
||||||
|
|
||||||
|
|
||||||
# ─── Seiten ────────────────────────────────────────────────────────────────────
|
# ─── Seiten ────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
@app.route("/")
|
@app.route("/")
|
||||||
|
|
|
||||||
|
|
@ -37,14 +37,16 @@ def job():
|
||||||
f"Umstieg {layover_min}-{layover_max}min | max {max_flugzeit_h}h/{max_stops} Stopps")
|
f"Umstieg {layover_min}-{layover_max}min | max {max_flugzeit_h}h/{max_stops} Stopps")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
results = scrape(scanner, von, nach, tage, aufenthalt, trip_type, kabine,
|
results, screenshot_b64 = scrape(
|
||||||
gepaeck, airline_filter, layover_min, layover_max,
|
scanner, von, nach, tage, aufenthalt, trip_type, kabine,
|
||||||
max_flugzeit_h, max_stops, via, stopover_min_h, stopover_max_h)
|
gepaeck, airline_filter, layover_min, layover_max,
|
||||||
print(f"[{NODE_NAME}] {len(results)} Preise gefunden")
|
max_flugzeit_h, max_stops, via, stopover_min_h, stopover_max_h)
|
||||||
|
print(f"[{NODE_NAME}] {len(results)} Preise | Screenshot: {len(screenshot_b64)//1024} KB")
|
||||||
return jsonify({
|
return jsonify({
|
||||||
"results": results,
|
"results": results,
|
||||||
"node": NODE_NAME,
|
"node": NODE_NAME,
|
||||||
"count": len(results)
|
"count": len(results),
|
||||||
|
"screenshot_b64": screenshot_b64,
|
||||||
})
|
})
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"[{NODE_NAME}] Fehler: {e}")
|
print(f"[{NODE_NAME}] Fehler: {e}")
|
||||||
|
|
|
||||||
|
|
@ -9,6 +9,11 @@ def scrape(scanner, von, nach, tage=30, aufenthalt_tage=60,
|
||||||
layover_min=120, layover_max=300,
|
layover_min=120, layover_max=300,
|
||||||
max_flugzeit_h=22, max_stops=2,
|
max_flugzeit_h=22, max_stops=2,
|
||||||
via="", stopover_min_h=20, stopover_max_h=30):
|
via="", stopover_min_h=20, stopover_max_h=30):
|
||||||
|
"""
|
||||||
|
Gibt (results, screenshot_b64) zurück.
|
||||||
|
results = Liste von Preis-Dicts
|
||||||
|
screenshot_b64 = JPEG Full-Page Screenshot als base64-String (leer wenn Fehler)
|
||||||
|
"""
|
||||||
dispatcher = {
|
dispatcher = {
|
||||||
"google_flights": scrape_google_flights,
|
"google_flights": scrape_google_flights,
|
||||||
"kayak": scrape_kayak,
|
"kayak": scrape_kayak,
|
||||||
|
|
@ -26,6 +31,27 @@ def scrape(scanner, von, nach, tage=30, aufenthalt_tage=60,
|
||||||
airline_filter, layover_min, layover_max, max_flugzeit_h, max_stops)
|
airline_filter, layover_min, layover_max, max_flugzeit_h, max_stops)
|
||||||
|
|
||||||
|
|
||||||
|
def _take_screenshot(sb):
|
||||||
|
"""Full-Page Screenshot via CDP (JPEG 55%, max 3000px). Gibt base64-String zurück."""
|
||||||
|
try:
|
||||||
|
result = sb.driver.execute_cdp_cmd("Page.captureScreenshot", {
|
||||||
|
"format": "jpeg",
|
||||||
|
"quality": 55,
|
||||||
|
"captureBeyondViewport": True,
|
||||||
|
"clip": {"x": 0, "y": 0, "width": 1280, "height": 3000, "scale": 0.75},
|
||||||
|
})
|
||||||
|
data = result.get("data", "")
|
||||||
|
if data:
|
||||||
|
print(f"[Screenshot] OK — {len(data)//1024} KB base64")
|
||||||
|
return data
|
||||||
|
except Exception as e:
|
||||||
|
print(f"[Screenshot] CDP-Fehler: {e}")
|
||||||
|
try:
|
||||||
|
return sb.driver.get_screenshot_as_base64()
|
||||||
|
except Exception:
|
||||||
|
return ""
|
||||||
|
|
||||||
|
|
||||||
def _booking_url_google(von, nach, abflug, rueck, kc):
|
def _booking_url_google(von, nach, abflug, rueck, kc):
|
||||||
# Hash-Fragment wird von headless Chrome ignoriert → tfs-Parameter nutzen
|
# Hash-Fragment wird von headless Chrome ignoriert → tfs-Parameter nutzen
|
||||||
if rueck:
|
if rueck:
|
||||||
|
|
@ -254,7 +280,8 @@ def scrape_google_flights(von, nach, tage=30, aufenthalt_tage=60,
|
||||||
|
|
||||||
results = [r for r in results if r["preis"] > 400]
|
results = [r for r in results if r["preis"] > 400]
|
||||||
print(f"[GF] Ergebnis: {[r['preis'] for r in results[:5]]}")
|
print(f"[GF] Ergebnis: {[r['preis'] for r in results[:5]]}")
|
||||||
return results[:10]
|
screenshot_b64 = _take_screenshot(sb)
|
||||||
|
return results[:10], screenshot_b64
|
||||||
|
|
||||||
|
|
||||||
def scrape_kayak(von, nach, tage=30, aufenthalt_tage=60,
|
def scrape_kayak(von, nach, tage=30, aufenthalt_tage=60,
|
||||||
|
|
@ -324,7 +351,8 @@ def scrape_kayak(von, nach, tage=30, aufenthalt_tage=60,
|
||||||
|
|
||||||
airline_label = f" [{airline_filter}]" if airline_filter else ""
|
airline_label = f" [{airline_filter}]" if airline_filter else ""
|
||||||
print(f"[KY{airline_label}] Ergebnis: {[r['preis'] for r in results[:5]]}")
|
print(f"[KY{airline_label}] Ergebnis: {[r['preis'] for r in results[:5]]}")
|
||||||
return results[:10]
|
screenshot_b64 = _take_screenshot(sb)
|
||||||
|
return results[:10], screenshot_b64
|
||||||
|
|
||||||
|
|
||||||
def scrape_trip(von, nach, tage=30, aufenthalt_tage=60,
|
def scrape_trip(von, nach, tage=30, aufenthalt_tage=60,
|
||||||
|
|
@ -388,7 +416,8 @@ def scrape_trip(von, nach, tage=30, aufenthalt_tage=60,
|
||||||
results.append(r)
|
results.append(r)
|
||||||
|
|
||||||
print(f"[TR] Ergebnis: {[r['preis'] for r in results[:5]]}")
|
print(f"[TR] Ergebnis: {[r['preis'] for r in results[:5]]}")
|
||||||
return results[:10]
|
screenshot_b64 = _take_screenshot(sb)
|
||||||
|
return results[:10], screenshot_b64
|
||||||
|
|
||||||
|
|
||||||
def _booking_url_kayak_multicity(von, nach, via, abflug, via_datum, rueck, kc, bags=1, airline=""):
|
def _booking_url_kayak_multicity(von, nach, via, abflug, via_datum, rueck, kc, bags=1, airline=""):
|
||||||
|
|
@ -488,7 +517,8 @@ def scrape_kayak_multicity(von, nach, tage=30, aufenthalt_tage=60,
|
||||||
results.append(r)
|
results.append(r)
|
||||||
|
|
||||||
print(f"[MC{airline_label}] Ergebnis: {[r['preis'] for r in results[:5]]}")
|
print(f"[MC{airline_label}] Ergebnis: {[r['preis'] for r in results[:5]]}")
|
||||||
return results[:10]
|
screenshot_b64 = _take_screenshot(sb)
|
||||||
|
return results[:10], screenshot_b64
|
||||||
|
|
||||||
|
|
||||||
def scrape_skyscanner(von, nach, tage=30, aufenthalt_tage=60,
|
def scrape_skyscanner(von, nach, tage=30, aufenthalt_tage=60,
|
||||||
|
|
@ -498,4 +528,4 @@ def scrape_skyscanner(von, nach, tage=30, aufenthalt_tage=60,
|
||||||
max_flugzeit_h=22, max_stops=2):
|
max_flugzeit_h=22, max_stops=2):
|
||||||
"""Skyscanner hat starken Bot-Schutz — übersprungen."""
|
"""Skyscanner hat starken Bot-Schutz — übersprungen."""
|
||||||
print("[SS] Skyscanner übersprungen (Bot-Detection)")
|
print("[SS] Skyscanner übersprungen (Bot-Detection)")
|
||||||
return []
|
return [], ""
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue