feat: Screenshot-Spalte im Dashboard — Full-Page CDP Screenshots
- worker.py: _take_screenshot() via Chrome CDP (JPEG 55%, max 3000px)
- worker.py: alle Scraper geben (results, screenshot_b64) Tuple zurück
- agent.py: screenshot_b64 in API-Response enthalten
- scheduler.py: speichere_screenshot() Funktion + Verknüpfung mit prices
- db.py: screenshots-Tabelle + screenshot_id FK in prices
- web.py: /api/screenshot/<id> Endpoint (base64→JPEG Response)
- web.py: 📷 Button in Preistabelle → Lightbox mit Full-Page Screenshot
Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
parent
2ce7d02bc5
commit
a03a58d01e
5 changed files with 142 additions and 25 deletions
|
|
@ -83,14 +83,29 @@ def init_db():
|
|||
von TEXT,
|
||||
nach TEXT,
|
||||
booking_url TEXT,
|
||||
screenshot_id INTEGER,
|
||||
scraped_at TEXT DEFAULT (datetime('now'))
|
||||
)
|
||||
""")
|
||||
for col_sql in [
|
||||
"ALTER TABLE prices ADD COLUMN booking_url TEXT",
|
||||
"ALTER TABLE prices ADD COLUMN screenshot_id INTEGER",
|
||||
]:
|
||||
try:
|
||||
c.execute(col_sql)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
c.execute("""
|
||||
CREATE TABLE IF NOT EXISTS screenshots (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
job_id INTEGER,
|
||||
node TEXT,
|
||||
scanner TEXT,
|
||||
screenshot_b64 TEXT,
|
||||
scraped_at TEXT DEFAULT (datetime('now'))
|
||||
)
|
||||
""")
|
||||
# Spalte nachrüsten falls DB schon existiert
|
||||
try:
|
||||
c.execute("ALTER TABLE prices ADD COLUMN booking_url TEXT")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
c.execute("""
|
||||
CREATE TABLE IF NOT EXISTS analyses (
|
||||
|
|
|
|||
|
|
@ -76,13 +76,16 @@ def dispatch_job(node, job, tage_override=None):
|
|||
timeout=300
|
||||
)
|
||||
if r.status_code == 200:
|
||||
results = r.json().get("results", [])
|
||||
data = r.json()
|
||||
results = data.get("results", [])
|
||||
screenshot_b64 = data.get("screenshot_b64", "")
|
||||
via_label = f" via {job.get('via','')}" if job.get('via') else ""
|
||||
log(f"{node['name']}: {len(results)} Preise ← {job['scanner']}"
|
||||
f"{' ['+job.get('airline_filter','')+']' if job.get('airline_filter') else ''}"
|
||||
f"{via_label}"
|
||||
f"{' +'+str(tage_override)+'T' if tage_override else ''}")
|
||||
speichere_preise(results, node["name"], job)
|
||||
screenshot_id = speichere_screenshot(screenshot_b64, node["name"], job)
|
||||
speichere_preise(results, node["name"], job, screenshot_id)
|
||||
return True
|
||||
else:
|
||||
log(f"{node['name']}: Fehler {r.status_code} bei {job['scanner']}", "ERROR")
|
||||
|
|
@ -93,19 +96,39 @@ def dispatch_job(node, job, tage_override=None):
|
|||
return False
|
||||
|
||||
|
||||
def speichere_preise(results, node_name, job):
|
||||
def speichere_screenshot(screenshot_b64, node_name, job):
    """Store a screenshot in the ``screenshots`` table.

    Args:
        screenshot_b64: Base64-encoded screenshot; a falsy value means there
            is nothing to store.
        node_name: Name of the node that produced the screenshot.
        job: Mapping that provides at least the keys ``"id"`` and
            ``"scanner"``.

    Returns:
        The row id of the inserted screenshot, or ``None`` when no screenshot
        was supplied or storing it failed. Failures are logged as warnings and
        never raised, so a broken screenshot cannot abort price storage.
    """
    if not screenshot_b64:
        return None

    conn = None
    try:
        conn = get_conn()
        cur = conn.execute("""
            INSERT INTO screenshots (job_id, node, scanner, screenshot_b64)
            VALUES (?, ?, ?, ?)
        """, (job["id"], node_name, job["scanner"], screenshot_b64))
        screenshot_id = cur.lastrowid
        conn.commit()
        return screenshot_id
    except Exception as e:
        # Best effort: the caller treats None as "no screenshot available".
        log(f"Screenshot-Speicher-Fehler: {e}", "WARN")
        return None
    finally:
        # Close on every path; previously a failing INSERT/commit leaked the
        # connection because close() was only reached on success.
        if conn is not None:
            conn.close()
|
||||
|
||||
|
||||
def speichere_preise(results, node_name, job, screenshot_id=None):
    """Insert the scraped prices of one job into the ``prices`` table.

    Args:
        results: Iterable of price dicts. ``"preis"`` is required; the keys
            ``"scanner"``, ``"waehrung"``, ``"airline"``, ``"abflug"``,
            ``"ankunft"`` and ``"booking_url"`` are optional.
        node_name: Name of the node that delivered the results.
        job: Mapping that provides ``"id"``, ``"scanner"``, ``"von"`` and
            ``"nach"``.
        screenshot_id: Optional id of the screenshot row to link with every
            inserted price (``None`` stores NULL).

    Raises:
        KeyError: If a result has no ``"preis"`` entry or ``job`` lacks a
            required key. Database errors propagate unchanged.
    """
    conn = get_conn()
    try:
        for r in results:
            conn.execute("""
                INSERT INTO prices
                (job_id, scanner, node, preis, waehrung, airline, abflug, ankunft, von, nach, booking_url, screenshot_id)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            """, (
                job["id"], r.get("scanner", job["scanner"]), node_name,
                r["preis"], r.get("waehrung", "EUR"), r.get("airline", ""),
                r.get("abflug", ""), r.get("ankunft", ""),
                job["von"], job["nach"],
                r.get("booking_url", ""),
                screenshot_id,
            ))
        # Commit once after all rows so a job's prices are stored together.
        conn.commit()
    finally:
        # Always release the connection, even if a malformed result raised
        # part-way through the loop.
        conn.close()
|
||||
|
|
|
|||
|
|
@ -1,7 +1,8 @@
|
|||
import os
|
||||
import base64
|
||||
import threading
|
||||
from datetime import datetime
|
||||
from flask import Flask, jsonify, request, render_template_string
|
||||
from flask import Flask, jsonify, request, render_template_string, Response, abort
|
||||
from flask_cors import CORS
|
||||
from db import init_db, get_conn, log
|
||||
from scheduler import scraping_lauf
|
||||
|
|
@ -143,11 +144,24 @@ OVERVIEW_HTML = BASE_HTML.replace("{% block content %}{% endblock %}", """
|
|||
<div class="card">
|
||||
<h2>Alle Preise heute (Detail)</h2>
|
||||
<table>
|
||||
<thead><tr><th>Anbieter</th><th>Node</th><th>Preis</th><th>Plausibilität</th><th>Abflug</th><th>Rückflug</th><th>Buchen</th></tr></thead>
|
||||
<thead><tr><th>Anbieter</th><th>Node</th><th>Preis</th><th>Plausibilität</th><th>Abflug</th><th>Rückflug</th><th>Buchen</th><th>Screenshot</th></tr></thead>
|
||||
<tbody id="preise-tbody"></tbody>
|
||||
</table>
|
||||
</div>
|
||||
|
||||
<!-- Screenshot Lightbox -->
|
||||
<div id="ss-modal" onclick="document.getElementById('ss-modal').style.display='none'"
|
||||
style="display:none;position:fixed;top:0;left:0;width:100%;height:100%;background:rgba(0,0,0,0.92);z-index:9999;overflow:auto;cursor:zoom-out">
|
||||
<div style="text-align:center;padding:1rem">
|
||||
<button onclick="document.getElementById('ss-modal').style.display='none'"
|
||||
style="background:#ef4444;color:white;border:none;padding:0.4rem 1rem;border-radius:6px;cursor:pointer;margin-bottom:0.5rem;font-size:0.9rem">
|
||||
✕ Schließen
|
||||
</button>
|
||||
<div id="ss-info" style="color:#94a3b8;font-size:0.8rem;margin-bottom:0.5rem"></div>
|
||||
<img id="ss-img" src="" style="max-width:100%;border-radius:8px;box-shadow:0 0 40px rgba(0,0,0,0.8)" onclick="event.stopPropagation()">
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="card">
|
||||
<h2>Nodes</h2>
|
||||
<table>
|
||||
|
|
@ -260,6 +274,12 @@ async function ladeUebersicht() {
|
|||
const gesamtHtml = isMulticity
|
||||
? `<strong style="color:${warn?'#fbbf24':'#a78bfa'}">${p.preis} €</strong><br><span style="font-size:0.75rem;color:#64748b">∑ ~${Math.round(p.preis)+HOTEL_HKG} € inkl. Hotel</span>`
|
||||
: `<strong style="color:${warn?'#fbbf24':'#34d399'}">${p.preis} €</strong>`;
|
||||
const ssBtn = p.screenshot_id
|
||||
? `<button onclick="zeigeScreenshot(${p.screenshot_id},'${p.scanner} · ${p.node} · ${p.abflug||''}')"
|
||||
style="background:#1e3a5f;border:1px solid #2563eb;color:#93c5fd;padding:0.2rem 0.5rem;border-radius:5px;cursor:pointer;font-size:0.8rem">
|
||||
📷
|
||||
</button>`
|
||||
: '<span style="color:#334155;font-size:0.75rem">—</span>';
|
||||
return `<tr${isMulticity?' style="background:rgba(99,102,241,0.06);border-left:3px solid #6366f1"':''}>
|
||||
<td>${scannerLabel}</td>
|
||||
<td style="font-size:0.8rem;color:#64748b">${p.node}</td>
|
||||
|
|
@ -268,6 +288,7 @@ async function ladeUebersicht() {
|
|||
<td style="font-size:0.85rem">${p.abflug||'—'}</td>
|
||||
<td style="font-size:0.85rem">${p.ankunft||'—'}</td>
|
||||
<td>${buchBtn}</td>
|
||||
<td>${ssBtn}</td>
|
||||
</tr>`;
|
||||
}).join('') || '<tr><td colspan="7" style="color:#475569;text-align:center">Noch keine Daten heute</td></tr>';
|
||||
|
||||
|
|
@ -312,6 +333,18 @@ async function pruefeScanStatus() {
|
|||
}
|
||||
}
|
||||
|
||||
// Open the screenshot lightbox for screenshot `id`. `label` becomes the
// caption once the image has loaded; an error hint is shown if loading fails.
function zeigeScreenshot(id, label) {
  const byId = (elementId) => document.getElementById(elementId);
  const overlay = byId('ss-modal');
  const picture = byId('ss-img');
  const caption = byId('ss-info');

  // Drop the previously shown image and display a loading hint first.
  picture.src = '';
  caption.textContent = 'Lade Screenshot...';
  overlay.style.display = 'block';

  // Handlers are attached before the new source is set so no event is missed.
  picture.onload = function () {
    caption.textContent = label || '';
  };
  picture.onerror = function () {
    caption.textContent = '⚠ Screenshot nicht verfügbar';
  };
  picture.src = '/api/screenshot/' + id;
}
|
||||
|
||||
async function manuellScrapen() {
|
||||
const r = await fetch('/api/scrape/now', {method:'POST'});
|
||||
const d = await r.json();
|
||||
|
|
@ -537,6 +570,20 @@ def api_scrape_status():
|
|||
return jsonify({"running": _lauf_aktiv})
|
||||
|
||||
|
||||
@app.route("/api/screenshot/<int:screenshot_id>")
def api_screenshot(screenshot_id):
    """Serve a stored screenshot as a binary image response.

    Args:
        screenshot_id: Primary key of the row in the ``screenshots`` table.

    Returns:
        A ``Response`` with the decoded image bytes, cacheable for one hour.
        Responds with 404 when the row is missing, empty, or its payload is
        not valid base64.
    """
    conn = get_conn()
    try:
        row = conn.execute(
            "SELECT screenshot_b64 FROM screenshots WHERE id=?", (screenshot_id,)
        ).fetchone()
    finally:
        # Release the connection even if the query itself fails.
        conn.close()

    if not row or not row["screenshot_b64"]:
        abort(404)

    try:
        img_data = base64.b64decode(row["screenshot_b64"])
    except ValueError:
        # binascii.Error is a ValueError subclass: a corrupt payload is
        # reported as "not found" instead of surfacing as a 500.
        abort(404)

    # Stored images are normally JPEG, but a payload may also be PNG (e.g. a
    # WebDriver fallback screenshot); pick the content type from the PNG
    # file signature so the browser receives a correct header.
    is_png = img_data.startswith(b"\x89PNG\r\n\x1a\n")
    mimetype = "image/png" if is_png else "image/jpeg"
    return Response(img_data, mimetype=mimetype,
                    headers={"Cache-Control": "public, max-age=3600"})
|
||||
|
||||
|
||||
# ─── Seiten ────────────────────────────────────────────────────────────────────
|
||||
|
||||
@app.route("/")
|
||||
|
|
|
|||
|
|
@ -37,14 +37,16 @@ def job():
|
|||
f"Umstieg {layover_min}-{layover_max}min | max {max_flugzeit_h}h/{max_stops} Stopps")
|
||||
|
||||
try:
|
||||
results = scrape(scanner, von, nach, tage, aufenthalt, trip_type, kabine,
|
||||
gepaeck, airline_filter, layover_min, layover_max,
|
||||
max_flugzeit_h, max_stops, via, stopover_min_h, stopover_max_h)
|
||||
print(f"[{NODE_NAME}] {len(results)} Preise gefunden")
|
||||
results, screenshot_b64 = scrape(
|
||||
scanner, von, nach, tage, aufenthalt, trip_type, kabine,
|
||||
gepaeck, airline_filter, layover_min, layover_max,
|
||||
max_flugzeit_h, max_stops, via, stopover_min_h, stopover_max_h)
|
||||
print(f"[{NODE_NAME}] {len(results)} Preise | Screenshot: {len(screenshot_b64)//1024} KB")
|
||||
return jsonify({
|
||||
"results": results,
|
||||
"node": NODE_NAME,
|
||||
"count": len(results)
|
||||
"results": results,
|
||||
"node": NODE_NAME,
|
||||
"count": len(results),
|
||||
"screenshot_b64": screenshot_b64,
|
||||
})
|
||||
except Exception as e:
|
||||
print(f"[{NODE_NAME}] Fehler: {e}")
|
||||
|
|
|
|||
|
|
@ -9,6 +9,11 @@ def scrape(scanner, von, nach, tage=30, aufenthalt_tage=60,
|
|||
layover_min=120, layover_max=300,
|
||||
max_flugzeit_h=22, max_stops=2,
|
||||
via="", stopover_min_h=20, stopover_max_h=30):
|
||||
"""
|
||||
Gibt (results, screenshot_b64) zurück.
|
||||
results = Liste von Preis-Dicts
|
||||
screenshot_b64 = JPEG Full-Page Screenshot als base64-String (leer wenn Fehler)
|
||||
"""
|
||||
dispatcher = {
|
||||
"google_flights": scrape_google_flights,
|
||||
"kayak": scrape_kayak,
|
||||
|
|
@ -26,6 +31,27 @@ def scrape(scanner, von, nach, tage=30, aufenthalt_tage=60,
|
|||
airline_filter, layover_min, layover_max, max_flugzeit_h, max_stops)
|
||||
|
||||
|
||||
def _take_screenshot(sb, *, quality=55, width=1280, height=3000, scale=0.75):
    """Capture a screenshot of the current page and return it as base64.

    The primary path issues the CDP command ``Page.captureScreenshot`` (JPEG,
    capturing beyond the viewport, clipped to ``width`` x ``height`` and
    scaled by ``scale``). If that raises or yields no data, the driver's
    regular ``get_screenshot_as_base64()`` is used as a fallback.

    Args:
        sb: Browser wrapper exposing ``driver.execute_cdp_cmd`` and
            ``driver.get_screenshot_as_base64``.
        quality: JPEG quality passed to CDP.
        width: Clip width in CSS pixels.
        height: Clip height in CSS pixels (upper bound of the captured page).
        scale: Scale factor applied to the clipped region.

    Returns:
        The base64-encoded image, or ``""`` if every capture attempt failed.
        Always a string, so callers can safely take ``len()`` of the result.
    """
    try:
        result = sb.driver.execute_cdp_cmd("Page.captureScreenshot", {
            "format": "jpeg",
            "quality": quality,
            "captureBeyondViewport": True,
            "clip": {"x": 0, "y": 0, "width": width, "height": height, "scale": scale},
        })
        data = (result or {}).get("data", "")
        if data:
            print(f"[Screenshot] OK — {len(data)//1024} KB base64")
            return data
        print("[Screenshot] CDP lieferte keine Daten — Fallback")
    except Exception as e:
        # Best effort: any CDP problem just triggers the fallback below.
        print(f"[Screenshot] CDP-Fehler: {e}")

    try:
        # NOTE(review): the WebDriver fallback presumably yields PNG rather
        # than JPEG — confirm that consumers tolerate both formats.
        return sb.driver.get_screenshot_as_base64() or ""
    except Exception as e:
        # Report the failure instead of swallowing it silently.
        print(f"[Screenshot] Fallback-Fehler: {e}")
        return ""
|
||||
|
||||
|
||||
def _booking_url_google(von, nach, abflug, rueck, kc):
|
||||
# Hash-Fragment wird von headless Chrome ignoriert → tfs-Parameter nutzen
|
||||
if rueck:
|
||||
|
|
@ -254,7 +280,8 @@ def scrape_google_flights(von, nach, tage=30, aufenthalt_tage=60,
|
|||
|
||||
results = [r for r in results if r["preis"] > 400]
|
||||
print(f"[GF] Ergebnis: {[r['preis'] for r in results[:5]]}")
|
||||
return results[:10]
|
||||
screenshot_b64 = _take_screenshot(sb)
|
||||
return results[:10], screenshot_b64
|
||||
|
||||
|
||||
def scrape_kayak(von, nach, tage=30, aufenthalt_tage=60,
|
||||
|
|
@ -324,7 +351,8 @@ def scrape_kayak(von, nach, tage=30, aufenthalt_tage=60,
|
|||
|
||||
airline_label = f" [{airline_filter}]" if airline_filter else ""
|
||||
print(f"[KY{airline_label}] Ergebnis: {[r['preis'] for r in results[:5]]}")
|
||||
return results[:10]
|
||||
screenshot_b64 = _take_screenshot(sb)
|
||||
return results[:10], screenshot_b64
|
||||
|
||||
|
||||
def scrape_trip(von, nach, tage=30, aufenthalt_tage=60,
|
||||
|
|
@ -388,7 +416,8 @@ def scrape_trip(von, nach, tage=30, aufenthalt_tage=60,
|
|||
results.append(r)
|
||||
|
||||
print(f"[TR] Ergebnis: {[r['preis'] for r in results[:5]]}")
|
||||
return results[:10]
|
||||
screenshot_b64 = _take_screenshot(sb)
|
||||
return results[:10], screenshot_b64
|
||||
|
||||
|
||||
def _booking_url_kayak_multicity(von, nach, via, abflug, via_datum, rueck, kc, bags=1, airline=""):
|
||||
|
|
@ -488,7 +517,8 @@ def scrape_kayak_multicity(von, nach, tage=30, aufenthalt_tage=60,
|
|||
results.append(r)
|
||||
|
||||
print(f"[MC{airline_label}] Ergebnis: {[r['preis'] for r in results[:5]]}")
|
||||
return results[:10]
|
||||
screenshot_b64 = _take_screenshot(sb)
|
||||
return results[:10], screenshot_b64
|
||||
|
||||
|
||||
def scrape_skyscanner(von, nach, tage=30, aufenthalt_tage=60,
|
||||
|
|
@ -498,4 +528,4 @@ def scrape_skyscanner(von, nach, tage=30, aufenthalt_tage=60,
|
|||
max_flugzeit_h=22, max_stops=2):
|
||||
"""Skyscanner hat starken Bot-Schutz — übersprungen."""
|
||||
print("[SS] Skyscanner übersprungen (Bot-Detection)")
|
||||
return []
|
||||
return [], ""
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue