Initial: Hub + Node Grundstruktur
Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
commit
3ca727cec3
15 changed files with 1185 additions and 0 deletions
5
hub/.env
Normal file
5
hub/.env
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
# SECURITY: a real API key was committed here — rotate the leaked key immediately
# and inject it at deploy time (secrets store / untracked .env) instead.
OPENROUTER_API_KEY=
|
||||
AI_MODEL=openai/gpt-4o-mini
|
||||
TELEGRAM_BOT_TOKEN=
|
||||
TELEGRAM_CHAT_ID=674951792
|
||||
DB_PATH=/data/flugscanner.db
|
||||
10
hub/Dockerfile
Normal file
10
hub/Dockerfile
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
FROM python:3.12-slim
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
COPY src/requirements.txt .
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
COPY src/ ./src/
|
||||
|
||||
CMD ["python", "src/web.py"]
|
||||
37
hub/docker-compose.yml
Normal file
37
hub/docker-compose.yml
Normal file
|
|
@ -0,0 +1,37 @@
|
|||
services:
|
||||
web:
|
||||
build: .
|
||||
container_name: flugscanner-web
|
||||
restart: unless-stopped
|
||||
ports:
|
||||
- "8080:8080"
|
||||
volumes:
|
||||
- ./data:/data
|
||||
- ./src:/app/src
|
||||
environment:
|
||||
- DB_PATH=/data/flugscanner.db
|
||||
- OPENROUTER_API_KEY=${OPENROUTER_API_KEY}
|
||||
- AI_MODEL=${AI_MODEL}
|
||||
- TELEGRAM_BOT_TOKEN=${TELEGRAM_BOT_TOKEN:-}
|
||||
- TELEGRAM_CHAT_ID=${TELEGRAM_CHAT_ID:-}
|
||||
command: python /app/src/web.py
|
||||
|
||||
scheduler:
|
||||
build: .
|
||||
container_name: flugscanner-scheduler
|
||||
restart: unless-stopped
|
||||
volumes:
|
||||
- ./data:/data
|
||||
- ./src:/app/src
|
||||
environment:
|
||||
- DB_PATH=/data/flugscanner.db
|
||||
- OPENROUTER_API_KEY=${OPENROUTER_API_KEY}
|
||||
- AI_MODEL=${AI_MODEL}
|
||||
- TELEGRAM_BOT_TOKEN=${TELEGRAM_BOT_TOKEN:-}
|
||||
- TELEGRAM_CHAT_ID=${TELEGRAM_CHAT_ID:-}
|
||||
command: python /app/src/scheduler.py
|
||||
depends_on:
|
||||
- web
|
||||
|
||||
# NOTE: this named volume is declared but never used — both services
# bind-mount ./data instead. Keep one mechanism or the other.
volumes:
  data:
|
||||
140
hub/src/db.py
Normal file
140
hub/src/db.py
Normal file
|
|
@ -0,0 +1,140 @@
|
|||
import sqlite3
|
||||
import os
|
||||
|
||||
DB_PATH = os.environ.get("DB_PATH", "/data/flugscanner.db")
|
||||
|
||||
|
||||
def get_conn():
    """Open a connection to the scanner database.

    Rows are returned as :class:`sqlite3.Row` so callers can access
    columns by name (``row["preis"]``) as well as by index.
    """
    connection = sqlite3.connect(DB_PATH)
    connection.row_factory = sqlite3.Row
    return connection
|
||||
|
||||
|
||||
def init_db():
    """Create the SQLite schema and seed default rows (idempotent).

    Safe to call on every process start: tables use ``IF NOT EXISTS``, the
    prompt and node seeds rely on UNIQUE names with ``INSERT OR IGNORE``,
    and the default job is inserted only while the jobs table is empty.
    """
    # The DB lives on a mounted volume; make sure the directory exists.
    os.makedirs(os.path.dirname(DB_PATH), exist_ok=True)
    conn = get_conn()
    c = conn.cursor()

    # Registered scraper nodes, reached over their Tailscale IPs.
    c.execute("""
        CREATE TABLE IF NOT EXISTS nodes (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            name TEXT UNIQUE NOT NULL,
            tailscale_ip TEXT NOT NULL,
            location TEXT,
            last_seen TEXT,
            status TEXT DEFAULT 'unknown'
        )
    """)

    # Scrape jobs: one route (von -> nach) per scanner.
    c.execute("""
        CREATE TABLE IF NOT EXISTS jobs (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            scanner TEXT NOT NULL,
            von TEXT NOT NULL,
            nach TEXT NOT NULL,
            tage INTEGER DEFAULT 30,
            intervall TEXT DEFAULT 'daily',
            aktiv INTEGER DEFAULT 1,
            created_at TEXT DEFAULT (datetime('now'))
        )
    """)

    # Raw price points reported back by the nodes.
    c.execute("""
        CREATE TABLE IF NOT EXISTS prices (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            job_id INTEGER,
            scanner TEXT NOT NULL,
            node TEXT NOT NULL,
            preis REAL NOT NULL,
            waehrung TEXT DEFAULT 'EUR',
            airline TEXT,
            abflug TEXT,
            ankunft TEXT,
            von TEXT,
            nach TEXT,
            scraped_at TEXT DEFAULT (datetime('now'))
        )
    """)

    # Stored results of the AI price analysis (see ki.auswerten).
    c.execute("""
        CREATE TABLE IF NOT EXISTS analyses (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            von TEXT,
            nach TEXT,
            guenstigster_preis REAL,
            guenstigster_anbieter TEXT,
            ki_empfehlung TEXT,
            ki_analyse TEXT,
            created_at TEXT DEFAULT (datetime('now'))
        )
    """)

    # Editable prompt templates used by the AI analysis.
    c.execute("""
        CREATE TABLE IF NOT EXISTS prompts (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            name TEXT UNIQUE NOT NULL,
            inhalt TEXT NOT NULL,
            updated_at TEXT DEFAULT (datetime('now'))
        )
    """)

    # Application log; log() mirrors each entry to stdout.
    c.execute("""
        CREATE TABLE IF NOT EXISTS logs (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            level TEXT DEFAULT 'INFO',
            message TEXT,
            created_at TEXT DEFAULT (datetime('now'))
        )
    """)

    # Standard-Prompt — name is UNIQUE, so OR IGNORE preserves user edits.
    c.execute("""
        INSERT OR IGNORE INTO prompts (name, inhalt) VALUES (
            'ki_auswertung',
            'Du bist ein Flugpreis-Analyst. Analysiere die folgenden Preisdaten und gib eine klare Empfehlung.

Aktuelle Preise heute:
{preise_heute}

Preisverlauf letzte 30 Tage:
{preisverlauf}

Statistik:
- Durchschnitt: {avg} EUR
- Minimum: {min} EUR
- Maximum: {max} EUR

Antworte auf Deutsch in diesem Format:
EMPFEHLUNG: [JETZT BUCHEN / WARTEN / NEUTRAL]
BEGRUENDUNG: [1-2 Sätze warum]
BESTER_TAG: [Wochentag falls erkennbar]
TREND: [STEIGEND / FALLEND / STABIL]'
        )
    """)

    # Standard-Nodes — idempotent thanks to the UNIQUE name column.
    c.execute("""
        INSERT OR IGNORE INTO nodes (name, tailscale_ip, location) VALUES
        ('flugscanner-asia', '100.112.190.22', 'Kambodscha'),
        ('flugscanner-mu', '100.75.182.15', 'Muldenstein DE')
    """)

    # Standard-Job. FIX: `jobs` has no UNIQUE constraint, so the previous
    # `INSERT OR IGNORE` never ignored anything and added a duplicate default
    # job on every start (init_db runs in both web and scheduler). Seed only
    # while the table is empty.
    if c.execute("SELECT COUNT(*) FROM jobs").fetchone()[0] == 0:
        c.execute("""
            INSERT INTO jobs (scanner, von, nach, tage, intervall) VALUES
            ('google_flights', 'FRA', 'PNH', 30, 'daily')
        """)

    conn.commit()
    conn.close()
|
||||
|
||||
|
||||
def log(message, level="INFO"):
    """Persist *message* in the logs table and echo it to stdout.

    ``level`` follows the table convention ('INFO', 'WARN', 'ERROR').
    """
    db = get_conn()
    db.execute(
        "INSERT INTO logs (level, message) VALUES (?, ?)",
        (level, message),
    )
    db.commit()
    db.close()
    print(f"[{level}] {message}")
|
||||
109
hub/src/ki.py
Normal file
109
hub/src/ki.py
Normal file
|
|
@ -0,0 +1,109 @@
|
|||
import os
|
||||
import json
|
||||
from openai import OpenAI
|
||||
from db import get_conn, log
|
||||
|
||||
client = OpenAI(
|
||||
base_url="https://openrouter.ai/api/v1",
|
||||
api_key=os.environ.get("OPENROUTER_API_KEY")
|
||||
)
|
||||
|
||||
MODEL = os.environ.get("AI_MODEL", "openai/gpt-4o-mini")
|
||||
|
||||
|
||||
def get_prompt():
    """Load the 'ki_auswertung' prompt template from the DB.

    Returns an empty string when no such prompt row exists.
    """
    db = get_conn()
    record = db.execute(
        "SELECT inhalt FROM prompts WHERE name = 'ki_auswertung'"
    ).fetchone()
    db.close()
    if record is None:
        return ""
    return record["inhalt"]
|
||||
|
||||
|
||||
def auswerten(von="FRA", nach="PNH"):
    """Run the AI price analysis for one route and store the result.

    Reads today's prices plus a 30-day history from the ``prices`` table,
    fills the editable prompt template, asks the OpenRouter model, parses a
    coarse recommendation out of the free-text answer, and writes a row
    into ``analyses``. Silently returns when no prices exist for today.
    All failures of the AI call are caught and only logged.
    """
    log("KI-Auswertung gestartet")
    conn = get_conn()

    # Today's prices for the route, cheapest first (index 0 is the winner).
    preise_heute = conn.execute("""
        SELECT scanner, node, preis, airline, abflug
        FROM prices
        WHERE von=? AND nach=?
        AND date(scraped_at) = date('now')
        ORDER BY preis ASC
    """, (von, nach)).fetchall()

    # Per-day min/avg over the last 30 days, for the trend section.
    preisverlauf = conn.execute("""
        SELECT date(scraped_at) as tag, MIN(preis) as min_preis, AVG(preis) as avg_preis
        FROM prices
        WHERE von=? AND nach=?
        AND scraped_at >= datetime('now', '-30 days')
        GROUP BY date(scraped_at)
        ORDER BY tag
    """, (von, nach)).fetchall()

    # Aggregate stats fed into the prompt's {avg}/{min}/{max} placeholders.
    stats = conn.execute("""
        SELECT AVG(preis) as avg, MIN(preis) as min, MAX(preis) as max
        FROM prices
        WHERE von=? AND nach=?
        AND scraped_at >= datetime('now', '-30 days')
    """, (von, nach)).fetchone()

    conn.close()

    if not preise_heute:
        log("Keine Preise für heute — KI-Auswertung übersprungen", "WARN")
        return

    preise_heute_str = "\n".join([
        f" {p['scanner']} ({p['node']}): {p['preis']} EUR — {p['airline'] or 'k.A.'}"
        for p in preise_heute
    ])
    verlauf_str = "\n".join([
        f" {p['tag']}: min {p['min_preis']:.0f} EUR, avg {p['avg_preis']:.0f} EUR"
        for p in preisverlauf
    ])

    # Template lives in the DB and is user-editable via the /prompts page.
    # NOTE(review): truthiness check means an aggregate of exactly 0 renders
    # as "?" — harmless for prices, but worth confirming.
    prompt_template = get_prompt()
    prompt = prompt_template.format(
        preise_heute=preise_heute_str,
        preisverlauf=verlauf_str,
        avg=f"{stats['avg']:.0f}" if stats['avg'] else "?",
        min=f"{stats['min']:.0f}" if stats['min'] else "?",
        max=f"{stats['max']:.0f}" if stats['max'] else "?"
    )

    try:
        response = client.chat.completions.create(
            model=MODEL,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=500
        )
        analyse = response.choices[0].message.content
        log(f"KI-Antwort erhalten: {analyse[:100]}...")

        guenstigster = preise_heute[0]
        # Coarse keyword parse of the free-text answer. Order matters:
        # "JETZT BUCHEN" is checked first so a reply that merely echoes the
        # option list still resolves in a fixed priority.
        empfehlung = ""
        if "JETZT BUCHEN" in analyse:
            empfehlung = "JETZT BUCHEN"
        elif "WARTEN" in analyse:
            empfehlung = "WARTEN"
        else:
            empfehlung = "NEUTRAL"

        conn = get_conn()
        conn.execute("""
            INSERT INTO analyses
            (von, nach, guenstigster_preis, guenstigster_anbieter, ki_empfehlung, ki_analyse)
            VALUES (?, ?, ?, ?, ?, ?)
        """, (
            von, nach,
            guenstigster["preis"],
            f"{guenstigster['scanner']} ({guenstigster['node']})",
            empfehlung, analyse
        ))
        conn.commit()
        conn.close()
        log("KI-Auswertung gespeichert")

    except Exception as e:
        # Best-effort by design: an AI/API failure must not kill the run.
        log(f"KI-Fehler: {e}", "ERROR")
|
||||
6
hub/src/requirements.txt
Normal file
6
hub/src/requirements.txt
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
flask==3.1.0
|
||||
flask-cors==5.0.0
|
||||
requests==2.32.3
|
||||
schedule==1.2.2
|
||||
apscheduler==3.11.0  # NOTE: unused — scheduler.py uses the 'schedule' package; consider removing
|
||||
openai==1.65.0
|
||||
133
hub/src/scheduler.py
Normal file
133
hub/src/scheduler.py
Normal file
|
|
@ -0,0 +1,133 @@
|
|||
import os
|
||||
import time
|
||||
import json
|
||||
import requests
|
||||
import schedule
|
||||
from datetime import datetime
|
||||
from db import init_db, get_conn, log
|
||||
from ki import auswerten
|
||||
|
||||
|
||||
def get_nodes():
    """Return every node that is not disabled, as a list of plain dicts."""
    db = get_conn()
    rows = db.execute(
        "SELECT * FROM nodes WHERE status != 'disabled'"
    ).fetchall()
    db.close()
    return list(map(dict, rows))
|
||||
|
||||
|
||||
def get_aktive_jobs():
    """Return all jobs with aktiv=1, as a list of plain dicts."""
    db = get_conn()
    rows = db.execute(
        "SELECT * FROM jobs WHERE aktiv = 1"
    ).fetchall()
    db.close()
    return list(map(dict, rows))
|
||||
|
||||
|
||||
def node_ping(node):
    """Health-check a node's /status endpoint.

    Returns True only when the node answers with HTTP 200 within 5 seconds;
    any connection problem or timeout yields False.
    """
    url = f"http://{node['tailscale_ip']}:5010/status"
    try:
        response = requests.get(url, timeout=5)
        return response.status_code == 200
    except Exception:
        return False
|
||||
|
||||
|
||||
def update_node_status(name, status):
    """Set a node's status and stamp last_seen with the current time (UTC)."""
    db = get_conn()
    db.execute(
        "UPDATE nodes SET status=?, last_seen=datetime('now') WHERE name=?",
        (status, name),
    )
    db.commit()
    db.close()
|
||||
|
||||
|
||||
def dispatch_job(node, job):
    """Send one scrape job to a node and persist any returned prices.

    Returns True when the node answered 200 and results were saved, False
    otherwise. NOTE(review): only the exception path marks the node
    offline — a non-200 HTTP answer leaves its status untouched; confirm
    that asymmetry is intended.
    """
    # Payload schema expected by the node's /job endpoint.
    payload = {
        "scanner": job["scanner"],
        "von": job["von"],
        "nach": job["nach"],
        "tage": job["tage"]
    }
    log(f"Job an {node['name']} ({node['tailscale_ip']}): {payload}")
    try:
        # Scraping is slow; allow up to 5 minutes per node/job.
        r = requests.post(
            f"http://{node['tailscale_ip']}:5010/job",
            json=payload,
            timeout=300
        )
        if r.status_code == 200:
            results = r.json().get("results", [])
            log(f"{node['name']}: {len(results)} Preise erhalten")
            speichere_preise(results, node["name"], job)
            return True
        else:
            log(f"{node['name']}: Fehler {r.status_code}", "ERROR")
            return False
    except Exception as e:
        log(f"{node['name']}: Exception {e}", "ERROR")
        update_node_status(node["name"], "offline")
        return False
|
||||
|
||||
|
||||
def speichere_preise(results, node_name, job):
    """Insert the scraped price rows for one job/node into the prices table.

    Missing optional keys fall back to sensible defaults ('' / 'EUR'); a
    result without 'preis' raises KeyError, matching the original contract.
    """
    rows = [
        (
            job["id"], r.get("scanner", job["scanner"]), node_name,
            r["preis"], r.get("waehrung", "EUR"), r.get("airline", ""),
            r.get("abflug", ""), r.get("ankunft", ""),
            job["von"], job["nach"],
        )
        for r in results
    ]
    db = get_conn()
    db.executemany(
        """
        INSERT INTO prices
        (job_id, scanner, node, preis, waehrung, airline, abflug, ankunft, von, nach)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """,
        rows,
    )
    db.commit()
    db.close()
|
||||
|
||||
|
||||
def taeglich_scrapen():
    """Run every active job on every reachable node, then trigger the AI analysis.

    Nodes that answer the ping are marked online and receive all jobs
    sequentially; unreachable nodes are marked offline and skipped.
    """
    log("=== Täglicher Scraping-Lauf gestartet ===")
    nodes = get_nodes()
    jobs = get_aktive_jobs()

    if not nodes:
        log("Keine aktiven Nodes konfiguriert", "WARN")
        return

    for node in nodes:
        if node_ping(node):
            update_node_status(node["name"], "online")
            for job in jobs:
                dispatch_job(node, job)
        else:
            log(f"Node {node['name']} nicht erreichbar", "WARN")
            update_node_status(node["name"], "offline")

    log("Scraping abgeschlossen — KI-Auswertung läuft")
    # NOTE(review): auswerten() runs with its default route (FRA -> PNH)
    # even when other routes were scraped — confirm whether a per-job
    # analysis is wanted here.
    auswerten()
    log("=== Lauf beendet ===")
|
||||
|
||||
|
||||
def run():
    """Scheduler entry point: init the DB, then poll forever.

    Registers taeglich_scrapen for a daily 06:00 run (container-local
    time) and checks pending work every 30 seconds.
    """
    init_db()
    log("Scheduler gestartet")

    daily = schedule.every().day.at("06:00")
    daily.do(taeglich_scrapen)

    log("Nächster Lauf: täglich 06:00 Uhr")

    while True:
        schedule.run_pending()
        time.sleep(30)


if __name__ == "__main__":
    run()
|
||||
511
hub/src/web.py
Normal file
511
hub/src/web.py
Normal file
|
|
@ -0,0 +1,511 @@
|
|||
import os
|
||||
import threading
|
||||
from flask import Flask, jsonify, request, render_template_string
|
||||
from flask_cors import CORS
|
||||
from db import init_db, get_conn, log
|
||||
from scheduler import taeglich_scrapen
|
||||
import schedule
|
||||
import time
|
||||
|
||||
app = Flask(__name__)
|
||||
CORS(app)
|
||||
|
||||
# ─── HTML Templates ───────────────────────────────────────────────────────────
|
||||
|
||||
BASE_HTML = """<!DOCTYPE html>
|
||||
<html lang="de">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>✈️ Flugpreisscanner</title>
|
||||
<script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
|
||||
<style>
|
||||
* { box-sizing: border-box; margin: 0; padding: 0; }
|
||||
body { font-family: 'Segoe UI', sans-serif; background: #0f172a; color: #e2e8f0; min-height: 100vh; }
|
||||
nav { background: #1e293b; padding: 1rem 2rem; display: flex; gap: 2rem; align-items: center; border-bottom: 1px solid #334155; }
|
||||
nav h1 { font-size: 1.2rem; color: #38bdf8; }
|
||||
nav a { color: #94a3b8; text-decoration: none; font-size: 0.9rem; padding: 0.4rem 0.8rem; border-radius: 6px; transition: all 0.2s; }
|
||||
nav a:hover, nav a.active { background: #334155; color: #e2e8f0; }
|
||||
.container { max-width: 1200px; margin: 2rem auto; padding: 0 1.5rem; }
|
||||
.card { background: #1e293b; border-radius: 12px; padding: 1.5rem; margin-bottom: 1.5rem; border: 1px solid #334155; }
|
||||
.card h2 { font-size: 1rem; color: #94a3b8; margin-bottom: 1rem; text-transform: uppercase; letter-spacing: 0.05em; }
|
||||
.badge { display: inline-block; padding: 0.25rem 0.75rem; border-radius: 9999px; font-size: 0.8rem; font-weight: 600; }
|
||||
.badge-green { background: #064e3b; color: #34d399; }
|
||||
.badge-yellow { background: #451a03; color: #fbbf24; }
|
||||
.badge-red { background: #450a0a; color: #f87171; }
|
||||
.badge-blue { background: #0c1a3a; color: #60a5fa; }
|
||||
table { width: 100%; border-collapse: collapse; }
|
||||
th { text-align: left; padding: 0.75rem; border-bottom: 1px solid #334155; color: #64748b; font-size: 0.8rem; text-transform: uppercase; }
|
||||
td { padding: 0.75rem; border-bottom: 1px solid #1e293b; font-size: 0.9rem; }
|
||||
tr:hover td { background: #0f172a; }
|
||||
.btn { background: #2563eb; color: white; border: none; padding: 0.5rem 1.2rem; border-radius: 8px; cursor: pointer; font-size: 0.9rem; }
|
||||
.btn:hover { background: #1d4ed8; }
|
||||
.btn-sm { padding: 0.3rem 0.8rem; font-size: 0.8rem; }
|
||||
.btn-green { background: #059669; }
|
||||
.btn-green:hover { background: #047857; }
|
||||
.btn-red { background: #dc2626; }
|
||||
.btn-red:hover { background: #b91c1c; }
|
||||
.grid-2 { display: grid; grid-template-columns: 1fr 1fr; gap: 1.5rem; }
|
||||
.grid-3 { display: grid; grid-template-columns: repeat(3, 1fr); gap: 1.5rem; }
|
||||
.stat-box { text-align: center; }
|
||||
.stat-box .value { font-size: 2rem; font-weight: 700; color: #38bdf8; }
|
||||
.stat-box .label { font-size: 0.8rem; color: #64748b; margin-top: 0.25rem; }
|
||||
.ki-box { background: #0f172a; border-radius: 8px; padding: 1rem; border-left: 3px solid #38bdf8; }
|
||||
.ki-empfehlung { font-size: 1.3rem; font-weight: 700; margin-bottom: 0.5rem; }
|
||||
.ki-text { color: #94a3b8; font-size: 0.9rem; line-height: 1.6; white-space: pre-wrap; }
|
||||
input, select, textarea { background: #0f172a; border: 1px solid #334155; color: #e2e8f0; padding: 0.5rem 0.8rem; border-radius: 8px; font-size: 0.9rem; width: 100%; margin-bottom: 0.8rem; }
|
||||
textarea { height: 200px; font-family: monospace; resize: vertical; }
|
||||
.log-line { font-family: monospace; font-size: 0.8rem; padding: 0.3rem 0; border-bottom: 1px solid #1e293b; }
|
||||
.log-INFO { color: #94a3b8; }
|
||||
.log-ERROR { color: #f87171; }
|
||||
.log-WARN { color: #fbbf24; }
|
||||
@media (max-width: 768px) { .grid-2, .grid-3 { grid-template-columns: 1fr; } }
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<nav>
|
||||
<h1>✈️ Flugpreisscanner</h1>
|
||||
<a href="/" class="{{ 'active' if page == 'overview' else '' }}">Übersicht</a>
|
||||
<a href="/quellen" class="{{ 'active' if page == 'quellen' else '' }}">Quellen</a>
|
||||
<a href="/jobs" class="{{ 'active' if page == 'jobs' else '' }}">Jobs</a>
|
||||
<a href="/prompts" class="{{ 'active' if page == 'prompts' else '' }}">Prompts</a>
|
||||
<a href="/logs" class="{{ 'active' if page == 'logs' else '' }}">Logs</a>
|
||||
</nav>
|
||||
<div class="container">
|
||||
{% block content %}{% endblock %}
|
||||
</div>
|
||||
</body>
|
||||
</html>"""
|
||||
|
||||
OVERVIEW_HTML = BASE_HTML.replace("{% block content %}{% endblock %}", """
|
||||
<div class="grid-3" style="margin-bottom:1.5rem">
|
||||
<div class="card stat-box">
|
||||
<div class="value" id="min-preis">—</div>
|
||||
<div class="label">Günstigster Preis heute (EUR)</div>
|
||||
</div>
|
||||
<div class="card stat-box">
|
||||
<div class="value" id="avg-preis">—</div>
|
||||
<div class="label">30-Tage-Durchschnitt (EUR)</div>
|
||||
</div>
|
||||
<div class="card stat-box">
|
||||
<div class="value" id="node-count">—</div>
|
||||
<div class="label">Aktive Nodes</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="grid-2">
|
||||
<div class="card">
|
||||
<h2>Preisverlauf (30 Tage)</h2>
|
||||
<canvas id="priceChart" height="200"></canvas>
|
||||
</div>
|
||||
<div class="card">
|
||||
<h2>KI-Empfehlung</h2>
|
||||
<div class="ki-box">
|
||||
<div class="ki-empfehlung" id="ki-empfehlung">Lade...</div>
|
||||
<div class="ki-text" id="ki-text"></div>
|
||||
<div style="margin-top:0.5rem;font-size:0.75rem;color:#475569" id="ki-datum"></div>
|
||||
</div>
|
||||
<div style="margin-top:1rem">
|
||||
<button class="btn btn-green" onclick="manuellScrapen()">▶ Jetzt scrapen</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="card">
|
||||
<h2>Heutige Preise</h2>
|
||||
<table>
|
||||
<thead><tr><th>Anbieter</th><th>Node</th><th>Preis</th><th>Airline</th><th>Zeit</th></tr></thead>
|
||||
<tbody id="preise-tbody"></tbody>
|
||||
</table>
|
||||
</div>
|
||||
|
||||
<div class="card">
|
||||
<h2>Nodes</h2>
|
||||
<table>
|
||||
<thead><tr><th>Name</th><th>Standort</th><th>Tailscale-IP</th><th>Status</th><th>Zuletzt</th></tr></thead>
|
||||
<tbody id="nodes-tbody"></tbody>
|
||||
</table>
|
||||
</div>
|
||||
|
||||
<script>
|
||||
async function ladeUebersicht() {
|
||||
const [stats, ki, preise, nodes] = await Promise.all([
|
||||
fetch('/api/stats').then(r=>r.json()),
|
||||
fetch('/api/ki/latest').then(r=>r.json()),
|
||||
fetch('/api/preise/heute').then(r=>r.json()),
|
||||
fetch('/api/nodes').then(r=>r.json())
|
||||
]);
|
||||
|
||||
document.getElementById('min-preis').textContent = stats.min_heute ? Math.round(stats.min_heute) : '—';
|
||||
document.getElementById('avg-preis').textContent = stats.avg_30d ? Math.round(stats.avg_30d) : '—';
|
||||
document.getElementById('node-count').textContent = nodes.filter(n=>n.status==='online').length;
|
||||
|
||||
if (ki.ki_empfehlung) {
|
||||
const farben = {'JETZT BUCHEN':'#34d399','WARTEN':'#fbbf24','NEUTRAL':'#60a5fa'};
|
||||
document.getElementById('ki-empfehlung').textContent = ki.ki_empfehlung;
|
||||
document.getElementById('ki-empfehlung').style.color = farben[ki.ki_empfehlung] || '#e2e8f0';
|
||||
document.getElementById('ki-text').textContent = ki.ki_analyse;
|
||||
document.getElementById('ki-datum').textContent = 'Stand: ' + ki.created_at;
|
||||
} else {
|
||||
document.getElementById('ki-empfehlung').textContent = 'Noch keine Auswertung';
|
||||
}
|
||||
|
||||
const tbody = document.getElementById('preise-tbody');
|
||||
tbody.innerHTML = preise.map(p => `
|
||||
<tr><td>${p.scanner}</td><td>${p.node}</td>
|
||||
<td><strong style="color:#38bdf8">${p.preis} EUR</strong></td>
|
||||
<td>${p.airline||'—'}</td><td>${p.scraped_at}</td></tr>
|
||||
`).join('') || '<tr><td colspan="5" style="color:#475569;text-align:center">Noch keine Daten heute</td></tr>';
|
||||
|
||||
const ntbody = document.getElementById('nodes-tbody');
|
||||
ntbody.innerHTML = nodes.map(n => `
|
||||
<tr><td>${n.name}</td><td>${n.location||'—'}</td><td>${n.tailscale_ip}</td>
|
||||
<td><span class="badge badge-${n.status==='online'?'green':n.status==='offline'?'red':'yellow'}">${n.status}</span></td>
|
||||
<td>${n.last_seen||'—'}</td></tr>
|
||||
`).join('');
|
||||
|
||||
// Chart
|
||||
const verlauf = await fetch('/api/preise/verlauf').then(r=>r.json());
|
||||
const ctx = document.getElementById('priceChart').getContext('2d');
|
||||
new Chart(ctx, {
|
||||
type: 'line',
|
||||
data: {
|
||||
labels: verlauf.map(v=>v.tag),
|
||||
datasets: [{
|
||||
label: 'Min EUR', data: verlauf.map(v=>v.min_preis),
|
||||
borderColor: '#38bdf8', backgroundColor: 'rgba(56,189,248,0.1)',
|
||||
tension: 0.3, fill: true
|
||||
}]
|
||||
},
|
||||
options: { plugins: { legend: { labels: { color: '#94a3b8' }}},
|
||||
scales: { x: { ticks: { color: '#64748b' }, grid: { color: '#1e293b' }},
|
||||
y: { ticks: { color: '#64748b' }, grid: { color: '#1e293b' }}}}
|
||||
});
|
||||
}
|
||||
|
||||
async function manuellScrapen() {
|
||||
if (!confirm('Scraping jetzt starten?')) return;
|
||||
const r = await fetch('/api/scrape/now', {method:'POST'});
|
||||
const d = await r.json();
|
||||
alert(d.message);
|
||||
}
|
||||
|
||||
ladeUebersicht();
|
||||
</script>
|
||||
""")
|
||||
|
||||
|
||||
# ─── API Endpoints ─────────────────────────────────────────────────────────────
|
||||
|
||||
@app.route("/api/stats")
def api_stats():
    """Return today's minimum price and the 30-day average as JSON."""
    db = get_conn()
    today_min = db.execute(
        "SELECT MIN(preis) as v FROM prices WHERE date(scraped_at)=date('now')"
    ).fetchone()["v"]
    month_avg = db.execute(
        "SELECT AVG(preis) as v FROM prices WHERE scraped_at >= datetime('now','-30 days')"
    ).fetchone()["v"]
    db.close()
    return jsonify({"min_heute": today_min, "avg_30d": month_avg})
|
||||
|
||||
|
||||
@app.route("/api/ki/latest")
def api_ki_latest():
    """Return the most recent AI analysis row, or {} when none exists."""
    db = get_conn()
    latest = db.execute(
        "SELECT * FROM analyses ORDER BY created_at DESC LIMIT 1"
    ).fetchone()
    db.close()
    if latest is None:
        return jsonify({})
    return jsonify(dict(latest))
|
||||
|
||||
|
||||
@app.route("/api/preise/heute")
def api_preise_heute():
    """List today's scraped prices, cheapest first (max 50 rows)."""
    db = get_conn()
    rows = db.execute(
        "SELECT * FROM prices WHERE date(scraped_at)=date('now') ORDER BY preis ASC LIMIT 50"
    ).fetchall()
    db.close()
    return jsonify(list(map(dict, rows)))
|
||||
|
||||
|
||||
@app.route("/api/preise/verlauf")
def api_preise_verlauf():
    """Daily min/avg price aggregates over the last 30 days (chart data)."""
    db = get_conn()
    rows = db.execute(
        """
        SELECT date(scraped_at) as tag, MIN(preis) as min_preis, AVG(preis) as avg_preis
        FROM prices WHERE scraped_at >= datetime('now','-30 days')
        GROUP BY date(scraped_at) ORDER BY tag
        """
    ).fetchall()
    db.close()
    return jsonify(list(map(dict, rows)))
|
||||
|
||||
|
||||
@app.route("/api/nodes")
def api_nodes():
    """List all configured nodes, ordered alphabetically by name."""
    db = get_conn()
    rows = db.execute("SELECT * FROM nodes ORDER BY name").fetchall()
    db.close()
    return jsonify(list(map(dict, rows)))
|
||||
|
||||
|
||||
@app.route("/api/nodes", methods=["POST"])
def api_nodes_add():
    """Create or replace a node (keyed on its UNIQUE name).

    Expects JSON {"name": ..., "tailscale_ip": ..., "location"?: ...}.
    FIX: missing required fields previously raised KeyError and produced
    an opaque 500; now answered with an explicit 400.
    """
    d = request.json or {}
    name = d.get("name")
    ip = d.get("tailscale_ip")
    if not name or not ip:
        return jsonify({"ok": False, "error": "name und tailscale_ip sind Pflichtfelder"}), 400
    conn = get_conn()
    conn.execute(
        "INSERT OR REPLACE INTO nodes (name, tailscale_ip, location) VALUES (?,?,?)",
        (name, ip, d.get("location", ""))
    )
    conn.commit()
    conn.close()
    return jsonify({"ok": True})
|
||||
|
||||
|
||||
@app.route("/api/jobs")
def api_jobs():
    """List every job (active and inactive), ordered by id."""
    db = get_conn()
    rows = db.execute("SELECT * FROM jobs ORDER BY id").fetchall()
    db.close()
    return jsonify(list(map(dict, rows)))
|
||||
|
||||
|
||||
@app.route("/api/jobs", methods=["POST"])
def api_jobs_add():
    """Create a new scrape job.

    Expects JSON {"scanner": ..., "von": ..., "nach": ..., "tage"?: int,
    "intervall"?: str}. FIX: missing required fields or a non-numeric
    "tage" previously surfaced as a 500 (KeyError / bad value in the DB);
    both are now rejected with an explicit 400.
    """
    d = request.json or {}
    missing = [k for k in ("scanner", "von", "nach") if not d.get(k)]
    if missing:
        return jsonify({"ok": False, "error": f"Pflichtfelder fehlen: {', '.join(missing)}"}), 400
    try:
        tage = int(d.get("tage", 30))
    except (TypeError, ValueError):
        return jsonify({"ok": False, "error": "tage muss eine Zahl sein"}), 400
    conn = get_conn()
    conn.execute(
        "INSERT INTO jobs (scanner, von, nach, tage, intervall) VALUES (?,?,?,?,?)",
        (d["scanner"], d["von"], d["nach"], tage, d.get("intervall", "daily"))
    )
    conn.commit()
    conn.close()
    return jsonify({"ok": True})
|
||||
|
||||
|
||||
@app.route("/api/jobs/<int:job_id>", methods=["DELETE"])
def api_jobs_delete(job_id):
    """Soft-delete a job: flag it inactive rather than removing the row,
    so historic prices keep a valid job_id reference."""
    db = get_conn()
    db.execute("UPDATE jobs SET aktiv=0 WHERE id=?", (job_id,))
    db.commit()
    db.close()
    return jsonify({"ok": True})
|
||||
|
||||
|
||||
@app.route("/api/prompts")
def api_prompts():
    """Return every stored prompt template."""
    db = get_conn()
    rows = db.execute("SELECT * FROM prompts").fetchall()
    db.close()
    return jsonify(list(map(dict, rows)))
|
||||
|
||||
|
||||
@app.route("/api/prompts/<name>", methods=["PUT"])
def api_prompts_update(name):
    """Overwrite a prompt's text and bump its updated_at timestamp."""
    payload = request.json
    db = get_conn()
    db.execute(
        "UPDATE prompts SET inhalt=?, updated_at=datetime('now') WHERE name=?",
        (payload["inhalt"], name),
    )
    db.commit()
    db.close()
    return jsonify({"ok": True})
|
||||
|
||||
|
||||
@app.route("/api/logs")
def api_logs():
    """Return the 200 most recent log entries, newest first."""
    db = get_conn()
    entries = db.execute(
        "SELECT * FROM logs ORDER BY created_at DESC LIMIT 200"
    ).fetchall()
    db.close()
    return jsonify(list(map(dict, entries)))
|
||||
|
||||
|
||||
@app.route("/api/scrape/now", methods=["POST"])
def api_scrape_now():
    """Kick off a full scraping run in a background thread and return at once."""
    worker = threading.Thread(target=taeglich_scrapen, daemon=True)
    worker.start()
    return jsonify({"message": "Scraping gestartet — läuft im Hintergrund"})
|
||||
|
||||
|
||||
# ─── Seiten ────────────────────────────────────────────────────────────────────
|
||||
|
||||
@app.route("/")
def overview():
    """Render the dashboard (stats, chart, AI recommendation, node table)."""
    return render_template_string(OVERVIEW_HTML, page="overview")
|
||||
|
||||
|
||||
@app.route("/quellen")
def quellen():
    """Render the 'Quellen' page: form to add a route/scanner job plus a
    table of active jobs, backed by the /api/jobs endpoints."""
    html = BASE_HTML.replace("{% block content %}{% endblock %}", """
<div class="card">
<h2>Route hinzufügen</h2>
<div style="display:grid;grid-template-columns:1fr 1fr 1fr 1fr auto;gap:1rem;align-items:end">
<div><label style="font-size:0.8rem;color:#64748b">Von</label>
<input id="von" value="FRA" placeholder="FRA"></div>
<div><label style="font-size:0.8rem;color:#64748b">Nach</label>
<input id="nach" value="PNH" placeholder="PNH"></div>
<div><label style="font-size:0.8rem;color:#64748b">Scanner</label>
<select id="scanner">
<option value="google_flights">Google Flights</option>
<option value="kayak">Kayak</option>
<option value="skyscanner">Skyscanner</option>
</select></div>
<div><label style="font-size:0.8rem;color:#64748b">Tage voraus</label>
<input id="tage" value="30" type="number"></div>
<button class="btn btn-green" onclick="addJob()">+ Hinzufügen</button>
</div>
</div>
<div class="card">
<h2>Aktive Jobs</h2>
<table>
<thead><tr><th>Scanner</th><th>Von</th><th>Nach</th><th>Tage</th><th>Intervall</th><th>Aktion</th></tr></thead>
<tbody id="jobs-tbody"></tbody>
</table>
</div>
<script>
async function ladeJobs() {
const jobs = await fetch('/api/jobs').then(r=>r.json());
document.getElementById('jobs-tbody').innerHTML = jobs.filter(j=>j.aktiv).map(j=>`
<tr><td>${j.scanner}</td><td>${j.von}</td><td>${j.nach}</td>
<td>${j.tage}</td><td>${j.intervall}</td>
<td><button class="btn btn-red btn-sm" onclick="deleteJob(${j.id})">Löschen</button></td></tr>
`).join('') || '<tr><td colspan="6" style="color:#475569;text-align:center">Keine Jobs</td></tr>';
}
async function addJob() {
await fetch('/api/jobs', {method:'POST', headers:{'Content-Type':'application/json'},
body: JSON.stringify({scanner:document.getElementById('scanner').value,
von:document.getElementById('von').value, nach:document.getElementById('nach').value,
tage:parseInt(document.getElementById('tage').value)})});
ladeJobs();
}
async function deleteJob(id) {
await fetch('/api/jobs/'+id, {method:'DELETE'});
ladeJobs();
}
ladeJobs();
</script>
""")
    return render_template_string(html, page="quellen")
|
||||
|
||||
|
||||
@app.route("/jobs")
def jobs_page():
    """Render the job/node status page with a manual 'run all jobs' button
    that POSTs to /api/scrape/now."""
    html = BASE_HTML.replace("{% block content %}{% endblock %}", """
<div class="card">
<h2>Job-Status</h2>
<table>
<thead><tr><th>Scanner</th><th>Von</th><th>Nach</th><th>Intervall</th><th>Aktiv</th></tr></thead>
<tbody id="jobs-tbody"></tbody>
</table>
</div>
<div class="card">
<h2>Nodes</h2>
<table>
<thead><tr><th>Name</th><th>IP</th><th>Standort</th><th>Status</th><th>Zuletzt aktiv</th></tr></thead>
<tbody id="nodes-tbody"></tbody>
</table>
</div>
<div style="margin-bottom:1rem">
<button class="btn" onclick="scrapeNow()">▶ Jetzt alle Jobs ausführen</button>
</div>
<script>
async function laden() {
const [jobs, nodes] = await Promise.all([
fetch('/api/jobs').then(r=>r.json()),
fetch('/api/nodes').then(r=>r.json())
]);
document.getElementById('jobs-tbody').innerHTML = jobs.map(j=>`
<tr><td>${j.scanner}</td><td>${j.von}</td><td>${j.nach}</td><td>${j.intervall}</td>
<td><span class="badge badge-${j.aktiv?'green':'red'}">${j.aktiv?'aktiv':'inaktiv'}</span></td></tr>
`).join('');
document.getElementById('nodes-tbody').innerHTML = nodes.map(n=>`
<tr><td>${n.name}</td><td>${n.tailscale_ip}</td><td>${n.location||'—'}</td>
<td><span class="badge badge-${n.status==='online'?'green':n.status==='offline'?'red':'yellow'}">${n.status}</span></td>
<td>${n.last_seen||'—'}</td></tr>
`).join('');
}
async function scrapeNow() {
const r = await fetch('/api/scrape/now',{method:'POST'});
const d = await r.json();
alert(d.message);
}
laden();
</script>
""")
    return render_template_string(html, page="jobs")
|
||||
|
||||
|
||||
@app.route("/prompts")
def prompts_page():
    """Render the prompt editor for the 'ki_auswertung' template,
    loading/saving via the /api/prompts endpoints."""
    html = BASE_HTML.replace("{% block content %}{% endblock %}", """
<div class="card">
<h2>KI-Prompt bearbeiten</h2>
<p style="color:#64748b;font-size:0.85rem;margin-bottom:1rem">
Verfügbare Variablen: {preise_heute}, {preisverlauf}, {avg}, {min}, {max}
</p>
<textarea id="prompt-inhalt" placeholder="Prompt laden..."></textarea>
<button class="btn btn-green" onclick="speichern()">Speichern</button>
</div>
<script>
async function laden() {
const prompts = await fetch('/api/prompts').then(r=>r.json());
const p = prompts.find(x=>x.name==='ki_auswertung');
if (p) document.getElementById('prompt-inhalt').value = p.inhalt;
}
async function speichern() {
await fetch('/api/prompts/ki_auswertung', {method:'PUT',
headers:{'Content-Type':'application/json'},
body: JSON.stringify({inhalt: document.getElementById('prompt-inhalt').value})});
alert('Gespeichert!');
}
laden();
</script>
""")
    return render_template_string(html, page="prompts")
|
||||
|
||||
|
||||
@app.route("/logs")
def logs_page():
    """Render the system log viewer page.

    Fetches log entries from /api/logs and auto-refreshes the list every
    10 seconds client-side.
    """
    # The content block is spliced into BASE_HTML by plain string
    # replacement before Jinja renders the combined template.
    html = BASE_HTML.replace("{% block content %}{% endblock %}", """
    <div class="card">
        <h2>System-Logs</h2>
        <div id="logs-container" style="max-height:600px;overflow-y:auto"></div>
    </div>
    <script>
    // Rebuild the log list; show a placeholder when there are no entries.
    async function laden() {
        const logs = await fetch('/api/logs').then(r=>r.json());
        document.getElementById('logs-container').innerHTML = logs.map(l=>
            `<div class="log-line log-${l.level}">
                <span style="color:#475569">${l.created_at}</span>
                <span class="badge badge-${l.level==='ERROR'?'red':l.level==='WARN'?'yellow':'blue'}" style="margin:0 0.5rem">${l.level}</span>
                ${l.message}
            </div>`
        ).join('') || '<div style="color:#475569;text-align:center;padding:2rem">Keine Logs</div>';
    }
    laden();
    setInterval(laden, 10000);
    </script>
    """)
    return render_template_string(html, page="logs")
|
||||
|
||||
|
||||
# ─── Start ─────────────────────────────────────────────────────────────────────

if __name__ == "__main__":
    # Entry point: initialize the database, then serve the web UI while a
    # background thread drives the daily scrape schedule.
    init_db()
    log("Flugpreisscanner Hub gestartet")

    # Scheduler in its own daemon thread so it dies with the web process.
    # NOTE(review): docker-compose also runs scheduler.py as a separate
    # service — this in-process schedule looks like it could double-trigger
    # the daily scrape; confirm which of the two is authoritative.
    def run_schedule():
        # Imports are local so the web process only pays for them when
        # actually run as a script.
        import schedule as s
        from scheduler import taeglich_scrapen
        s.every().day.at("06:00").do(taeglich_scrapen)
        while True:
            s.run_pending()
            time.sleep(30)

    threading.Thread(target=run_schedule, daemon=True).start()
    # Bind on all interfaces: the container publishes port 8080.
    app.run(host="0.0.0.0", port=8080, debug=False)
|
||||
1
node/.env.asia
Normal file
1
node/.env.asia
Normal file
|
|
@ -0,0 +1 @@
|
|||
NODE_NAME=flugscanner-asia
|
||||
1
node/.env.mu
Normal file
1
node/.env.mu
Normal file
|
|
@ -0,0 +1 @@
|
|||
NODE_NAME=flugscanner-mu
|
||||
19
node/Dockerfile
Normal file
19
node/Dockerfile
Normal file
|
|
@ -0,0 +1,19 @@
|
|||
FROM python:3.12-slim
|
||||
|
||||
# Chrome-Abhängigkeiten
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
chromium chromium-driver \
|
||||
fonts-liberation libatk-bridge2.0-0 libatk1.0-0 \
|
||||
libcups2 libdbus-1-3 libgdk-pixbuf2.0-0 libnspr4 \
|
||||
libnss3 libx11-xcb1 libxcomposite1 libxdamage1 \
|
||||
libxfixes3 libxrandr2 xdg-utils \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
COPY src/requirements.txt .
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
COPY src/ ./src/
|
||||
|
||||
CMD ["python", "src/agent.py"]
|
||||
23
node/docker-compose.yml
Normal file
23
node/docker-compose.yml
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
services:
|
||||
agent:
|
||||
build: .
|
||||
container_name: flugscanner-agent
|
||||
restart: unless-stopped
|
||||
ports:
|
||||
- "5010:5010"
|
||||
volumes:
|
||||
- ./src:/app/src
|
||||
environment:
|
||||
- NODE_NAME=${NODE_NAME}
|
||||
shm_size: '2gb'
|
||||
|
||||
novnc:
|
||||
image: theasp/novnc:latest
|
||||
container_name: flugscanner-novnc
|
||||
restart: unless-stopped
|
||||
ports:
|
||||
- "6080:8080"
|
||||
environment:
|
||||
- DISPLAY_WIDTH=1280
|
||||
- DISPLAY_HEIGHT=900
|
||||
- RUN_XTERM=no
|
||||
39
node/src/agent.py
Normal file
39
node/src/agent.py
Normal file
|
|
@ -0,0 +1,39 @@
|
|||
import os
|
||||
from flask import Flask, jsonify, request
|
||||
from worker import scrape
|
||||
|
||||
app = Flask(__name__)
|
||||
NODE_NAME = os.environ.get("NODE_NAME", "unknown-node")
|
||||
|
||||
|
||||
@app.route("/status")
def status():
    """Health-check endpoint: report this node as reachable and named."""
    payload = {"status": "online", "node": NODE_NAME}
    return jsonify(payload)
|
||||
|
||||
|
||||
@app.route("/job", methods=["POST"])
def job():
    """Run one scrape job on this node.

    Expects a JSON body with optional keys ``scanner``, ``von``, ``nach``,
    ``tage``; every key has a default.  Returns the scraped price list plus
    the node name and result count, or HTTP 500 with the error message when
    scraping fails.
    """
    # get_json(silent=True) tolerates a missing or non-JSON body instead of
    # letting Flask abort with 400/415 before our defaults can apply.
    data = request.get_json(silent=True) or {}
    scanner = data.get("scanner", "google_flights")
    von = data.get("von", "FRA")
    nach = data.get("nach", "PNH")
    tage = data.get("tage", 30)

    print(f"[{NODE_NAME}] Job: {scanner} {von}→{nach} ({tage} Tage)")

    try:
        results = scrape(scanner, von, nach, tage)
        print(f"[{NODE_NAME}] {len(results)} Preise gefunden")
        return jsonify({
            "results": results,
            "node": NODE_NAME,
            "count": len(results)
        })
    except Exception as e:
        # Broad catch is intentional: the node must report scrape failures
        # to the hub rather than crash; the error text is returned verbatim.
        print(f"[{NODE_NAME}] Fehler: {e}")
        return jsonify({"error": str(e), "node": NODE_NAME}), 500
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Entry point: announce the node, then serve the agent API.
    # Binds all interfaces so the hub can reach it (container maps 5010).
    print(f"Node-Agent gestartet: {NODE_NAME}")
    app.run(host="0.0.0.0", port=5010)
|
||||
2
node/src/requirements.txt
Normal file
2
node/src/requirements.txt
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
flask==3.1.0
|
||||
seleniumbase==4.34.4
|
||||
149
node/src/worker.py
Normal file
149
node/src/worker.py
Normal file
|
|
@ -0,0 +1,149 @@
|
|||
from seleniumbase import SB
|
||||
from datetime import datetime, timedelta
|
||||
import re
|
||||
|
||||
|
||||
def scrape(scanner, von, nach, tage=30):
    """Dispatch a scrape request to the scraper registered under *scanner*.

    Raises ValueError when the scanner name is unknown.
    """
    handlers = {
        "google_flights": scrape_google_flights,
        "kayak": scrape_kayak,
        "skyscanner": scrape_skyscanner,
    }
    try:
        handler = handlers[scanner]
    except KeyError:
        raise ValueError(f"Unbekannter Scanner: {scanner}") from None
    return handler(von, nach, tage)
|
||||
|
||||
|
||||
def parse_preis(text):
    """Extract a plausible flight price from a free-form string.

    Handles both English ("1,234.56") and German ("1.234,56") number
    formats.  The original implementation replaced every ',' with '.'
    up front, which made any value containing a thousands separator
    unparseable (e.g. "1,234.56" became "1.234.56").

    Returns the first value in the sanity range (50, 10000) rounded to
    two decimals, or None when no such number is found.
    """
    # Tokens start and end on a digit so trailing punctuation is excluded.
    for token in re.findall(r'\d[\d.,]*\d|\d', text):
        try:
            wert = _token_to_float(token)
        except ValueError:
            continue
        if 50 < wert < 10000:
            return round(wert, 2)
    return None


def _token_to_float(token):
    """Convert a numeric token with ambiguous separators to float.

    When both ',' and '.' occur, the one appearing last is the decimal
    point.  A single separator followed by exactly two digits is treated
    as a decimal point; otherwise it is a thousands separator.
    """
    has_comma = ',' in token
    has_dot = '.' in token
    if has_comma and has_dot:
        if token.rfind(',') > token.rfind('.'):
            token = token.replace('.', '').replace(',', '.')  # 1.234,56 -> 1234.56
        else:
            token = token.replace(',', '')                    # 1,234.56 -> 1234.56
    elif has_comma or has_dot:
        sep = ',' if has_comma else '.'
        head, _, tail = token.rpartition(sep)
        if token.count(sep) == 1 and len(tail) == 2:
            token = head + '.' + tail                         # decimal separator
        else:
            token = token.replace(sep, '')                    # grouping separator
    return float(token)
|
||||
|
||||
|
||||
def scrape_google_flights(von, nach, tage=30):
    """Scrape Google Flights prices for *von* -> *nach*.

    Searches a departure *tage* days from today and returns up to 10
    result dicts (scanner/preis/waehrung/airline/abflug/ankunft), or an
    empty list when nothing could be parsed.

    Fix: the original assigned a hard-coded ``tfs=...`` search URL that
    was immediately overwritten by the query-string URL below — dead code
    removed.
    """
    results = []
    abflug = (datetime.now() + timedelta(days=tage)).strftime("%Y-%m-%d")
    # Natural-language query URL; Google resolves the airport codes itself.
    url = (
        f"https://www.google.com/travel/flights?"
        f"q=Flights+from+{von}+to+{nach}+on+{abflug}"
    )

    with SB(uc=True, headless=True, chromium_arg="--no-sandbox") as sb:
        sb.open(url)
        sb.sleep(4)

        # Dismiss the cookie consent banner if present.
        try:
            sb.click('button[aria-label*="Accept"]', timeout=3)
            sb.sleep(1)
        except Exception:
            pass

        # Primary extraction: result rows exposing a data-price attribute.
        try:
            preise_elems = sb.find_elements('li[data-price]', timeout=8)
            for elem in preise_elems[:10]:
                preis_str = elem.get_attribute('data-price') or elem.text
                preis = parse_preis(preis_str)
                if preis:
                    results.append({
                        "scanner": "google_flights",
                        "preis": preis,
                        "waehrung": "EUR",
                        "airline": "",
                        "abflug": abflug,
                        "ankunft": ""
                    })
        except Exception:
            # Selector misses are expected when Google changes its markup.
            pass

        # Fallback: grep the raw HTML for EUR-looking amounts.
        if not results:
            try:
                page_text = sb.get_page_source()
                matches = re.findall(r'(\d{3,4})\s*€', page_text)
                for m in matches[:5]:
                    preis = float(m)
                    if 100 < preis < 5000:
                        results.append({
                            "scanner": "google_flights",
                            "preis": preis,
                            "waehrung": "EUR",
                            "airline": "",
                            "abflug": abflug,
                            "ankunft": ""
                        })
            except Exception:
                pass

    return results
|
||||
|
||||
|
||||
def scrape_kayak(von, nach, tage=30):
    """Scrape Kayak one-way prices for *von* -> *nach*, *tage* days out.

    Returns up to 10 result dicts, or an empty list when nothing parsed.
    """
    abflug = (datetime.now() + timedelta(days=tage)).strftime("%Y-%m-%d")
    url = f"https://www.kayak.com/flights/{von}-{nach}/{abflug}?sort=price_a"
    results = []

    with SB(uc=True, headless=True, chromium_arg="--no-sandbox") as sb:
        sb.open(url)
        sb.sleep(5)

        try:
            # Price labels on the results page; swallow selector misses.
            for element in sb.find_elements('.price-text', timeout=8)[:10]:
                wert = parse_preis(element.text)
                if not wert:
                    continue
                results.append({
                    "scanner": "kayak",
                    "preis": wert,
                    "waehrung": "EUR",
                    "airline": "",
                    "abflug": abflug,
                    "ankunft": ""
                })
        except Exception:
            pass

    return results
|
||||
|
||||
|
||||
def scrape_skyscanner(von, nach, tage=30):
    """Scrape Skyscanner one-way prices for *von* -> *nach*, *tage* days out.

    Returns up to 10 result dicts, or an empty list when nothing parsed.

    Fix: the original stored ``abflug`` in Skyscanner's compact ``%y%m%d``
    URL format (e.g. "250605"), while the other scrapers store ISO
    ``%Y-%m-%d`` — the hub received mixed date formats.  The compact form
    is now used only for the URL; results carry the ISO date.
    """
    results = []
    abflug_datum = datetime.now() + timedelta(days=tage)
    abflug = abflug_datum.strftime("%Y-%m-%d")      # ISO, for result records
    url_datum = abflug_datum.strftime("%y%m%d")     # compact, for the URL path
    url = f"https://www.skyscanner.de/flights/{von.lower()}/{nach.lower()}/{url_datum}/"

    with SB(uc=True, headless=True, chromium_arg="--no-sandbox") as sb:
        sb.open(url)
        sb.sleep(5)

        try:
            elems = sb.find_elements('[data-testid="price-label"]', timeout=8)
            for elem in elems[:10]:
                preis = parse_preis(elem.text)
                if preis:
                    results.append({
                        "scanner": "skyscanner",
                        "preis": preis,
                        "waehrung": "EUR",
                        "airline": "",
                        "abflug": abflug,
                        "ankunft": ""
                    })
        except Exception:
            # Selector misses are expected when markup changes; return [].
            pass

    return results
|
||||
Loading…
Add table
Reference in a new issue