# homelab-brain/edelmetall/code/proaurum.py
# (135 lines, 4.8 KiB, Python)

"""Pro Aurum Scraper — Selenium, Stealth Mode."""
import re
import random
import time
import logging
from datetime import datetime
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Grams per ounce (rounded); _weight_from_name uses it to express coin
# weights in grams.
OZ = 31.1035
# Shop listing pages that scrape() loads, one for gold and one for silver.
GOLD_URL = "https://www.proaurum.de/shop/gold/goldmuenzen-zur-kapitalanlage/"
SILVER_URL = "https://www.proaurum.de/shop/silber/silbermuenzen/"
# Pool of browser user-agent strings; _make_driver picks one at random for
# each new driver.
USER_AGENTS = [
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/121.0.0.0 Safari/537.36",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 Chrome/121.0.0.0 Safari/537.36",
]
def _make_driver() -> webdriver.Chrome:
    """Create a headless Chrome driver configured to look less like a bot.

    A user agent is picked at random from USER_AGENTS, the automation
    switches are disabled, and a script is registered that hides
    ``navigator.webdriver`` on every new document.

    Returns:
        A started ``webdriver.Chrome`` instance. The caller owns it and must
        call ``quit()`` when done.

    Raises:
        Whatever Selenium raises if Chrome cannot be started or the CDP
        command fails. In the latter case the already-started browser is
        shut down before the exception propagates.
    """
    opts = Options()
    opts.add_argument(f"--user-agent={random.choice(USER_AGENTS)}")
    opts.add_argument("--headless=new")
    opts.add_argument("--no-sandbox")
    opts.add_argument("--disable-dev-shm-usage")
    opts.add_argument("--disable-blink-features=AutomationControlled")
    opts.add_experimental_option("excludeSwitches", ["enable-automation"])
    opts.add_experimental_option("useAutomationExtension", False)

    driver = webdriver.Chrome(service=Service("/bin/chromedriver"), options=opts)
    try:
        # Runs before any page script, so the site never sees the flag.
        driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
            "source": "Object.defineProperty(navigator, 'webdriver', {get: () => undefined});"
        })
    except Exception:
        # Without this the Chrome process would be orphaned: the caller never
        # receives a driver object it could quit.
        driver.quit()
        raise
    return driver
def _parse_price(text: str) -> float | None:
cleaned = text.replace(".", "").replace(",", ".").replace("", "").strip()
try:
return float(cleaned)
except ValueError:
return None
def _weight_from_name(name: str) -> float:
    """Derive a product's weight in grams from its display name.

    Recognised, in order of precedence: 10 kg, 1 kg, unit fractions of an
    ounce ("1/2", "1/4", "1/10", "1/20", ...), and 10 ounces. Everything
    else is assumed to be a one-ounce piece.

    Args:
        name: Product name as shown in the shop (any letter case).

    Returns:
        Weight in grams.
    """
    n = name.lower()
    if "10 kg" in n or "10 kilogramm" in n:
        return 10000.0
    if "1 kg" in n or "kilogramm" in n:
        return 1000.0

    # Fractions must be tested before "10 oz": the text "1/10 oz" contains
    # "10 oz", and a plain substring test for "1/2" would also hit "1/20"
    # and "1/25". The lookarounds keep year ranges such as "2023/24" from
    # being read as a fraction.
    frac = re.search(r"(?<![\d/])1/(\d{1,3})(?![\d/])", n)
    if frac and int(frac.group(1)) > 0:
        return OZ / int(frac.group(1))

    if "10 unzen" in n or "10 oz" in n:
        return OZ * 10
    return OZ  # default: one ounce
def _extract_product_name(card) -> str:
    """Return the first plausible product title inside *card*, or ""."""
    # Texts containing any of these are price/button labels, not titles.
    excluded = ("\u20ac", "Kaufen", "Verkaufen")
    for selector in ("a", "h2", "h3", "[class*='name']"):
        for el in card.find_elements("css selector", selector):
            text = el.text.strip()
            if len(text) <= 5:
                continue
            if any(marker in text for marker in excluded):
                continue
            # Only the first line is the title; further lines are extras.
            return text.split("\n")[0].strip()
    return ""


def _scrape_page(driver: webdriver.Chrome, url: str) -> list[dict]:
    """Load one listing page and extract its products.

    Args:
        driver: A started Chrome driver.
        url: Listing page to load.

    Returns:
        One dict per product card that has a positive sell price, with keys
        ``product`` (name, possibly ""), ``sell_price``, ``buy_price``
        (``None`` when not found) and ``weight_g``.

    Cards that raise while being read are skipped and logged at debug
    level; an exception from loading the page itself propagates.
    """
    logger.info(f"Lade {url}")
    driver.get(url)
    # Randomised pause so the page can render and the access pattern looks
    # less mechanical.
    time.sleep(random.uniform(4, 7))

    products = []
    # Pro Aurum CSS modules (as of 2026-02):
    #   productButton-buy  = "Kaufen"    = price the customer pays (sell_price)
    #   productButton-sell = "Verkaufen" = price the dealer pays (buy_price)
    # Prices live in buySellSection-price: first the customer price, then the
    # dealer buy-back price.
    cards = driver.find_elements("css selector", "[class*='product-root']")
    for card in cards:
        try:
            name = _extract_product_name(card)

            # [0] = customer price (sell_price), [1] = buy-back (buy_price)
            price_els = card.find_elements("css selector", "[class*='buySellSection-price']")
            sell = _parse_price(price_els[0].text) if len(price_els) > 0 else None
            buy = _parse_price(price_els[1].text) if len(price_els) > 1 else None

            # Fallback when the buySellSection markup is missing: take the
            # first price-like element with a value above 100.
            if not sell:
                for el in card.find_elements("css selector", "[class*='price'], [class*='Price']"):
                    v = _parse_price(el.text)
                    if v and v > 100:
                        sell = v
                        break

            weight = _weight_from_name(name)
            if sell and sell > 0:
                products.append({
                    "product": name,
                    "sell_price": sell,
                    "buy_price": buy,
                    "weight_g": weight,
                })
        except Exception:
            # Best effort per card: one broken card must not abort the page,
            # but leave a trace so systematic failures can be diagnosed.
            logger.debug("Produktkarte konnte nicht gelesen werden", exc_info=True)
            continue
    return products
def scrape() -> dict:
    """Scrape the Pro Aurum gold and silver listing pages.

    Returns:
        ``{'gold': [...], 'silver': [...], 'source': 'proaurum'}`` where each
        list holds the product dicts produced by ``_scrape_page``.

    Exceptions are not caught here; they propagate so the caller can apply
    its own fallback. The driver is always quit, even on failure.
    """
    started = datetime.now()
    driver = _make_driver()
    try:
        # Random delay before the first request (anti-bot measure).
        wait = random.randint(60, 150)
        logger.info(f"Warte {wait}s vor dem Scraping...")
        time.sleep(wait)

        # Gold is fetched first, then silver, using the same browser session.
        gold, silver = [_scrape_page(driver, page) for page in (GOLD_URL, SILVER_URL)]

        elapsed = datetime.now() - started
        duration = elapsed.total_seconds()
        logger.info(f"Pro Aurum: {len(gold)} Gold, {len(silver)} Silber in {duration:.0f}s")

        result = {"gold": gold, "silver": silver, "source": "proaurum"}
        return result
    finally:
        driver.quit()