135 lines
4.8 KiB
Python
135 lines
4.8 KiB
Python
"""Pro Aurum Scraper — Selenium, Stealth Mode."""
|
|
import re
|
|
import random
|
|
import time
|
|
import logging
|
|
from datetime import datetime
|
|
|
|
from selenium import webdriver
|
|
from selenium.webdriver.chrome.options import Options
|
|
from selenium.webdriver.chrome.service import Service
|
|
|
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Grams per ounce; used to turn ounce-denominated product sizes into grams.
OZ = 31.1035

# Pro Aurum shop listing pages that are scraped.
GOLD_URL = "https://www.proaurum.de/shop/gold/goldmuenzen-zur-kapitalanlage/"
SILVER_URL = "https://www.proaurum.de/shop/silber/silbermuenzen/"

# Pool of desktop user-agent strings; one is chosen at random per driver.
USER_AGENTS = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/121.0.0.0 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 Chrome/121.0.0.0 Safari/537.36",
]
|
|
|
|
|
|
def _make_driver(chromedriver_path: str = "/bin/chromedriver") -> webdriver.Chrome:
    """Start a headless Chrome driver configured to hide automation hints.

    Args:
        chromedriver_path: Filesystem path of the chromedriver binary.

    Returns:
        A running ``webdriver.Chrome``. The caller owns it and must call
        ``quit()`` when done.

    Raises:
        Exception: Whatever Selenium raises when Chrome cannot be started or
            the stealth script cannot be registered. In the latter case the
            already-started browser is shut down before re-raising.
    """
    opts = Options()
    # A different user agent may be presented on every session.
    opts.add_argument(f"--user-agent={random.choice(USER_AGENTS)}")
    for flag in (
        "--headless=new",
        "--no-sandbox",
        "--disable-dev-shm-usage",
        "--disable-blink-features=AutomationControlled",
    ):
        opts.add_argument(flag)
    opts.add_experimental_option("excludeSwitches", ["enable-automation"])
    opts.add_experimental_option("useAutomationExtension", False)

    driver = webdriver.Chrome(service=Service(chromedriver_path), options=opts)
    try:
        # Registered for every new document so that pages see
        # navigator.webdriver as undefined.
        driver.execute_cdp_cmd(
            "Page.addScriptToEvaluateOnNewDocument",
            {
                "source": "Object.defineProperty(navigator, 'webdriver', {get: () => undefined});"
            },
        )
    except Exception:
        # Do not leave an orphaned browser process behind if setup fails.
        driver.quit()
        raise
    return driver
|
|
|
|
|
|
def _parse_price(text: str) -> float | None:
|
|
cleaned = text.replace(".", "").replace(",", ".").replace("€", "").strip()
|
|
try:
|
|
return float(cleaned)
|
|
except ValueError:
|
|
return None
|
|
|
|
|
|
def _weight_from_name(name: str) -> float:
|
|
n = name.lower()
|
|
if "10 kg" in n or "10 kilogramm" in n: return 10000.0
|
|
if "1 kg" in n or "kilogramm" in n: return 1000.0
|
|
if "10 unzen" in n or "10 oz" in n: return OZ * 10
|
|
if "1/2" in n: return OZ / 2
|
|
if "1/4" in n: return OZ / 4
|
|
if "1/10" in n: return OZ / 10
|
|
return OZ # Standard 1oz
|
|
|
|
|
|
def _scrape_page(driver: webdriver.Chrome, url: str) -> list[dict]:
    """Load one listing page and extract its product cards.

    Args:
        driver: Running Chrome driver used to load the page.
        url: Listing page to scrape.

    Returns:
        One dict per card that yielded a positive sell price, with the keys
        ``product``, ``sell_price``, ``buy_price`` (may be ``None``) and
        ``weight_g``.

    Raises:
        Exception: Whatever Selenium raises while loading the page or
            locating the cards. Failures inside a single card are logged at
            debug level and that card is skipped.
    """
    logger.info("Lade %s", url)
    driver.get(url)
    # Randomised pause before the DOM is read.
    time.sleep(random.uniform(4, 7))

    # Pro Aurum CSS modules (as of 2026-02):
    #   productButton-buy  = "Kaufen"    = price the customer pays (sell_price)
    #   productButton-sell = "Verkaufen" = price the dealer pays (buy_price)
    # Prices sit in buySellSection-price: customer price first, then the
    # dealer's purchase price.
    cards = driver.find_elements("css selector", "[class*='product-root']")
    if not cards:
        logger.warning("Keine Produktkarten auf %s gefunden", url)

    products = []
    for card in cards:
        try:
            name = _card_name(card)
            sell, buy = _card_prices(card)
        except Exception:
            # Best effort: one broken card must not abort the whole page,
            # but the failure should stay visible in the logs.
            logger.debug("Produktkarte auf %s übersprungen", url, exc_info=True)
            continue

        if sell and sell > 0:
            products.append({
                "product": name,
                "sell_price": sell,
                "buy_price": buy,
                "weight_g": _weight_from_name(name),
            })

    return products


def _card_name(card) -> str:
    """Return the first line of the first plausible name text in a card, or ""."""
    for selector in ("a", "h2", "h3", "[class*='name']"):
        for el in card.find_elements("css selector", selector):
            text = el.text.strip()
            # Skip short texts, prices and the buy/sell button labels.
            if (
                len(text) > 5
                and "€" not in text
                and "Kaufen" not in text
                and "Verkaufen" not in text
            ):
                return text.split("\n")[0].strip()
    return ""


def _card_prices(card) -> tuple:
    """Return ``(sell_price, buy_price)`` for a card; either may be ``None``."""
    # [0] = price the customer pays (sell_price), [1] = dealer purchase price.
    price_els = card.find_elements("css selector", "[class*='buySellSection-price']")
    sell = _parse_price(price_els[0].text) if price_els else None
    buy = _parse_price(price_els[1].text) if len(price_els) > 1 else None

    if not sell:
        # Fallback when the buy/sell section is missing: take the first
        # price-like element.
        # NOTE(review): the > 100 threshold presumably filters out unrelated
        # numbers, but it also rejects any product priced at 100 € or less —
        # confirm this is intended for the silver page.
        for el in card.find_elements("css selector", "[class*='price'], [class*='Price']"):
            value = _parse_price(el.text)
            if value and value > 100:
                sell = value
                break

    return sell, buy
|
|
|
|
|
|
def scrape() -> dict:
    """Scrape the Pro Aurum gold and silver listing pages.

    Returns:
        ``{"gold": [...], "silver": [...], "source": "proaurum"}`` where each
        list holds the product dicts of the corresponding page.

    Raises:
        Exception: Propagated when the driver cannot be created or a page
            cannot be loaded; the caller is expected to handle the fallback.
            An empty result does not raise, it is only logged as a warning.
    """
    # Monotonic clock: the measured duration is immune to wall-clock changes.
    started = time.monotonic()
    driver = _make_driver()
    try:
        # Random delay before the first request (anti-bot).
        wait = random.randint(60, 150)
        logger.info("Warte %ds vor dem Scraping...", wait)
        time.sleep(wait)

        gold = _scrape_page(driver, GOLD_URL)
        silver = _scrape_page(driver, SILVER_URL)

        duration = time.monotonic() - started
        logger.info(
            "Pro Aurum: %d Gold, %d Silber in %.0fs", len(gold), len(silver), duration
        )
        if not gold and not silver:
            # Nothing scraped at all usually means the selectors no longer match.
            logger.warning("Pro Aurum: keine Produkte gefunden")
        return {"gold": gold, "silver": silver, "source": "proaurum"}
    finally:
        # Always shut the browser down, also when scraping fails.
        driver.quit()
|