"""Pro Aurum Scraper — Selenium, Stealth Mode.""" import re import random import time import logging from datetime import datetime from selenium import webdriver from selenium.webdriver.chrome.options import Options from selenium.webdriver.chrome.service import Service logger = logging.getLogger(__name__) OZ = 31.1035 GOLD_URL = "https://www.proaurum.de/shop/gold/goldmuenzen-zur-kapitalanlage/" SILVER_URL = "https://www.proaurum.de/shop/silber/silbermuenzen/" USER_AGENTS = [ "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/121.0.0.0 Safari/537.36", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 Chrome/121.0.0.0 Safari/537.36", ] def _make_driver() -> webdriver.Chrome: opts = Options() opts.add_argument(f"--user-agent={random.choice(USER_AGENTS)}") opts.add_argument("--headless=new") opts.add_argument("--no-sandbox") opts.add_argument("--disable-dev-shm-usage") opts.add_argument("--disable-blink-features=AutomationControlled") opts.add_experimental_option("excludeSwitches", ["enable-automation"]) opts.add_experimental_option("useAutomationExtension", False) driver = webdriver.Chrome(service=Service("/bin/chromedriver"), options=opts) driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", { "source": "Object.defineProperty(navigator, 'webdriver', {get: () => undefined});" }) return driver def _parse_price(text: str) -> float | None: cleaned = text.replace(".", "").replace(",", ".").replace("€", "").strip() try: return float(cleaned) except ValueError: return None def _weight_from_name(name: str) -> float: n = name.lower() if "10 kg" in n or "10 kilogramm" in n: return 10000.0 if "1 kg" in n or "kilogramm" in n: return 1000.0 if "10 unzen" in n or "10 oz" in n: return OZ * 10 if "1/2" in n: return OZ / 2 if "1/4" in n: return OZ / 4 if "1/10" in n: return OZ / 10 return OZ # Standard 1oz def _scrape_page(driver: webdriver.Chrome, url: str) -> list[dict]: logger.info(f"Lade {url}") driver.get(url) time.sleep(random.uniform(4, 7)) products = [] # Pro Aurum CSS Modules (Stand 2026-02) # productButton-buy = "Kaufen" = Kaufpreis (sell_price, was Kunde zahlt) # productButton-sell = "Verkaufen" = Ankaufspreis (buy_price, was Händler zahlt) # Preise stehen in buySellSection-price: erst Kauf-, dann Ankaufspreis cards = driver.find_elements("css selector", "[class*='product-root']") for card in cards: try: # Name: Text ohne € Zeichen name = "" for sel in ["a", "h2", "h3", "[class*='name']"]: els = card.find_elements("css selector", sel) for el in els: t = el.text.strip() if t and len(t) > 5 and "€" not in t and "Kaufen" not in t and "Verkaufen" not in t: name = t.split("\n")[0].strip() break if name: break # Beide Preise in Reihenfolge holen # [0] = Kaufpreis (sell_price), [1] = Ankaufspreis (buy_price) price_els = card.find_elements("css selector", "[class*='buySellSection-price']") sell = _parse_price(price_els[0].text) if len(price_els) > 0 else None buy = _parse_price(price_els[1].text) if len(price_els) > 1 else None # Fallback: alle €-Preise wenn buySellSection nicht gefunden if not sell: for el in card.find_elements("css selector", "[class*='price'], [class*='Price']"): v = _parse_price(el.text) if v and v > 100: sell = v break weight = _weight_from_name(name) if sell and sell > 0: products.append({ "product": name, "sell_price": sell, "buy_price": buy, "weight_g": weight, }) except Exception: continue return products def scrape() -> dict: """ Scrapt Pro Aurum. Gibt {'gold': [...], 'silver': [...]} zurück. Wirft Exception wenn es scheitert — Fallback wird vom Aufrufer gehandelt. """ t0 = datetime.now() driver = _make_driver() try: # Zufällige Wartezeit (Anti-Bot) wait = random.randint(60, 150) logger.info(f"Warte {wait}s vor dem Scraping...") time.sleep(wait) gold = _scrape_page(driver, GOLD_URL) silver = _scrape_page(driver, SILVER_URL) duration = (datetime.now() - t0).total_seconds() logger.info(f"Pro Aurum: {len(gold)} Gold, {len(silver)} Silber in {duration:.0f}s") return {"gold": gold, "silver": silver, "source": "proaurum"} finally: driver.quit()