#!/usr/bin/env python3
"""
Tech trends workflow (Hacker News + GitHub Trending).

Daily job: fetches top/new stories from Hacker News and recently created,
highly starred GitHub repositories, has them analysed via OpenRouter and
publishes a German article with source links to WordPress.
"""
# Recovered from git patch 6288bedc ("Add: Tech-Trends Workflow (HN + GitHub
# Trending)"), which creates arakava-news/reddit_trends.py.

import base64
import logging
import os
import re
from datetime import datetime, timedelta, timezone

import requests

# ── Configuration ──────────────────────────────────────────────
# SECURITY: credentials should come from the environment. The literal
# fallbacks only keep existing deployments working; they are committed to
# version control and should be rotated and then removed.
WP_URL = "https://arakava-news-2.orbitalo.net"
WP_USER = "admin"
WP_PASS = os.environ.get("WP_PASS", "eJIyhW0p5PFacjvvKGufKeXS")
OPENROUTER_KEY = os.environ.get(
    "OPENROUTER_KEY",
    "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",
)
TELEGRAM_TOKEN = os.environ.get(
    "TELEGRAM_TOKEN", "8551565940:AAHIUpZND-tCNGv9yEoNPRyPt4GxEPYBJdE"
)
TELEGRAM_CHAT = "674951792"
LOG_FILE = "/opt/rss-manager/logs/reddit_trends.log"
WP_CATEGORY = "Tech-Trends"

# ── Logging ────────────────────────────────────────────────────
os.makedirs(os.path.dirname(LOG_FILE), exist_ok=True)
logging.basicConfig(
    filename=LOG_FILE,
    level=logging.INFO,
    format="%(asctime)s [tech_trends] %(levelname)s %(message)s",
)
log = logging.getLogger()

# ── Hacker News API ────────────────────────────────────────────
_HN_API = "https://hacker-news.firebaseio.com/v0"


def _hn_item_to_story(item, story_id, feed, min_score, max_age_h, now_ts):
    """Convert one HN item payload into a story dict, or None if filtered out."""
    if not item or item.get("type") != "story":
        return None
    score = item.get("score", 0)
    if score < min_score:
        return None
    # Items without a timestamp count as infinitely old and are dropped.
    age_h = (now_ts - item.get("time", 0)) / 3600
    if age_h > max_age_h:
        return None
    return {
        "title": item.get("title", ""),
        "url": item.get("url", ""),
        "hn_url": f"https://news.ycombinator.com/item?id={story_id}",
        "score": score,
        "comments": item.get("descendants", 0),
        "source": f"HN/{feed}",
    }


def fetch_hn_stories(feed="topstories", limit=30, min_score=50, max_age_h=48):
    """Fetch recent, sufficiently upvoted stories from a Hacker News feed.

    Args:
        feed: HN feed name used in the URL (e.g. "topstories", "newstories").
        limit: Number of story ids taken from the head of the feed.
        min_score: Stories with a lower score are skipped.
        max_age_h: Stories older than this many hours are skipped.

    Returns:
        A list of dicts with the keys title, url, hn_url, score, comments
        and source. Returns an empty list if the feed itself cannot be
        loaded; individual items that fail to load are skipped.
    """
    try:
        # One session for the whole run: the loop issues up to `limit`
        # requests against the same host, so the connection is reused.
        with requests.Session() as session:
            resp = session.get(f"{_HN_API}/{feed}.json", timeout=10)
            resp.raise_for_status()
            ids = (resp.json() or [])[:limit]
            now_ts = datetime.now(timezone.utc).timestamp()
            stories = []
            for story_id in ids:
                try:
                    item = session.get(
                        f"{_HN_API}/item/{story_id}.json", timeout=5
                    ).json()
                    story = _hn_item_to_story(
                        item, story_id, feed, min_score, max_age_h, now_ts
                    )
                except Exception as exc:
                    # Best effort: one broken item must not lose the feed,
                    # but leave a trace instead of swallowing it silently.
                    log.debug("HN item %s übersprungen: %s", story_id, exc)
                    continue
                if story is not None:
                    stories.append(story)
        log.info(
            "HN/%s: %d Stories (Score≥%s, <%sh)",
            feed, len(stories), min_score, max_age_h,
        )
        return stories
    except Exception as e:
        log.warning("HN %s Fehler: %s", feed, e)
        return []


# ── GitHub Trending (Search API) ──────────────────────────────
def fetch_github_trending(days=2, limit=10):
    """Fetch the most-starred repositories created within the last `days` days.

    This approximates "trending" through the GitHub search API: the query
    is `created:>DATE`, sorted by stars in descending order.

    Args:
        days: Size of the creation window in days.
        limit: Maximum number of repositories requested (`per_page`).

    Returns:
        A list of dicts with the keys title, url, hn_url, score, comments,
        source, language and topics. Returns an empty list on a non-200
        response or on any error.
    """
    # Use UTC so the cut-off date does not depend on the host's local
    # timezone (the HN fetcher above also works in UTC).
    since = (datetime.now(timezone.utc) - timedelta(days=days)).strftime("%Y-%m-%d")
    try:
        r = requests.get(
            "https://api.github.com/search/repositories",
            params={
                "q": f"created:>{since}",
                "sort": "stars",
                "order": "desc",
                "per_page": limit,
            },
            headers={"Accept": "application/vnd.github.v3+json"},
            timeout=15,
        )
        if r.status_code != 200:
            log.warning("GitHub API: HTTP %s", r.status_code)
            return []
        repos = [
            {
                "title": f"{repo['full_name']}: {repo.get('description', '') or ''}",
                "url": repo["html_url"],
                # Same key set as HN stories; a repo has no separate
                # discussion page, so the repo URL is reused.
                "hn_url": repo["html_url"],
                "score": repo["stargazers_count"],
                "comments": repo.get("forks_count", 0),
                "source": "GitHub/Trending",
                "language": repo.get("language") or "",
                "topics": ", ".join((repo.get("topics") or [])[:5]),
            }
            for repo in r.json().get("items", [])
        ]
        log.info("GitHub Trending: %d Repos (seit %s)", len(repos), since)
        return repos
    except Exception as e:
        log.warning("GitHub Trending Fehler: %s", e)
        return []


# ── OpenRouter analysis ───────────────────────────────────────
# TLS verification for WordPress calls. Enabled by default; set
# WP_VERIFY_TLS=0 only if the site uses a certificate the host cannot verify.
WP_VERIFY_TLS = os.environ.get("WP_VERIFY_TLS", "1").strip().lower() not in (
    "0", "false", "no",
)


def _format_hn_lines(hn_stories):
    """Render HN stories as numbered prompt entries."""
    lines = []
    for i, s in enumerate(hn_stories, 1):
        ext = f"\n   🔗 Artikel: {s['url']}" if s["url"] else ""
        lines.append(
            f"{i}. [{s['source']}] {s['title']}"
            f"\n   ⭐ Score: {s['score']} | 💬 Kommentare: {s['comments']}"
            f"\n   📎 HN: {s['hn_url']}{ext}"
        )
    return lines


def _format_gh_lines(gh_repos):
    """Render GitHub repos as numbered prompt entries."""
    lines = []
    for i, r in enumerate(gh_repos, 1):
        lang = f" [{r['language']}]" if r.get("language") else ""
        topics = f" | Topics: {r['topics']}" if r.get("topics") else ""
        lines.append(
            f"{i}. [GitHub]{lang} {r['title']}"
            f"\n   ⭐ Stars: {r['score']}{topics}"
            f"\n   🔗 {r['url']}"
        )
    return lines


def analyse_with_ki(hn_stories, gh_repos):
    """Ask the OpenRouter chat API to write a German HTML article.

    Args:
        hn_stories: Story dicts with title, url, hn_url, score, comments, source.
        gh_repos: Repo dicts with title, url, score and optional language/topics.

    Returns:
        The generated HTML with any surrounding code fence removed, or
        None if the request fails or returns a non-200 status.
    """
    hn_block = "\n".join(_format_hn_lines(hn_stories))
    gh_block = "\n".join(_format_gh_lines(gh_repos))

    # The link template spells out the anchor markup explicitly so the
    # model has a concrete HTML pattern to copy for the source links.
    prompt = f"""Du bist ein Tech-Journalist. Analysiere diese aktuellen Tech-Signale von Hacker News und GitHub und schreibe einen informativen deutschen Artikel für ein technikaffines Publikum.

=== HACKER NEWS (was Tech-Menschen gerade lesen & diskutieren) ===
{hn_block}

=== GITHUB TRENDING (was gerade gebaut & gehypt wird) ===
{gh_block}

AUFGABE:
1. Identifiziere die 5-7 stärksten Trends — HN-Score und GitHub-Stars zeigen echten Hype
2. Schreibe einen strukturierten deutschen Artikel:
   - Knackige, neugierig machende Überschrift
   - Einleitung: 4-5 Sätze — was bewegt die Tech-Welt heute konkret
   - Pro Trend: H3-Überschrift + 4-5 Sätze mit echten Details aus den Quellen + Quellenlinks:
     <p><a href="URL">📰 Quelle lesen</a>[GITHUB_LINK]</p>
     wobei [GITHUB_LINK] = " | <a href="GITHUB_URL">⭐ GitHub</a>" wenn es ein Repo-Trend ist
   - Fazit: 3-4 Sätze Gesamtbild — wohin entwickelt sich die Branche
3. Mindestlänge: 25 Sätze, nutze konkrete Zahlen (Stars, Scores) als Belege
4. Stil: präzise, tiefgründig, kein Clickbait, auf Deutsch

FORMAT: Nur HTML (h3, p, a). Kein Markdown, kein ```html."""

    headers = {
        "Authorization": f"Bearer {OPENROUTER_KEY}",
        "Content-Type": "application/json",
        "HTTP-Referer": "https://arakava-news-2.orbitalo.net",
        "X-Title": "Arakava Tech Trends",
    }
    payload = {
        "model": "anthropic/claude-3.5-haiku",
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": 4000,
        "temperature": 0.7,
    }
    try:
        # Certificate verification stays on: this request carries the API key.
        r = requests.post(
            "https://openrouter.ai/api/v1/chat/completions",
            headers=headers, json=payload, timeout=90,
        )
        if r.status_code != 200:
            log.error("OpenRouter Fehler %s: %s", r.status_code, r.text[:200])
            return None
        content = r.json()["choices"][0]["message"]["content"].strip()
        # Models sometimes wrap the answer in a code fence despite the prompt.
        content = re.sub(r"^```html?\s*", "", content)
        content = re.sub(r"\s*```$", "", content)
        log.info("KI-Analyse OK: %d Zeichen", len(content))
        return content
    except Exception as e:
        log.error("OpenRouter Exception: %s", e)
        return None


# ── Title extraction ───────────────────────────────────────────
def extract_title(content):
    """Return the text of the first HTML heading (h1-h6) in `content`.

    Nested tags inside the heading are stripped. If there is no heading,
    or it is empty, a dated German default title is returned instead.
    """
    m = re.search(
        r"<h([1-6])[^>]*>(.*?)</h\1\s*>", content, re.IGNORECASE | re.DOTALL
    )
    if m:
        title = re.sub(r"<[^>]+>", "", m.group(2)).strip()
        if title:
            return title
    return f"Tech-Trends: Was die Szene heute bewegt – {datetime.now().strftime('%d.%m.%Y')}"


# ── WordPress category ─────────────────────────────────────────
def _wp_headers():
    """Build the Basic-auth JSON headers shared by all WordPress requests."""
    auth = base64.b64encode(f"{WP_USER}:{WP_PASS}".encode()).decode()
    return {"Authorization": f"Basic {auth}", "Content-Type": "application/json"}


def get_or_create_category(name):
    """Return the id of the WordPress category `name`, creating it if needed.

    The name is compared case-insensitively against the search results.

    Returns:
        The category id, or None if lookup and creation both fail.
    """
    endpoint = f"{WP_URL}/wp-json/wp/v2/categories"
    h = _wp_headers()
    try:
        # `params` URL-encodes the name (spaces, '&', umlauts).
        r = requests.get(
            endpoint, params={"search": name, "per_page": 5},
            headers=h, timeout=10, verify=WP_VERIFY_TLS,
        )
        if r.status_code == 200:
            for c in r.json():
                if c["name"].lower() == name.lower():
                    return c["id"]
        r = requests.post(
            endpoint, headers=h, json={"name": name},
            timeout=10, verify=WP_VERIFY_TLS,
        )
        if r.status_code == 201:
            cat_id = r.json()["id"]
            log.info("Kategorie '%s' angelegt (ID %s)", name, cat_id)
            return cat_id
        log.warning("Kategorie Fehler %s: %s", r.status_code, r.text[:200])
    except (requests.RequestException, ValueError, KeyError, TypeError) as e:
        log.warning("Kategorie Exception: %s", e)
    return None


# ── WordPress post ─────────────────────────────────────────────
def post_to_wordpress(title, content, cat_id):
    """Publish the article to WordPress.

    Args:
        title: Post title; the current date is appended as "[dd.mm.yyyy]".
        content: HTML body of the post.
        cat_id: Category id, or a falsy value to post without a category.

    Returns:
        The public link of the created post, or None on failure.
    """
    today = datetime.now().strftime("%d.%m.%Y")
    data = {
        "title": f"{title} [{today}]",
        "content": content,
        "status": "publish",
        "categories": [cat_id] if cat_id else [],
    }
    try:
        r = requests.post(
            f"{WP_URL}/wp-json/wp/v2/posts",
            headers=_wp_headers(), json=data, timeout=30, verify=WP_VERIFY_TLS,
        )
        if r.status_code == 201:
            url = r.json()["link"]
            log.info("WordPress Post: %s", url)
            return url
        log.error("WordPress Fehler %s: %s", r.status_code, r.text[:200])
    except (requests.RequestException, ValueError, KeyError) as e:
        log.error("WordPress Exception: %s", e)
    return None


# ── Telegram ───────────────────────────────────────────────────
def send_telegram(title, url):
    """Send a Telegram notification linking to the published article.

    Failures are logged as warnings and never raised.
    """
    from html import escape

    # parse_mode is HTML, so the title must be escaped; otherwise a '<'
    # or '&' in it makes Telegram reject the message.
    text = (
        f"📡 <b>Tech-Trends</b>\n\n{escape(title)}\n\n"
        f'<a href="{escape(url)}">Artikel lesen</a>'
    )
    try:
        r = requests.post(
            f"https://api.telegram.org/bot{TELEGRAM_TOKEN}/sendMessage",
            json={"chat_id": TELEGRAM_CHAT, "text": text,
                  "parse_mode": "HTML", "disable_web_page_preview": False},
            timeout=10,
        )
        if r.status_code == 200:
            log.info("Telegram gesendet")
        else:
            log.warning("Telegram Fehler %s", r.status_code)
    except Exception as e:
        log.warning("Telegram Exception: %s", e)


# ── Main ───────────────────────────────────────────────────────
def _dedupe_by_title(stories):
    """Sort stories by score (descending) and keep the first per title prefix."""
    seen = set()
    unique = []
    for s in sorted(stories, key=lambda x: x["score"], reverse=True):
        # The first 60 lowercased characters identify a story appearing
        # in both feeds.
        key = s["title"][:60].lower()
        if key not in seen:
            seen.add(key)
            unique.append(s)
    return unique


def main():
    """Run the pipeline: fetch sources, generate article, publish, notify."""
    log.info("=== Tech Trends Start ===")

    # Hacker News: combine top and new stories
    hn_top = fetch_hn_stories("topstories", limit=40, min_score=100)
    hn_new = fetch_hn_stories("newstories", limit=30, min_score=30)

    hn_stories = _dedupe_by_title(hn_top + hn_new)[:20]  # top 20 for the model
    log.info("HN gesamt: %d Stories", len(hn_stories))

    # GitHub Trending
    gh_repos = fetch_github_trending(days=2, limit=10)

    if len(hn_stories) + len(gh_repos) < 5:
        log.error("Zu wenige Quellen – Abbruch")
        return

    content = analyse_with_ki(hn_stories, gh_repos)
    if not content:
        log.error("KI-Analyse fehlgeschlagen – Abbruch")
        return

    title = extract_title(content)
    log.info("Titel: %s", title)

    cat_id = get_or_create_category(WP_CATEGORY)
    post_url = post_to_wordpress(title, content, cat_id)
    if not post_url:
        return

    send_telegram(title, post_url)
    log.info("=== Tech Trends Ende ===")


if __name__ == "__main__":
    main()