homelab-brain/arakava-news/reddit_trends.py

290 lines
12 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
Tech-Trends Workflow (Hacker News + GitHub Trending)
Täglich: Holt Top/New Stories von HN + GitHub Trending Repos,
analysiert mit OpenRouter, postet deutschen Artikel mit Quellenlinks nach WordPress.
"""
import requests
import base64
import logging
import os
import re
from datetime import datetime, timedelta, timezone
# ── Configuration ──────────────────────────────────────────────
# NOTE(review): these credentials were previously hard-coded. They are now
# read from the environment, falling back to the committed values so existing
# deployments keep working unchanged. Anything committed to version control
# must be treated as leaked — rotate these secrets and remove the fallbacks.
WP_URL = os.environ.get("WP_URL", "https://arakava-news-2.orbitalo.net")
WP_USER = os.environ.get("WP_USER", "admin")
WP_PASS = os.environ.get("WP_PASS", "eJIyhW0p5PFacjvvKGufKeXS")
OPENROUTER_KEY = os.environ.get("OPENROUTER_KEY", "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")
TELEGRAM_TOKEN = os.environ.get("TELEGRAM_TOKEN", "8551565940:AAHIUpZND-tCNGv9yEoNPRyPt4GxEPYBJdE")
TELEGRAM_CHAT = os.environ.get("TELEGRAM_CHAT", "674951792")
LOG_FILE = os.environ.get("LOG_FILE", "/opt/rss-manager/logs/reddit_trends.log")
WP_CATEGORY = "Tech-Trends"  # WordPress category the daily article is filed under
# ── Logging ────────────────────────────────────────────────────
# Ensure the log directory exists before basicConfig opens the file handler.
os.makedirs(os.path.dirname(LOG_FILE), exist_ok=True)
logging.basicConfig(
    filename=LOG_FILE,
    level=logging.INFO,
    format="%(asctime)s [tech_trends] %(levelname)s %(message)s",
)
# Root logger; the [tech_trends] tag in the format line identifies this script
# when several workflows share the same log directory.
log = logging.getLogger()
# ── Hacker News API ────────────────────────────────────────────
def fetch_hn_stories(feed="topstories", limit=30, min_score=50):
    """Fetch top/new/best stories from the Hacker News Firebase API.

    Args:
        feed: HN feed name ("topstories", "newstories", "beststories").
        limit: Maximum number of story IDs to inspect.
        min_score: Stories below this score are skipped.

    Returns:
        List of dicts with title/url/hn_url/score/comments/source keys.
        Returns [] on any request failure (best-effort, logged as warning).
    """
    try:
        r = requests.get(
            f"https://hacker-news.firebaseio.com/v0/{feed}.json",
            timeout=10,
        )
        # Surface HTTP errors explicitly instead of trying to parse an error body.
        r.raise_for_status()
        ids = r.json()[:limit]
        # Hoisted out of the loop: one reference time for all freshness checks.
        now_ts = datetime.now(timezone.utc).timestamp()
        stories = []
        for story_id in ids:
            try:
                item = requests.get(
                    f"https://hacker-news.firebaseio.com/v0/item/{story_id}.json",
                    timeout=5,
                ).json()
                if not item or item.get("type") != "story":
                    continue
                if item.get("score", 0) < min_score:
                    continue
                # Only articles from the last 48 hours.
                age_h = (now_ts - item.get("time", 0)) / 3600
                if age_h > 48:
                    continue
                stories.append({
                    "title": item.get("title", ""),
                    "url": item.get("url", ""),
                    "hn_url": f"https://news.ycombinator.com/item?id={story_id}",
                    "score": item.get("score", 0),
                    "comments": item.get("descendants", 0),
                    "source": f"HN/{feed}",
                })
            except Exception:
                # Best-effort: a single broken item must not abort the whole feed.
                continue
        log.info(f"HN/{feed}: {len(stories)} Stories (Score≥{min_score}, <48h)")
        return stories
    except Exception as e:
        log.warning(f"HN {feed} Fehler: {e}")
        return []
# ── GitHub Trending (Search API) ──────────────────────────────
def fetch_github_trending(days=2, limit=10):
    """Return the repos that gained the most stars in the last *days* days.

    Uses the GitHub search API (``created:>DATE`` sorted by stars) as a
    lightweight stand-in for the trending page.

    Returns:
        List of story-shaped dicts (same keys as fetch_hn_stories, plus
        "language" and "topics"); [] on HTTP error or exception.
    """
    # Use UTC so the cutoff date matches GitHub's timestamps regardless of the
    # server's local timezone (previously computed from naive local time).
    since = (datetime.now(timezone.utc) - timedelta(days=days)).strftime("%Y-%m-%d")
    try:
        r = requests.get(
            "https://api.github.com/search/repositories",
            params={
                "q": f"created:>{since}",
                "sort": "stars",
                "order": "desc",
                "per_page": limit,
            },
            headers={"Accept": "application/vnd.github.v3+json"},
            timeout=15,
        )
        if r.status_code != 200:
            log.warning(f"GitHub API: HTTP {r.status_code}")
            return []
        repos = []
        for repo in r.json().get("items", []):
            repos.append({
                # description may be None — normalise to "" via `or ''`.
                "title": f"{repo['full_name']}: {repo.get('description', '') or ''}",
                "url": repo["html_url"],
                # No HN discussion exists; reuse the repo URL so downstream
                # consumers can treat both source types uniformly.
                "hn_url": repo["html_url"],
                "score": repo["stargazers_count"],
                "comments": repo.get("forks_count", 0),
                "source": "GitHub/Trending",
                "language": repo.get("language", ""),
                "topics": ", ".join(repo.get("topics", [])[:5]),
            })
        log.info(f"GitHub Trending: {len(repos)} Repos (seit {since})")
        return repos
    except Exception as e:
        log.warning(f"GitHub Trending Fehler: {e}")
        return []
# ── OpenRouter Analyse ─────────────────────────────────────────
def analyse_with_ki(hn_stories, gh_repos):
    """Ask the OpenRouter model to write a German article from the signals.

    Args:
        hn_stories: dicts from fetch_hn_stories().
        gh_repos: dicts from fetch_github_trending().

    Returns:
        The article HTML as a string, or None on failure.
    """
    # Render both source lists into the numbered plain-text form the prompt expects.
    hn_lines = []
    for i, s in enumerate(hn_stories, 1):
        ext = f"\n 🔗 Artikel: {s['url']}" if s["url"] else ""
        hn_lines.append(
            f"{i}. [{s['source']}] {s['title']}"
            f"\n ⭐ Score: {s['score']} | 💬 Kommentare: {s['comments']}"
            f"\n 📎 HN: {s['hn_url']}{ext}"
        )
    gh_lines = []
    for i, r in enumerate(gh_repos, 1):
        lang = f" [{r['language']}]" if r.get("language") else ""
        topics = f" | Topics: {r['topics']}" if r.get("topics") else ""
        gh_lines.append(
            f"{i}. [GitHub]{lang} {r['title']}"
            f"\n ⭐ Stars: {r['score']}{topics}"
            f"\n 🔗 {r['url']}"
        )
    prompt = f"""Du bist ein Tech-Journalist. Analysiere diese aktuellen Tech-Signale von Hacker News und GitHub und schreibe einen informativen deutschen Artikel für ein technikaffines Publikum.
=== HACKER NEWS (was Tech-Menschen gerade lesen & diskutieren) ===
{chr(10).join(hn_lines)}
=== GITHUB TRENDING (was gerade gebaut & gehypt wird) ===
{chr(10).join(gh_lines)}
AUFGABE:
1. Identifiziere die 5-7 stärksten Trends — HN-Score und GitHub-Stars zeigen echten Hype
2. Schreibe einen strukturierten deutschen Artikel:
- Knackige, neugierig machende Überschrift
- Einleitung: 4-5 Sätze — was bewegt die Tech-Welt heute konkret
- Pro Trend: H3-Überschrift + 4-5 Sätze mit echten Details aus den Quellen + Quellenlinks:
<p class="quellen"><a href="HN_ODER_ARTIKEL_URL">📰 Quelle lesen</a>[GITHUB_LINK]</p>
wobei [GITHUB_LINK] = " | <a href=\"GITHUB_URL\">⭐ GitHub</a>" wenn es ein Repo-Trend ist
- Fazit: 3-4 Sätze Gesamtbild — wohin entwickelt sich die Branche
3. Mindestlänge: 25 Sätze, nutze konkrete Zahlen (Stars, Scores) als Belege
4. Stil: präzise, tiefgründig, kein Clickbait, auf Deutsch
FORMAT: Nur HTML (h3, p, a). Kein Markdown, kein ```html."""
    headers = {
        "Authorization": f"Bearer {OPENROUTER_KEY}",
        "Content-Type": "application/json",
        "HTTP-Referer": "https://arakava-news-2.orbitalo.net",
        "X-Title": "Arakava Tech Trends",
    }
    payload = {
        "model": "anthropic/claude-3.5-haiku",
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": 4000,
        "temperature": 0.7,
    }
    try:
        # TLS verification re-enabled (was verify=False): openrouter.ai serves a
        # valid public certificate, and disabling verification would expose the
        # API key to man-in-the-middle interception.
        r = requests.post(
            "https://openrouter.ai/api/v1/chat/completions",
            headers=headers, json=payload, timeout=90,
        )
        if r.status_code == 200:
            content = r.json()["choices"][0]["message"]["content"].strip()
            # Strip accidental markdown code fences despite the prompt's FORMAT rule.
            content = re.sub(r"^```html?\s*", "", content)
            content = re.sub(r"\s*```$", "", content)
            log.info(f"KI-Analyse OK: {len(content)} Zeichen")
            return content
        else:
            log.error(f"OpenRouter Fehler {r.status_code}: {r.text[:200]}")
            return None
    except Exception as e:
        log.error(f"OpenRouter Exception: {e}")
        return None
# ── Titel extrahieren ──────────────────────────────────────────
def extract_title(content):
    """Extract the first <h1>/<h2>/<h3> heading from the article HTML as plain text.

    The closing tag must match the opening level (backreference ``\\1``), so a
    mismatched pair like ``<h1>…</h3>`` no longer produces a bogus title.
    Inline markup inside the heading (<b>, <a>, …) is stripped.

    Returns:
        The heading text, or a dated generic German title when no heading is found.
    """
    m = re.search(r"<h([123])[^>]*>(.*?)</h\1>", content, re.IGNORECASE | re.DOTALL)
    if m:
        # Remove any nested tags, keep only the visible text.
        return re.sub(r"<[^>]+>", "", m.group(2)).strip()
    return f"Tech-Trends: Was die Szene heute bewegt {datetime.now().strftime('%d.%m.%Y')}"
# ── WordPress Kategorie ────────────────────────────────────────
def get_or_create_category(name):
    """Return the WordPress category ID for *name*, creating it if missing.

    Matching is case-insensitive on the exact category name.

    Returns:
        The category ID (int), or None when lookup and creation both fail.
    """
    auth = base64.b64encode(f"{WP_USER}:{WP_PASS}".encode()).decode()
    h = {"Authorization": f"Basic {auth}", "Content-Type": "application/json"}
    # params= URL-encodes the search term — the previous f-string interpolation
    # produced an invalid query for names containing spaces, '&' or umlauts.
    # NOTE(review): verify=False — presumably the WP host uses a self-signed
    # certificate; confirm, otherwise enable verification here too.
    r = requests.get(f"{WP_URL}/wp-json/wp/v2/categories",
                     params={"search": name, "per_page": 5},
                     headers=h, timeout=10, verify=False)
    if r.status_code == 200:
        for c in r.json():
            if c["name"].lower() == name.lower():
                return c["id"]
    # Not found: create it.
    r = requests.post(f"{WP_URL}/wp-json/wp/v2/categories",
                      headers=h, json={"name": name}, timeout=10, verify=False)
    if r.status_code == 201:
        cat_id = r.json()["id"]
        log.info(f"Kategorie '{name}' angelegt (ID {cat_id})")
        return cat_id
    return None
# ── WordPress Post ─────────────────────────────────────────────
def post_to_wordpress(title, content, cat_id):
    """Publish the article to WordPress and return the public post URL.

    The title is suffixed with today's date (DD.MM.YYYY). Returns None and
    logs an error when the API does not answer with 201 Created.
    """
    credentials = base64.b64encode(f"{WP_USER}:{WP_PASS}".encode()).decode()
    headers = {
        "Authorization": f"Basic {credentials}",
        "Content-Type": "application/json",
    }
    today = datetime.now().strftime("%d.%m.%Y")
    payload = {
        "title": f"{title} [{today}]",
        "content": content,
        "status": "publish",
        "categories": [cat_id] if cat_id else [],
    }
    resp = requests.post(f"{WP_URL}/wp-json/wp/v2/posts",
                         headers=headers, json=payload, timeout=30, verify=False)
    if resp.status_code != 201:
        log.error(f"WordPress Fehler {resp.status_code}: {resp.text[:200]}")
        return None
    post_url = resp.json()["link"]
    log.info(f"WordPress Post: {post_url}")
    return post_url
# ── Telegram ───────────────────────────────────────────────────
def send_telegram(title, url):
    """Notify the configured Telegram chat about the freshly published article."""
    text = f"📡 <b>Tech-Trends</b>\n\n<b>{title}</b>\n\n<a href=\"{url}\">Artikel lesen</a>"
    payload = {
        "chat_id": TELEGRAM_CHAT,
        "text": text,
        "parse_mode": "HTML",
        "disable_web_page_preview": False,
    }
    try:
        resp = requests.post(
            f"https://api.telegram.org/bot{TELEGRAM_TOKEN}/sendMessage",
            json=payload,
            timeout=10,
        )
        # Notification is best-effort: failures are logged, never raised.
        if resp.status_code == 200:
            log.info("Telegram gesendet")
        else:
            log.warning(f"Telegram Fehler {resp.status_code}")
    except Exception as e:
        log.warning(f"Telegram Exception: {e}")
# ── Main ───────────────────────────────────────────────────────
def main():
    """Daily pipeline: collect signals → AI article → WordPress → Telegram."""
    log.info("=== Tech Trends Start ===")
    # Hacker News: combine top and new stories.
    hn_top = fetch_hn_stories("topstories", limit=40, min_score=100)
    hn_new = fetch_hn_stories("newstories", limit=30, min_score=30)
    # De-duplicate by title prefix, keeping the highest-scored occurrence.
    seen = set()
    hn_all = []
    for story in sorted(hn_top + hn_new, key=lambda s: s["score"], reverse=True):
        dedup_key = story["title"][:60].lower()
        if dedup_key in seen:
            continue
        seen.add(dedup_key)
        hn_all.append(story)
    hn_stories = hn_all[:20]  # top 20 go to the model
    log.info(f"HN gesamt: {len(hn_stories)} Stories")
    # GitHub trending repositories.
    gh_repos = fetch_github_trending(days=2, limit=10)
    if len(hn_stories) + len(gh_repos) < 5:
        log.error("Zu wenige Quellen Abbruch")
        return
    content = analyse_with_ki(hn_stories, gh_repos)
    if not content:
        log.error("KI-Analyse fehlgeschlagen Abbruch")
        return
    title = extract_title(content)
    log.info(f"Titel: {title}")
    cat_id = get_or_create_category(WP_CATEGORY)
    post_url = post_to_wordpress(title, content, cat_id)
    if not post_url:
        return
    send_telegram(title, post_url)
    log.info("=== Tech Trends Ende ===")


if __name__ == "__main__":
    main()