Add: Tech-Trends Workflow (HN + GitHub Trending)
This commit is contained in:
parent
2a19f4c967
commit
6288bedc20
1 changed files with 290 additions and 0 deletions
290
arakava-news/reddit_trends.py
Normal file
290
arakava-news/reddit_trends.py
Normal file
|
|
@ -0,0 +1,290 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Tech-Trends Workflow (Hacker News + GitHub Trending)
|
||||
Täglich: Holt Top/New Stories von HN + GitHub Trending Repos,
|
||||
analysiert mit OpenRouter, postet deutschen Artikel mit Quellenlinks nach WordPress.
|
||||
"""
|
||||
|
||||
import requests
|
||||
import base64
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
from datetime import datetime, timedelta, timezone
|
||||
|
||||
# ── Configuration ──────────────────────────────────────────────
# SECURITY: these credentials were previously hard-coded and committed to
# version control — treat them as compromised and rotate them. They are now
# read from the environment, falling back to the old literal values so that
# existing deployments keep working until the env vars are provisioned.
WP_URL = os.environ.get("TECHTRENDS_WP_URL", "https://arakava-news-2.orbitalo.net")
WP_USER = os.environ.get("TECHTRENDS_WP_USER", "admin")
WP_PASS = os.environ.get("TECHTRENDS_WP_PASS", "eJIyhW0p5PFacjvvKGufKeXS")
OPENROUTER_KEY = os.environ.get(
    "TECHTRENDS_OPENROUTER_KEY",
    "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",
)
TELEGRAM_TOKEN = os.environ.get(
    "TECHTRENDS_TELEGRAM_TOKEN",
    "8551565940:AAHIUpZND-tCNGv9yEoNPRyPt4GxEPYBJdE",
)
TELEGRAM_CHAT = os.environ.get("TECHTRENDS_TELEGRAM_CHAT", "674951792")
# NOTE(review): file is named reddit_trends.log although this workflow pulls
# from HN + GitHub — kept for backward compatibility with existing log setup.
LOG_FILE = os.environ.get("TECHTRENDS_LOG_FILE", "/opt/rss-manager/logs/reddit_trends.log")
WP_CATEGORY = os.environ.get("TECHTRENDS_WP_CATEGORY", "Tech-Trends")
||||
# ── Logging ────────────────────────────────────────────────────
# Module-level side effect: creating the log directory under /opt requires
# write access there — presumably the script runs via root cron. TODO confirm.
os.makedirs(os.path.dirname(LOG_FILE), exist_ok=True)
# Configures the *root* logger: any library that logs will also end up in
# this file at INFO level.
logging.basicConfig(
    filename=LOG_FILE,
    level=logging.INFO,
    # NOTE(review): tag says "tech_trends" while the log file (and module)
    # is named reddit_trends — looks like a copy-paste leftover; confirm
    # which name downstream log tooling expects before changing either.
    format="%(asctime)s [tech_trends] %(levelname)s %(message)s",
)
# Root logger, shared by every function in this script.
log = logging.getLogger()
||||
# ── Hacker News API ────────────────────────────────────────────
def fetch_hn_stories(feed="topstories", limit=30, min_score=50):
    """Fetch recent stories from the Hacker News Firebase API.

    Args:
        feed: HN feed name ("topstories", "newstories", "beststories").
        limit: maximum number of story IDs to inspect from the feed.
        min_score: stories with a lower score are skipped.

    Returns:
        List of dicts with keys title/url/hn_url/score/comments/source,
        restricted to stories at most 48 hours old. Returns an empty list
        on any feed-level error — this function never raises.
    """
    try:
        r = requests.get(
            f"https://hacker-news.firebaseio.com/v0/{feed}.json",
            timeout=10
        )
        ids = r.json()[:limit]
        # Hoisted loop invariant: take "now" once instead of once per item.
        now_ts = datetime.now(timezone.utc).timestamp()
        stories = []
        for story_id in ids:
            try:
                item = requests.get(
                    f"https://hacker-news.firebaseio.com/v0/item/{story_id}.json",
                    timeout=5
                ).json()
                # Skip deleted items and non-story types (jobs, polls, ...).
                if not item or item.get("type") != "story":
                    continue
                if item.get("score", 0) < min_score:
                    continue
                # Only articles from the last 48 hours.
                age_h = (now_ts - item.get("time", 0)) / 3600
                if age_h > 48:
                    continue
                stories.append({
                    "title": item.get("title", ""),
                    "url": item.get("url", ""),
                    "hn_url": f"https://news.ycombinator.com/item?id={story_id}",
                    "score": item.get("score", 0),
                    "comments": item.get("descendants", 0),
                    "source": f"HN/{feed}",
                })
            except Exception as e:
                # Best effort: one broken item must not kill the run, but
                # leave a trace instead of swallowing the error silently.
                log.debug(f"HN item {story_id} uebersprungen: {e}")
                continue
        log.info(f"HN/{feed}: {len(stories)} Stories (Score≥{min_score}, <48h)")
        return stories
    except Exception as e:
        log.warning(f"HN {feed} Fehler: {e}")
        return []
||||
# ── GitHub Trending (Search API) ──────────────────────────────
def fetch_github_trending(days=2, limit=10):
    """Return the repos that gained the most stars in the last *days* days.

    Uses the GitHub search API ("created since cutoff, sorted by stars")
    as a proxy for the trending page. Returns a list of dicts shaped like
    the HN story dicts (plus language/topics); empty list on any failure.
    """
    cutoff = (datetime.now() - timedelta(days=days)).strftime("%Y-%m-%d")
    try:
        resp = requests.get(
            "https://api.github.com/search/repositories",
            params={
                "q": f"created:>{cutoff}",
                "sort": "stars",
                "order": "desc",
                "per_page": limit,
            },
            headers={"Accept": "application/vnd.github.v3+json"},
            timeout=15,
        )
        if resp.status_code != 200:
            log.warning(f"GitHub API: HTTP {resp.status_code}")
            return []
        found = [
            {
                "title": f"{item['full_name']}: {item.get('description', '') or ''}",
                "url": item["html_url"],
                # Same link twice keeps the schema uniform with HN stories.
                "hn_url": item["html_url"],
                "score": item["stargazers_count"],
                "comments": item.get("forks_count", 0),
                "source": "GitHub/Trending",
                "language": item.get("language", ""),
                "topics": ", ".join(item.get("topics", [])[:5]),
            }
            for item in resp.json().get("items", [])
        ]
        log.info(f"GitHub Trending: {len(found)} Repos (seit {cutoff})")
        return found
    except Exception as e:
        log.warning(f"GitHub Trending Fehler: {e}")
        return []
|
||||
# ── OpenRouter analysis ────────────────────────────────────────
def analyse_with_ki(hn_stories, gh_repos):
    """Ask the OpenRouter LLM for a German trend article.

    Builds a German-language prompt from the collected HN stories and
    GitHub repos, calls the chat-completions endpoint, and strips any
    markdown code fences the model wraps around the HTML.

    Args:
        hn_stories: story dicts as produced by fetch_hn_stories().
        gh_repos: repo dicts as produced by fetch_github_trending().

    Returns:
        The article HTML as a string, or None on any API failure.
    """
    hn_lines = []
    for i, s in enumerate(hn_stories, 1):
        # Optional link to the external article (HN self-posts have no URL).
        ext = f"\n 🔗 Artikel: {s['url']}" if s["url"] else ""
        hn_lines.append(
            f"{i}. [{s['source']}] {s['title']}"
            f"\n ⭐ Score: {s['score']} | 💬 Kommentare: {s['comments']}"
            f"\n 📎 HN: {s['hn_url']}{ext}"
        )

    gh_lines = []
    for i, r in enumerate(gh_repos, 1):
        lang = f" [{r['language']}]" if r.get("language") else ""
        topics = f" | Topics: {r['topics']}" if r.get("topics") else ""
        gh_lines.append(
            f"{i}. [GitHub]{lang} {r['title']}"
            f"\n ⭐ Stars: {r['score']}{topics}"
            f"\n 🔗 {r['url']}"
        )

    # The prompt is deliberately German: the target blog publishes in German.
    prompt = f"""Du bist ein Tech-Journalist. Analysiere diese aktuellen Tech-Signale von Hacker News und GitHub und schreibe einen informativen deutschen Artikel für ein technikaffines Publikum.

=== HACKER NEWS (was Tech-Menschen gerade lesen & diskutieren) ===
{chr(10).join(hn_lines)}

=== GITHUB TRENDING (was gerade gebaut & gehypt wird) ===
{chr(10).join(gh_lines)}

AUFGABE:
1. Identifiziere die 5-7 stärksten Trends — HN-Score und GitHub-Stars zeigen echten Hype
2. Schreibe einen strukturierten deutschen Artikel:
 - Knackige, neugierig machende Überschrift
 - Einleitung: 4-5 Sätze — was bewegt die Tech-Welt heute konkret
 - Pro Trend: H3-Überschrift + 4-5 Sätze mit echten Details aus den Quellen + Quellenlinks:
 <p class="quellen"><a href="HN_ODER_ARTIKEL_URL">📰 Quelle lesen</a>[GITHUB_LINK]</p>
 wobei [GITHUB_LINK] = " | <a href=\"GITHUB_URL\">⭐ GitHub</a>" wenn es ein Repo-Trend ist
 - Fazit: 3-4 Sätze Gesamtbild — wohin entwickelt sich die Branche
3. Mindestlänge: 25 Sätze, nutze konkrete Zahlen (Stars, Scores) als Belege
4. Stil: präzise, tiefgründig, kein Clickbait, auf Deutsch

FORMAT: Nur HTML (h3, p, a). Kein Markdown, kein ```html."""

    headers = {
        "Authorization": f"Bearer {OPENROUTER_KEY}",
        "Content-Type": "application/json",
        # Referer/title identify the app to OpenRouter (rankings/quotas).
        "HTTP-Referer": "https://arakava-news-2.orbitalo.net",
        "X-Title": "Arakava Tech Trends",
    }
    payload = {
        "model": "anthropic/claude-3.5-haiku",
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": 4000,
        "temperature": 0.7,
    }
    try:
        # SECURITY FIX: this call previously passed verify=False, silently
        # disabling TLS certificate verification. openrouter.ai serves a
        # valid public certificate, so normal verification is restored.
        r = requests.post(
            "https://openrouter.ai/api/v1/chat/completions",
            headers=headers, json=payload, timeout=90,
        )
        if r.status_code == 200:
            content = r.json()["choices"][0]["message"]["content"].strip()
            # Strip markdown code fences the model sometimes adds anyway.
            content = re.sub(r"^```html?\s*", "", content)
            content = re.sub(r"\s*```$", "", content)
            log.info(f"KI-Analyse OK: {len(content)} Zeichen")
            return content
        else:
            log.error(f"OpenRouter Fehler {r.status_code}: {r.text[:200]}")
            return None
    except Exception as e:
        log.error(f"OpenRouter Exception: {e}")
        return None
|
||||
# ── Title extraction ───────────────────────────────────────────
def extract_title(content):
    """Return the text of the first h1/h2/h3 heading in the article HTML.

    Falls back to a dated generic German headline when no heading exists.
    """
    heading = re.search(r"<h[123][^>]*>(.*?)</h[123]>", content, re.IGNORECASE | re.DOTALL)
    if heading is None:
        return f"Tech-Trends: Was die Szene heute bewegt – {datetime.now().strftime('%d.%m.%Y')}"
    # Strip any nested tags (<b>, <em>, ...) from the heading text.
    return re.sub(r"<[^>]+>", "", heading.group(1)).strip()
|
||||
# ── WordPress category ─────────────────────────────────────────
def get_or_create_category(name):
    """Look up a WordPress category by name, creating it when missing.

    Returns the category ID, or None when both lookup and creation fail —
    the caller then publishes the post without a category. Previously any
    network error here raised and aborted the whole run before posting;
    this is now handled as best effort.
    """
    auth = base64.b64encode(f"{WP_USER}:{WP_PASS}".encode()).decode()
    h = {"Authorization": f"Basic {auth}", "Content-Type": "application/json"}
    try:
        # NOTE(review): verify=False is kept for the WordPress host, which
        # presumably uses a self-signed certificate — confirm, and prefer
        # installing proper certificates.
        r = requests.get(f"{WP_URL}/wp-json/wp/v2/categories?search={name}&per_page=5",
                         headers=h, timeout=10, verify=False)
        if r.status_code == 200:
            # "search" is fuzzy; accept only an exact case-insensitive match.
            for c in r.json():
                if c["name"].lower() == name.lower():
                    return c["id"]
        r = requests.post(f"{WP_URL}/wp-json/wp/v2/categories",
                          headers=h, json={"name": name}, timeout=10, verify=False)
        if r.status_code == 201:
            cat_id = r.json()["id"]
            log.info(f"Kategorie '{name}' angelegt (ID {cat_id})")
            return cat_id
    except Exception as e:
        # A category is nice-to-have; never let it abort the pipeline.
        log.warning(f"Kategorie-Fehler: {e}")
    return None
|
||||
# ── WordPress post ─────────────────────────────────────────────
def post_to_wordpress(title, content, cat_id):
    """Publish the article via the WP REST API.

    The post title gets today's date appended; cat_id may be None, in which
    case the post is published uncategorised. Returns the public post URL,
    or None when WordPress rejects the request.
    """
    credentials = base64.b64encode(f"{WP_USER}:{WP_PASS}".encode()).decode()
    request_headers = {"Authorization": f"Basic {credentials}", "Content-Type": "application/json"}
    today = datetime.now().strftime("%d.%m.%Y")
    payload = {
        "title": f"{title} [{today}]",
        "content": content,
        "status": "publish",
        "categories": [cat_id] if cat_id else [],
    }
    resp = requests.post(f"{WP_URL}/wp-json/wp/v2/posts",
                         headers=request_headers, json=payload, timeout=30, verify=False)
    if resp.status_code != 201:
        log.error(f"WordPress Fehler {resp.status_code}: {resp.text[:200]}")
        return None
    post_url = resp.json()["link"]
    log.info(f"WordPress Post: {post_url}")
    return post_url
|
||||
# ── Telegram ───────────────────────────────────────────────────
def send_telegram(title, url):
    """Push a notification with the article link to the Telegram chat.

    Best effort: failures are logged as warnings, never raised.
    """
    text = f"📡 <b>Tech-Trends</b>\n\n<b>{title}</b>\n\n<a href=\"{url}\">Artikel lesen</a>"
    message = {
        "chat_id": TELEGRAM_CHAT,
        "text": text,
        "parse_mode": "HTML",
        "disable_web_page_preview": False,
    }
    try:
        resp = requests.post(
            f"https://api.telegram.org/bot{TELEGRAM_TOKEN}/sendMessage",
            json=message,
            timeout=10,
        )
    except Exception as e:
        log.warning(f"Telegram Exception: {e}")
        return
    if resp.status_code == 200:
        log.info("Telegram gesendet")
    else:
        log.warning(f"Telegram Fehler {resp.status_code}")
|
||||
# ── Main ───────────────────────────────────────────────────────
def main():
    """Daily pipeline: collect signals → AI article → WordPress → Telegram."""
    log.info("=== Tech Trends Start ===")

    # Hacker News: combine top + new stories (new stories get a lower bar).
    top_stories = fetch_hn_stories("topstories", limit=40, min_score=100)
    new_stories = fetch_hn_stories("newstories", limit=30, min_score=30)

    # De-duplicate by title prefix, keeping the highest-scored occurrence.
    seen_titles = set()
    deduped = []
    for story in sorted(top_stories + new_stories, key=lambda s: s["score"], reverse=True):
        fingerprint = story["title"][:60].lower()
        if fingerprint in seen_titles:
            continue
        seen_titles.add(fingerprint)
        deduped.append(story)

    hn_stories = deduped[:20]  # only the top 20 go to the model
    log.info(f"HN gesamt: {len(hn_stories)} Stories")

    # GitHub Trending
    gh_repos = fetch_github_trending(days=2, limit=10)

    # Not enough raw material → a useful article is impossible; bail out.
    if len(hn_stories) + len(gh_repos) < 5:
        log.error("Zu wenige Quellen – Abbruch")
        return

    content = analyse_with_ki(hn_stories, gh_repos)
    if not content:
        log.error("KI-Analyse fehlgeschlagen – Abbruch")
        return

    title = extract_title(content)
    log.info(f"Titel: {title}")

    cat_id = get_or_create_category(WP_CATEGORY)
    post_url = post_to_wordpress(title, content, cat_id)
    if not post_url:
        return

    # Telegram only after a successful publish, with the live URL.
    send_telegram(title, post_url)
    log.info("=== Tech Trends Ende ===")
|
||||
if __name__ == "__main__":
    # Script entry point: run the full daily workflow when executed directly
    # (e.g. from cron).
    main()
|
||||
Loading…
Add table
Reference in a new issue