Auto-Sync: 2026-04-17 21:15
parent e402899aef · commit 7a37b749f3
5 changed files with 816 additions and 5 deletions

@@ -1,5 +1,5 @@
# Arakava News — Live State
> Auto-generated: 2026-04-17 21:00
> Auto-generated: 2026-04-17 21:15

## Service Status
| Service | CT | Status |

@@ -8,11 +8,11 @@
| WordPress Docker | 101 | running |

## Last Feed Activity (Top 5)
Heise Security: 2026-04-17 17:41:02
NachDenkSeiten: 2026-04-17 19:00:17
Heise Security: 2026-04-17 17:41:02
Golem.de: 2026-04-17 17:41:02
Norbert Häring: 2026-04-17 17:40:53
Heise Online: 2026-04-17 16:41:00
Rubikon.news: 2026-04-17 16:40:52

## Errors (last 24h)
- Total errors: 0
501 homelab-ai-bot/tools/rag.py.bak-multiquery Normal file

@@ -0,0 +1,501 @@
"""RAG Dokumentensuche — Elasticsearch direkt (Hybrid: kNN + deutscher Text).
|
||||
|
||||
RAGFlow bleibt Ingestion; Suche geht direkt an ES (Issue #51).
|
||||
"""
|
||||
|
||||
import base64
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
|
||||
log = logging.getLogger("tools.rag")
|
||||
|
||||
ES_BASE = "http://100.109.101.12:1200"
|
||||
ES_USER = "elastic"
|
||||
ES_PASS = "infini_rag_flow"
|
||||
ES_INDEX = "ragflow_61f51c8c279011f1a174bd19863ba33e"
|
||||
KB_ID = "dc24edda27a311f19fe7fb811de6f016"
|
||||
OLLAMA_EMBED_URL = "http://100.84.255.83:11434/api/embeddings"
|
||||
EMBED_MODEL = "nomic-embed-text"
|
||||
|
||||
# Cross-Encoder Reranking (CT 123, pve-hetzner LAN)
|
||||
RERANKER_URL = "http://10.10.10.123:8099"
|
||||
RERANK_CANDIDATES = 15
|
||||
RERANK_TIMEOUT = 45
|
||||
RERANK_SNIPPET_CHARS = 512
|
||||
|
||||
MIN_TOP_K = 5
|
||||
# Breite Übersichten: mehr ES-Runden, mehr distinct Treffer (pro vollem Pfad docnm_kwd)
|
||||
MAX_TOP_K_NORMAL = 25
|
||||
MAX_TOP_K_WIDE = 60
|
||||
ES_SIZE_CAP = 200
|
||||
|
||||
TOOLS = [
|
||||
{
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "rag_search",
|
||||
"description": (
|
||||
"Durchsucht die private Dokumenten-Wissensbasis (>21.000 Dokumente: "
|
||||
"Vertraege, Versicherungen, Rente, Finanzamt, Familiendokumente, "
|
||||
"Anleitungen, Buecher, persoenliche Unterlagen). "
|
||||
"Nutze dieses Tool wenn der User nach einem bestimmten Dokument, "
|
||||
"Vertrag, Brief oder persoenlicher Information fragt. "
|
||||
"Bei breiten Fragen ('welche Versicherungen', Jahreskosten, Listen) "
|
||||
"top_k=15 oder hoeher setzen."
|
||||
),
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"query": {
|
||||
"type": "string",
|
||||
"description": (
|
||||
"Suchanfrage: Dokumentname, Thema oder Inhalt. Kurz und praezise, "
|
||||
"z.B. 'Familienbuch Opa Oma' oder 'Grundsteuer Erklaerung 2024'"
|
||||
),
|
||||
},
|
||||
"top_k": {
|
||||
"type": "integer",
|
||||
"description": "Anzahl Ergebnisse (5-25, Standard 10)",
|
||||
"default": 10,
|
||||
},
|
||||
},
|
||||
"required": ["query"],
|
||||
},
|
||||
},
|
||||
},
|
||||
]
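
# How this schema is wired up (a sketch, not part of this module): an
# OpenAI-compatible chat client sends TOOLS with each request; when the model
# replies with a tool call named "rag_search", its JSON arguments are parsed
# and dispatched via HANDLERS["rag_search"](**args), and the returned Markdown
# string goes back to the model as the tool message.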

SYSTEM_PROMPT_EXTRA = """RAG DOCUMENT SEARCH — MANDATORY RULES:
You have access to a private knowledge base with >21,000 documents (contracts, insurance, pension, tax office, family documents, manuals, books, personal records, employment contracts, child benefit, passport, ID card, vehicle, rental contracts, building-society contracts, invoices).

WHEN to call rag_search — ALWAYS for these questions:
- "do I have..." / "is there..." / "where is..." / "find..." / "show me..." + document/contract/insurance/notice
- Any question about personal records, contracts, insurance, invoices, official notices
- EVEN if you believe you know the answer — memory is NOT the knowledge base!
- EVEN if the topic is in memory — still call rag_search for a complete answer

WHEN NOT: only for pure homelab/IT questions, small talk, or when the user explicitly does NOT ask about documents.

SEARCH QUERY: short keywords, NO full sentences. Examples:
- "Familienbuch" / "Grundsteuer Erklaerung" / "Haftpflicht" / "Kindergeld" / "Mietvertrag" / "Arbeitsvertrag" / "Reisepass"

EVALUATING RESULTS:
- For broad questions ("which insurances", annual costs, lists): top_k=15-25, work through ALL hits in the tool response
- List the documents found, with folder and a short summary of the content
- Do NOT invent details that are not in the result
- The folder path (before the file name, separated by __) indicates the category
- If rag_search returns hits: ALWAYS list them, even if the content is incomplete
- Several hits for the same insurer/company: name each line of business/document type separately (vehicle, legal protection, liability, property, travel, health), with file name/folder
- NEVER answer "none found" or "not stored" WITHOUT having called rag_search first"""


def _basic_auth_header() -> str:
    token = base64.b64encode(f"{ES_USER}:{ES_PASS}".encode()).decode()
    return f"Basic {token}"


def _ollama_embed(text: str) -> list | None:
    body = json.dumps({"model": EMBED_MODEL, "prompt": text}).encode()
    req = urllib.request.Request(
        OLLAMA_EMBED_URL,
        data=body,
        method="POST",
        headers={"Content-Type": "application/json"},
    )
    try:
        with urllib.request.urlopen(req, timeout=120) as resp:
            data = json.load(resp)
        emb = data.get("embedding")
        if not emb:
            return None
        if len(emb) != 768:
            # The q_768_vec field in the ES index expects 768 dims (nomic-embed-text).
            log.warning("Unexpected embedding dimension %s", len(emb))
        return emb
    except Exception as e:
        log.error("Ollama embed error: %s", e)
        return None


def _ocr_note(text: str) -> str:
    if not text or len(text) < 40:
        return ""
    non_alnum = sum(1 for c in text if not c.isalnum() and not c.isspace())
    ratio = non_alnum / max(len(text), 1)
    words = re.findall(r"\w+", text, re.UNICODE)
    avg_len = (sum(len(w) for w in words) / len(words)) if words else 0.0
    if ratio > 0.15 or avg_len < 2.0:
        return " [OCR probably poor]"
    return ""


def _folder_from_docname(name: str) -> str:
    """Extract the folder path from docnm_kwd (__ is the separator)."""
    parts = name.rsplit("__", 1)
    if len(parts) == 2:
        return parts[0].replace("__", " > ").replace("_", " ")
    return ""


def _dedup_key(name: str) -> str:
    """Normalize a document name for deduplication.

    Keeps only the file name (after the last __) and ignores the
    file extension and copy markers like (1), (2).
    'Ordner__Foo(1).pdf' and 'Anderer__Foo.txt' are treated as equal.
    """
    fname = name.rsplit("__", 1)[-1] if "__" in name else name
    key = re.sub(r"\.(pdf|txt|docx?|xlsx?|csv|png|jpg|jpeg)$", "", fname, flags=re.IGNORECASE)
    key = re.sub(r"\s*\(\d+\)\s*$", "", key).rstrip()
    return key.lower()


def _dedup_key_full_doc(name: str) -> str:
    """One chunk per full docnm_kwd — the same file name in different folders stays separate."""
    return re.sub(r"\s+", " ", (name or "").strip().lower())
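
# Illustration of how the two keys differ on the same docnm_kwd (values follow
# from the regexes above; the path is a made-up example):
#   _dedup_key("Vertraege__Miete__Mietvertrag(2).PDF")           -> "mietvertrag"
#   _dedup_key_full_doc("Vertraege__Miete__Mietvertrag(2).PDF")  -> "vertraege__miete__mietvertrag(2).pdf"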


def _es_hybrid_search(query: str, es_size: int) -> dict:
    qvec = _ollama_embed(query)
    if not qvec:
        return {"_error": "Embedding failed (Ollama unreachable?)."}

    es_size = min(ES_SIZE_CAP, max(es_size, 20))
    kb_filter = {"term": {"kb_id": KB_ID}}
    body = {
        "size": es_size,
        "knn": {
            "field": "q_768_vec",
            "query_vector": qvec,
            "k": es_size,
            "num_candidates": min(500, max(es_size * 5, 150)),
            "filter": [kb_filter],
        },
        "query": {
            "bool": {
                "filter": [kb_filter],
                "should": [
                    {"match": {"content_de": {"query": query, "boost": 2.0}}},
                    {"match": {"content_ltks": {"query": query.lower(), "boost": 0.4}}},
                    {"match": {"docnm_kwd": {"query": query, "boost": 3.0}}},
                ],
                "minimum_should_match": 0,
            }
        },
    }
    url = f"{ES_BASE}/{ES_INDEX}/_search"
    req = urllib.request.Request(
        url,
        data=json.dumps(body).encode(),
        method="POST",
        headers={
            "Content-Type": "application/json",
            "Authorization": _basic_auth_header(),
        },
    )
    try:
        with urllib.request.urlopen(req, timeout=120) as resp:
            return json.load(resp)
    except urllib.error.HTTPError as e:
        err = e.read().decode(errors="replace")[:800]
        log.error("ES HTTP %s: %s", e.code, err)
        return {"_error": f"ES HTTP {e.code}: {err}"}
    except Exception as e:
        log.error("ES search error: %s", e)
        return {"_error": str(e)}
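
# Note on the hybrid request: with a top-level "knn" clause next to "query",
# Elasticsearch runs both retrievals and sums the (boosted) scores of documents
# found by either, so vector similarity and the German text matches reinforce
# each other in _score.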


def _snippet_for_rerank(src: dict) -> str:
    doc_name = src.get("docnm_kwd") or ""
    raw = src.get("content_with_weight") or src.get("content_de") or ""
    prefix = doc_name[:120] + "\n" if doc_name else ""
    return prefix + raw[:RERANK_SNIPPET_CHARS]


def _rerank_hits(query: str, hits: list) -> tuple[list, bool]:
    """Rerank with the cross-encoder; blend its score with the ES rank (RRF)."""
    if not hits or not RERANKER_URL:
        return hits, False
    to_score = hits[:RERANK_CANDIDATES]
    docs = []
    for h in to_score:
        src = h.get("_source") or {}
        docs.append(_snippet_for_rerank(src))
    if not any((d or "").strip() for d in docs):
        return hits, False
    body = json.dumps({"query": query, "documents": docs}).encode()
    url = f"{RERANKER_URL.rstrip('/')}/rerank"
    req = urllib.request.Request(
        url,
        data=body,
        method="POST",
        headers={"Content-Type": "application/json"},
    )
    try:
        with urllib.request.urlopen(req, timeout=RERANK_TIMEOUT) as resp:
            data = json.load(resp)
        scores = data.get("scores") or []
        if len(scores) != len(to_score):
            log.warning(
                "rerank score count mismatch: %s vs %s",
                len(scores),
                len(to_score),
            )
            return hits, False

        k = 60
        rr_ranked = sorted(range(len(scores)), key=lambda i: scores[i], reverse=True)
        rr_rank_map = {i: rank + 1 for rank, i in enumerate(rr_ranked)}

        combined: list[tuple[float, int]] = []
        for idx in range(len(to_score)):
            es_rank = idx + 1
            rr_rank = rr_rank_map[idx]
            rrf = 1.0 / (k + es_rank) + 1.0 / (k + rr_rank)
            combined.append((rrf, idx))
        combined.sort(key=lambda x: x[0], reverse=True)

        new_order: list = []
        for rrf, idx in combined:
            h = to_score[idx]
            h["_rerank_score"] = float(scores[idx])
            h["_rrf_score"] = float(rrf)
            new_order.append(h)
        rest = hits[RERANK_CANDIDATES:]
        return new_order + rest, True
    except Exception as e:
        log.warning("rerank failed (fallback to ES): %s", e)
        return hits, False
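
# Worked example for the RRF blend above (k = 60): a hit ranked 2nd by ES but
# 1st by the cross-encoder scores 1/62 + 1/61 ≈ 0.0325 and edges out a hit
# ranked 1st by ES but 3rd by the reranker (1/61 + 1/63 ≈ 0.0323). The
# reranker can promote close candidates without completely overriding ES.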


def _is_wide_recall_query(q: str) -> bool:
    """Overview/list/cost questions: run several searches and merge the hits."""
    ql = (q or "").lower()
    if any(x in ql for x in ("welche versicherung", "alle versicherung", "versicherungen habe")):
        return True
    if "versicherung" in ql and any(
        x in ql
        for x in (
            "welche",
            "alle",
            "liste",
            "überblick",
            "ueberblick",
            "kosten",
            "beitrag",
            "jähr",
            "jaehr",
            "jahres",
            "gesamt",
            "summe",
            "übersicht",
            "uebersicht",
        )
    ):
        return True
    costish = any(
        x in ql
        for x in (
            "kosten",
            "kostet",
            "wie viel",
            "wieviel",
            "beitrag",
            "beiträge",
            "beitraege",
            "eur",
            "euro",
            "jähr",
            "jaehr",
            "jahreskosten",
            "prämie",
            "praemie",
        )
    )
    broad = any(
        x in ql
        for x in (
            "liste",
            "übersicht",
            "uebersicht",
            "alle",
            "gesamt",
            "summe",
            "jährlich",
            "jaehrlich",
        )
    )
    return costish and broad
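
# Examples (per the rules above; the keyword lists stay German because the
# users query in German):
#   "Welche Versicherungen habe ich?"  -> True  (insurance + "welche")
#   "Liste aller jährlichen Beiträge"  -> True  (cost-ish + broad)
#   "Wo ist mein Mietvertrag?"         -> False (single-document lookup)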


# The extra queries cover lines of business + insurers (recall)
_WIDE_SUBQUERIES = [
    "Versicherung Beitragsrechnung Jahresbeitrag",
    "Wohngebäudeversicherung Gebäude Beitrag",
    "Hausratversicherung Beitrag Ergo",
    "Haftpflichtversicherung Beitrag GARANTA",
    "Kfz Kasko Haftpflicht Beitragsrechnung",
    "Rechtsschutzversicherung Beitrag",
    "Lebensversicherung Beitrag",
    "Krankenversicherung PKV Beitrag",
    "Sachversicherung LVM Beitrag",
    "LVM AutoPlus Versicherungsschein",
    "Allianz Versicherung Police",
    "Nürnberger Versicherung Beitrag",
    "Ergo Versicherung Police",
    "Unfallversicherung Berufsunfähigkeit",
    "Bausparvertrag Bauspar",
    "Ford Transit Nutzfahrzeug Versicherung",
    "Kfz Versicherungsschein Beitrag jährlich",
]


def _merge_hits_from_queries(
    queries: list[str],
    es_size: int,
    pool_cap: int,
    *,
    full_path_dedup: bool = False,
) -> tuple[list, str | None]:
    """Run several hybrid searches; keep the highest-scoring hit per dedup key."""
    best: dict[str, dict] = {}
    last_err: str | None = None

    def dkey(dn: str) -> str:
        return _dedup_key_full_doc(dn) if full_path_dedup else _dedup_key(dn)

    def absorb(hits: list) -> None:
        for h in hits:
            src = h.get("_source") or {}
            dn = src.get("docnm_kwd") or "?"
            dk = dkey(dn)
            sc = float(h.get("_score") or 0.0)
            old = best.get(dk)
            if old is None or sc > float(old.get("_score") or 0.0):
                best[dk] = h

    for q in queries:
        q = (q or "").strip()
        if not q:
            continue
        data = _es_hybrid_search(q, es_size)
        if "_error" in data:
            last_err = str(data["_error"])
            log.warning("wide_recall subquery fail %s: %s", q[:40], last_err)
            continue
        absorb((data.get("hits") or {}).get("hits") or [])

    merged = sorted(best.values(), key=lambda h: float(h.get("_score") or 0.0), reverse=True)
    return merged[:pool_cap], last_err
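
# Note: ES scores from different subqueries are not strictly comparable, but
# for pooling candidates this cheap best-score-per-key merge is adequate; the
# cross-encoder reranker reorders the pool afterwards anyway.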


def handle_rag_search(query: str, top_k: int = 8, **kw):
    if not query or not query.strip():
        return "rag_search: query missing."

    qstrip = query.strip()
    wide = _is_wide_recall_query(qstrip)
    cap = MAX_TOP_K_WIDE if wide else MAX_TOP_K_NORMAL
    top_k = max(MIN_TOP_K, min(int(top_k or 10), cap))
    es_size = min(ES_SIZE_CAP, max(top_k * 10, 70))

    if wide:
        subqs = [qstrip]
        for sq in _WIDE_SUBQUERIES:
            if sq.lower() not in qstrip.lower():
                subqs.append(sq)
        pool_cap = max(top_k * 5, 120)
        hits, err = _merge_hits_from_queries(
            subqs[:22],
            es_size,
            pool_cap=pool_cap,
            full_path_dedup=True,
        )
        if err and not hits:
            return f"Error during document search: {err}"
        header = (
            f"**Wide search ({len(subqs[:22])} queries, dedup=full path) '{qstrip}' — "
            f"{len(hits)} candidates, showing up to {top_k}:**\n"
        )
        snip_len = 400
    else:
        data = _es_hybrid_search(qstrip, es_size)
        if "_error" in data:
            return f"Error during document search: {data['_error']}"
        hits = (data.get("hits") or {}).get("hits") or []
        header = f"**Documents for '{qstrip}' (up to {top_k}):**\n"
        snip_len = 650

    if not hits:
        return f"No results for '{qstrip}' found in the knowledge base."

    hits, reranked = _rerank_hits(qstrip, hits)

    seen_docs: set[str] = set()
    lines: list[str] = []
    count = 0

    def out_dkey(doc_name: str) -> str:
        return _dedup_key_full_doc(doc_name) if wide else _dedup_key(doc_name)

    for h in hits:
        if count >= top_k:
            break
        src = h.get("_source") or {}
        doc_name = src.get("docnm_kwd") or "?"
        dk = out_dkey(doc_name)
        if dk in seen_docs:
            continue
        seen_docs.add(dk)

        if "_rrf_score" in h:
            score = float(h["_rrf_score"])
            score_label = "RRF"
        elif "_rerank_score" in h:
            score = float(h["_rerank_score"])
            score_label = "Rerank"
        else:
            score = float(h.get("_score") or 0.0)
            score_label = "ES"
        raw = src.get("content_with_weight") or src.get("content_de") or ""
        content = raw[:snip_len].strip()
        ocr = _ocr_note(raw)
        folder = _folder_from_docname(doc_name)
        filename = doc_name.rsplit("__", 1)[-1] if "__" in doc_name else doc_name
        folder_line = f" Folder: {folder}" if folder else ""

        lines.append(
            f"---\n**{count + 1}. {filename}** ({score_label}: {score:.3f}){ocr}"
        )
        if folder_line:
            lines.append(folder_line)
        if content:
            lines.append(f"```\n{content}\n```")
        count += 1

    if count == 0:
        return f"No documents found for '{qstrip}'."

    hdr = header.rstrip() + (
        " _(Cross-Encoder reranked)_" if reranked else ""
    ) + "\n"
    lines.insert(0, hdr)
    tail = (
        "\n---\n(End of results. Only these documents in this round. "
        + (
            "For sums/figures: check all hits; OCR may be incomplete."
            if wide
            else ""
        )
        + ")"
    )
    lines.append(tail)

    return "\n".join(lines)


HANDLERS = {
    "rag_search": handle_rag_search,
}
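

# Minimal smoke run (a sketch: assumes ES and Ollama are reachable; the
# reranker is optional and falls back silently). The query is only an example.
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    print(handle_rag_search("Grundsteuer Erklaerung", top_k=10))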
310 homelab.conf.bak-20260417-211337 Normal file

@@ -0,0 +1,310 @@
# ============================================================
# homelab.conf — SINGLE SOURCE OF TRUTH
# ============================================================
# When an IP, URL, container, or password changes:
# → edit ONLY THIS FILE.
# → sync-state.sh reads from here and generates everything else.
# → Never maintain STATE.md, MOTDs, or issues by hand.
# ============================================================

# --- DOMAINS ---
DOMAIN_PRIMARY="arakavanews.com"
DOMAIN_OLD="arakava-news-2.orbitalo.net"
DOMAIN_MATOMO="matomo.orbitalo.net"
DOMAIN_SEAFILE="seafile.orbitalo.net"
DOMAIN_GRAFANA="grafana.orbitalo.net"
DOMAIN_PDM="pdm.orbitalo.info"
DOMAIN_RSS="rss-manager.orbitalo.net"
DOMAIN_REDAX="redax.orbitalo.net"

# ============================================================
# SERVERS — unambiguous naming by location
# ============================================================
# Cambodia (KA): 3 servers, LAN 192.168.0.x
# Muldenstein (MU): 3 servers (1 offline), LAN 192.168.178.x
# Ramsin (HE): 1 server at Helmut's
# Hetzner DC: 1 server
# ============================================================

# --- HETZNER ---
SRV_HETZNER="100.88.230.59"

# --- CAMBODIA (3 servers, Takeo) ---
SRV_KA1="100.122.56.60"
SRV_KA1_LOCAL="192.168.0.197"
SRV_KA1_HOSTNAME="pve-ka-1"

SRV_KA2="100.120.126.95"
SRV_KA2_LOCAL="192.168.0.198"
SRV_KA2_HOSTNAME="pve-ka-2"

SRV_KA3="100.103.90.94"
SRV_KA3_LOCAL="192.168.0.199"
SRV_KA3_HOSTNAME="pve-ka-3"

# --- PHNOM PENH (2 servers, condo — pp-cluster) ---
SRV_PP1="100.126.26.46"
SRV_PP1_LOCAL="192.168.0.171"
SRV_PP1_HOSTNAME="pve-pp-1"

SRV_PP2="100.95.156.25"
SRV_PP2_LOCAL="192.168.0.227"
SRV_PP2_HOSTNAME="pve-pp-2"

# --- MULDENSTEIN (3 servers, pve-mu-1 currently offline) ---
# SRV_MU1="???"
# SRV_MU1_HOSTNAME="pve-mu-1"

SRV_MU2="100.99.101.37"
SRV_MU2_LOCAL="192.168.178.123"
SRV_MU2_HOSTNAME="pve-mu-2"

SRV_MU3="100.109.101.12"
SRV_MU3_LOCAL="192.168.178.250"
SRV_MU3_HOSTNAME="pve-mu-3"

# --- RAMSIN (at Helmut's) ---
SRV_HE="100.87.235.11"
SRV_HE_HOSTNAME="pve-he"

# --- CURSOR / MONITORING BOT (CT 116 on pve-mu-2, Muldenstein) ---
SRV_CURSOR="100.88.230.74"
SRV_CURSOR_HOSTNAME="monitoring-bot"

# --- AI SERVER (Windows, Muldenstein — Cursor + GPU workloads) ---
SRV_KI="100.84.255.83"
SRV_KI_HOSTNAME="KI-Server"
SRV_KI_USER="wutti"
SRV_KI_GPU="NVIDIA RTX 3090 24GB"
SRV_KI_OS="Windows 10 Build 26200"
SRV_KI_SSH="ssh ki-server (key auth via monitoring-bot SOCKS5)"

# --- BACKUP (PBS) ---
SRV_PBS_MU="100.99.139.22"
SRV_PBS_KA="lokal"

# --- PASSWORDS ---
PW_HETZNER="Astral-Proxmox!2026"
PW_DEFAULT="astral66"
PW_WP_ADMIN="eJIyhW0p5PFacjvvKGufKeXS"
PW_5V8_USER="Holgerhh"
PW_5V8_PASS="ddlhh"
PW_EDELMETALL_DASHBOARD=""
PW_PDM_USER="root"
PW_PDM_PASS="astral66"

# ============================================================
# CONTAINERS — format: CT_<VMID>_<SERVER>="name|tailscale_ip|services"
# Server codes: HZ=Hetzner, KA1/2/3=Cambodia, MU2/3=Muldenstein, HE=Ramsin
# ============================================================

# --- pve-hetzner (core infrastructure) ---
CT_101_HZ="wordpress-v2|100.91.212.19|WordPress + MySQL (Docker) — arakavanews.com"
CT_103_HZ="seafile|100.75.247.60|Seafile (Docker)"
CT_109_HZ="rss-manager|100.113.244.101|RSS Manager + Matomo — WP internal via http://10.10.10.101"
CT_110_HZ="portainer|100.109.206.43|Portainer Docker UI + Loki stack"
CT_111_HZ="forgejo|100.89.246.60|Forgejo Git server"
CT_112_HZ="fuenfvoracht|100.73.171.62|FuenfVorAcht Telegram bot"
CT_113_HZ="redax-wp|100.69.243.16|Redakteur WordPress AI author + DeutschlandBlog"
CT_115_HZ="flugscanner-hub|100.92.161.97|Flight price scanner hub + scheduler"
CT_116_HZ="homelab-ai-bot (1 GB RAM)|100.123.47.7|Hausmeister bot (Qwen3-VL 30B via Ollama/KI-Server, text+vision) + Save.TV web UI + web_search via SearXNG"
CT_121_HZ="deep-research|100.74.196.29|Open Deep Research + SearXNG — LangGraph API on port 2024"
CT_117_HZ="memory-service|100.121.192.94|Memory Service API (FastAPI + SQLite)"
CT_144_HZ="muldenstein-backup|—|backup archive (read-only)"
CT_999_HZ="cluster-docu|100.79.8.49|documentation"

# --- pve-ka-1 (Cambodia, main server) ---
CT_110_KA1="uptime-kuma|—|uptime monitoring"
CT_115_KA1="flugscanner-asia|100.112.190.22|scraping node Asia"
CT_118_KA1="Django-Klon-Neu|—|Django app (taxi)"
CT_134_KA1="gold-silber-v3|100.72.230.87|precious metals dashboard + Telegram bot"
CT_200_KA1="doc-converter|—|document converter"
CT_888_KA1="MCP-Proxmox|—|MCP server"
CT_999_KA1="cluster-docu|—|documentation"
VM_100_KA1="debian|—|Debian VM"

# --- pve-pp-1 (Phnom Penh, condo — work machine) ---
CT_100_PP1="yt-desktop|100.112.224.39|XFCE desktop + xrdp + Chromium/Firefox + Seafile sync (videos) + NFS mount torrents"
CT_103_PP1="torrent|—|qBittorrent web UI :8080 (192.168.0.129) + NFS export → CT 100"

# --- pve-pp-2 (Phnom Penh, condo — reserve/standby) ---
CT_101_PP2="yt-desktop-standby|—|standby copy of CT 100 (stopped)"
CT_102_PP2="torrent|—|qBittorrent web UI :8080 (192.168.0.193)"

# --- pve-ka-2 (Cambodia, shop server) ---
CT_504_KA2="Shop-Template|—|shop template (stopped)"
CT_8000_KA2="Kunde0-Shop|—|customer 0 shop (stopped)"
CT_8010_KA2="Kunde1-Shop|—|customer 1 shop (stopped)"

# --- pve-ka-3 (Cambodia, webcam + services) ---
CT_101_KA3="freshrss|—|FreshRSS reader"
CT_103_KA3="Intercity-Taxi|—|Intercity Taxi app"
CT_104_KA3="bt-search|—|BT search"
CT_141_KA3="llm-router-dev|—|LLM router development"
CT_600_KA3="webcam|100.80.76.118|Restreamer + Dahua 4K cam → cam.arakavanews.com"
VM_500_KA3="frigate-vm|100.104.64.99|Frigate NVR + Coral TPU — 3 cameras, GUI :5000"

# --- pve-mu-2 (Muldenstein, shop & development server) ---
CT_111_MU2="uptimekuma|—|uptime monitoring"
CT_112_MU2="myspeed|—|internet speed test"
CT_113_MU2="pve-scripts-local|—|PVE helper scripts"
CT_114_MU2="djangoadmin|—|Django admin"
CT_115_MU2="Takeo-PC-Shop-Engl|—|PC shop (English)"
CT_116_MU2="monitoring-bot|100.88.230.74|Cursor IDE + Tailscale gateway (userspace) + SSH hub — CT 116 on pve-mu-2 (Debian 13, 3.4GB RAM)"
CT_117_MU2="Intercity-Taxi|—|Intercity Taxi"
CT_123_MU2="Kofi-Shop-PP|—|Kofi shop Phnom Penh"
CT_128_MU2="rustdeskserver|—|RustDesk remote desktop"
CT_130_MU2="PC-Shop-Takeo|—|PC shop Takeo"
CT_131_MU2="PC-Shopp-PP|—|PC shop Phnom Penh"
CT_136_MU2="Seleniumbase|—|Selenium scraping"
CT_140_MU2="Alfredo-Pizza|—|Pizza Alfredo"
CT_150_MU2="Pizza-Express-Wolfen|—|Pizza Express Wolfen"
CT_160_MU2="Red-Pizza|—|Red Pizza"
CT_180_MU2="Mellensa-Pizza|—|Mellensa Pizza"
CT_190_MU2="Ali-Baba|—|Ali Baba"
CT_200_MU2="Pizza-Di-Angelo|—|Pizza Di Angelo"
CT_500_MU2="Test-Shop|—|test shop"
CT_501_MU2="Test-Shop-Prod|—|test shop production"
CT_502_MU2="Test-Shop-2|—|test shop 2"

# --- pve-mu-3 (Muldenstein, infrastructure + mirrors) ---
CT_139_MU3="Syncthing-Muldenstein|—|Syncthing"
CT_141_MU3="syncthing|—|Syncthing"
CT_142_MU3="WG-easy|—|WireGuard VPN"
CT_143_MU3="Raspi-Broker|—|ioBroker MQTT broker"
CT_145_MU3="flugscanner-mu|100.75.182.15|flight price scanner node DE"
CT_504_MU3="projektscan-template|—|Projektscan template"
CT_600_MU3="wp-mirror|100.92.205.101|WordPress mirror (redundancy for CT 101)"
CT_601_MU3="rss-mirror|—|RSS Manager mirror (redundancy for CT 109)"
CT_700_MU3="ragflow|192.168.178.154|RAGFlow PDF RAG (Docker, Ollama/KI-Server, Synology SMB) — ~13k PDFs"
VM_144_MU3="BT-Bridge|—|BT bridge VM"

# --- pve-he (Ramsin, at Helmut's) ---
# Containers not yet inventoried

# --- TELEGRAM BOTS ---
TG_CHAT_ID="674951792"
TG_MUTTER_TOKEN="8551565940:AAHIUpZND-tCNGv9yEoNPRyPt4GxEPYBJdE"
TG_FUENFVORACHT_TOKEN="8799990587:AAEoQuohGdoJ2WudoOHs_j5Ns3iwft6OlFc"
TG_EDELMETALL_TOKEN="8262992299:AAEf8YHPsz42ZdP85DV7JqC4822Ts75GqF4"
TG_HAUSMEISTER_TOKEN="8390233104:AAHdgF6r7qZsQEZHIBHPV1ky3v-6-YULvj8"

# --- PROXMOX API TOKENS ---
PVE_TOKEN_HETZNER_NAME="mcp-homelab"
PVE_TOKEN_HETZNER_VALUE="e986d3d5-36c0-425c-b1bb-20ed650a8065"

# --- API KEYS ---
FORGEJO_TOKEN="b874766bdf357bd4c32fa4369d0c588fc6193336"
FORGEJO_SYNC_TOKEN="b874766bdf357bd4c32fa4369d0c588fc6193336"
GITHUB_PAT="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"
OPENROUTER_KEY="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"
OPENAI_API_KEY="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"
MEMORY_API_TOKEN="Ai8eeQibV6Z1RWc7oNPim4PXB4vILU1nRW2-XgRcX2M"
MEMORY_API_URL="http://100.121.192.94:8400"
MATOMO_TOKEN="7d3987d48dcd7fdf9776bd81a4da1778"
MATOMO_URL="http://100.113.244.101"
MATOMO_SITE_ID="1"

# --- HOMELAB MCP SERVER (on the pve-hetzner host) ---
MCP_PATH="/root/homelab-mcp"
MCP_VENV="/root/homelab-mcp/.venv"
MCP_TOOLS="homelab_overview,homelab_all_containers,homelab_container_status,homelab_query_logs,homelab_get_errors,homelab_check_silence,homelab_host_health,homelab_metrics,homelab_get_config,homelab_loki_labels,homelab_prometheus_targets"

# --- SAVE.TV (online video recorder) ---
SAVETV_USER="739281"
SAVETV_PASS="Astral1966"
SAVETV_URL="https://www.save.tv"
# Download pipeline: ACTIVE (Save.TV → Hetzner CT116 → Jellyfin server → NAS Muldenstein)
# savetv_sync.py runs hourly on the Jellyfin server (100.77.105.3), 24h±30min delay, min. 700MB
# CT 116 /etc/hosts: www.save.tv → 172.66.146.119 (DNS GIL fix)
# CT 116 RAM: 1 GB (was 512 MB, which froze everything)
# Goal: Samba share on the RAID in Muldenstein → Jellyfin media library
# Architecture: Save.TV → pve-hetzner (temp) → Samba/CIFS → Jellyfin folder
# TODO: determine share IP, share name, credentials, Jellyfin path
# TODO: reverse-engineer the download endpoint (presumably SendungsDetails.cfm)
# TODO: cifs mount or smbclient for the transfer
# Status: login+EPG+AutoRecord DONE | download+sync OPEN

# --- E-MAIL (All-Inkl IMAP mirror of GMX) ---
MAIL_IMAP_SERVER="w0206aa8.kasserver.com"
MAIL_IMAP_PORT="993"
MAIL_USER="info@orbitalo.info"
MAIL_PASS="Astral-66"

# --- LOKI ---
LOKI_URL="http://100.109.206.43:3100"
LOKI_CT="110"

# --- PROMETHEUS ---
PROMETHEUS_URL="http://100.88.230.59:9090"
PROMETHEUS_STATUS="aktiv"

# --- ROUTING (Cloudflare tunnels) ---
# Format: TUNNEL_<ct>_<server>="domain|target|status"
TUNNEL_101_HZ="arakavanews.com|:80|aktiv"
TUNNEL_101_HZ_OLD="arakava-news-2.orbitalo.net|301→arakavanews.com|aktiv"
TUNNEL_109_HZ="matomo.orbitalo.net|:80|aktiv"
TUNNEL_600_KA3="cam.arakavanews.com|:8080|aktiv"
TUNNEL_600_MU3="arakavanews.com|:80|standby"
TUNNEL_601_MU3="rss-manager|:8080|standby"

# ============================================================
# PLANNED HARDWARE (not yet online)
# ============================================================

# --- KI-Tower (Muldenstein, planned) ---
# Logical name: ki-tower
# CPU: AMD Ryzen 7 7700 (8C/16T)
# RAM: 64 GB DDR5
# GPU: NVIDIA RTX 3090 (24 GB VRAM)
# Storage: 1 TB NVMe
# Role: chief — orchestrator, scene planning (Qwen 14B), hero images (FLUX.1-dev),
#       assembly (FFmpeg + NVENC), production.db (SQLite)
# Scripts: GPT-5.4 via OpenAI API (cloud, ~0.10-0.50 EUR/script)
# OS: planned Debian 12 + Docker + CUDA
# Services: vLLM (:8401), ComfyUI (:8402), orchestrator (Python)
# Project: ki-video/PLAN.md
# Status: planned

# --- NVIDIA worker rig (Muldenstein, planned) --- PRIMARY WORKER
# Logical name: gpu-worker
# GPUs: 4x NVIDIA RTX 3080 (10 GB GDDR6X each, PCIe 4.0 x16)
# CUDA: fully supported, identical stack to ki-tower
# Role: productive worker pool for the KI-Video pipeline
# GPU #0: XTTS v2 (TTS, ~4 GB) → then SDXL images (~7 GB) :8501
# GPU #1: SDXL (standard scenes, ~7 GB, continuous) :8502
# GPU #2: SadTalker (avatar lip sync, ~6 GB, waits on TTS) :8503
# GPU #3: Real-ESRGAN (upscaling) + Whisper (subtitles) :8504
# OS: planned Debian 12 + Docker + CUDA
# Architecture: 1 container per GPU, fixed assignment, HTTP API per worker
# Status: planned
#
# --- AMD mining rig (Muldenstein, reserve) --- MINOR ROLE
# Logical name: gpu-reserve
# GPUs: 8x AMD Radeon RX 6600 XT Dual (8 GB GDDR6 each, PCIe 4.0 x8)
# Chip: Navi 23 (gfx1032), RDNA 2
# ROCm: unofficial (HSA_OVERRIDE_GFX_VERSION=10.3.0 required)
# Function: reserve/minor role — Whisper (whisper.cpp), CPU batch jobs
# Priority: low. Use only when the 3080 rig is saturated.
# Alternative: sell and invest the proceeds in storage/RAM.
# OS: planned Debian 12 + Docker + ROCm (if needed)
# Status: shelved, decision after the 3080 rig build

# ============================================================
# NAME MAPPING (old → new) — migration reference
# ============================================================
# pve-hetzner → pve-hetzner (unchanged)
# pve1 → pve-ka-1 (Cambodia, main server)
# pve-Shops → pve-ka-2 (Cambodia, shop server)
# pve3 (KH) → pve-ka-3 (Cambodia, webcam + services)
# pve2 → pve-mu-2 (Muldenstein, shops & development)
# pve3 (MU) → pve-mu-3 (Muldenstein, infra + mirrors)
# pve-mu-1 → offline (Muldenstein, not yet inventoried)
# helmut-pve → pve-he (Ramsin, at Helmut's)
# PBS → pbs-mu (PBS Muldenstein)
# ============================================================

# OpenMemory (CT 122) - optional, default: http://10.10.10.122:8765
# OPENMEMORY_API_URL="http://10.10.10.122:8765"
# OPENMEMORY_USER_ID="orbitalo"
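
For illustration, a minimal sketch of how a generator in the spirit of sync-state.sh could consume the CT_ entries. The parsing logic is an assumption; only the name|tailscale_ip|services format comes from the header comment above.

```python
import re

# CT_<VMID>_<SERVER>="name|tailscale_ip|services" (format documented above);
# "—" marks containers without a Tailscale IP.
PAT = re.compile(r'^CT_(\d+)_([A-Z0-9]+)="([^|]*)\|([^|]*)\|(.*)"')

with open("homelab.conf", encoding="utf-8") as fh:
    for line in fh:
        m = PAT.match(line.strip())
        if m:
            vmid, server, name, ip, services = m.groups()
            print(f"{server:>3} CT{vmid:>4}  {name:<28} {ip:<18} {services}")
```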
@@ -1,5 +1,5 @@
# Infrastructure — Live State
> Auto-generated: 2026-04-17 21:00
> Auto-generated: 2026-04-17 21:15

## pve-hetzner Disk
| Mount | Used |

@@ -1,5 +1,5 @@
# Smart Home Muldenstein — Live State
> Auto-generated: 2026-04-17 21:00
> Auto-generated: 2026-04-17 21:15

## Backup Status
- Last backup: 696MB, 2026-04-17 04:43