Auto-Sync: 2026-03-26 12:30
This commit is contained in:
parent
ad19a6125e
commit
9d79af7481
4 changed files with 122 additions and 3 deletions
|
|
@ -1,5 +1,5 @@
|
|||
# Arakava News — Live State
|
||||
> Auto-generiert: 2026-03-26 12:15
|
||||
> Auto-generiert: 2026-03-26 12:30
|
||||
|
||||
## Service Status
|
||||
| Service | CT | Status |
|
||||
|
|
|
|||
119
homelab-ai-bot/tools/rag.py
Normal file
119
homelab-ai-bot/tools/rag.py
Normal file
|
|
@ -0,0 +1,119 @@
|
|||
"""RAG Dokumentensuche - durchsucht die Hausmeister-Wissensbasis (RAGFlow)."""
|
||||
|
||||
import json
import logging
import os
import urllib.request
|
||||
|
||||
log = logging.getLogger("tools.rag")

# Connection settings for the RAGFlow HTTP API.
#
# Each value can be overridden through an environment variable; the literals
# are the previously hard-coded values, kept as fallbacks so an existing
# deployment keeps working without any configuration change.
#
# NOTE(review): the fallback token is a credential committed to the
# repository. Rotate it and provide RAGFLOW_TOKEN via the environment.
RAGFLOW_URL = os.environ.get("RAGFLOW_URL", "http://100.109.101.12:8080/api/v1")
RAGFLOW_TOKEN = os.environ.get("RAGFLOW_TOKEN", "ragflow-test-token-2026")
# Identifier of the dataset that rag_search queries.
DATASET_ID = os.environ.get("RAGFLOW_DATASET_ID", "dc24edda27a311f19fe7fb811de6f016")
|
||||
|
||||
# Tool declarations exported by this module, in function-calling schema form.
# The "name" of each entry is the key under which its handler is registered
# in HANDLERS.
TOOLS = [
    {
        "type": "function",
        "function": {
            "name": "rag_search",
            "description": (
                "Durchsucht die private Dokumenten-Wissensbasis (>21.000 Dokumente: "
                "Vertraege, Versicherungen, Rente, Finanzamt, Familiendokumente, "
                "Anleitungen, Buecher, persoenliche Unterlagen). "
                "Nutze dieses Tool wenn der User nach einem bestimmten Dokument, "
                "Vertrag, Brief oder persoenlicher Information fragt."
            ),
            "parameters": {
                "type": "object",
                "properties": {
                    "query": {
                        "type": "string",
                        "description": (
                            "Suchanfrage: Dokumentname, Thema oder Inhalt. Kurz und praezise, "
                            "z.B. 'Familienbuch Opa Oma' oder 'Grundsteuer Erklaerung 2024'"
                        ),
                    },
                    # The 1-10 range is only advertised here; the handler
                    # clamps whatever value it actually receives.
                    "top_k": {
                        "type": "integer",
                        "description": "Anzahl Ergebnisse (1-10)",
                        "default": 5,
                    },
                },
                "required": ["query"],
            },
        },
    },
]
|
||||
|
||||
# Additional system-prompt text telling the model when to call rag_search and
# how to phrase queries (short keywords rather than full sentences).
# NOTE(review): presumably appended to the bot's system prompt by the tool
# loader - confirm against the caller.
SYSTEM_PROMPT_EXTRA = """RAG DOKUMENTENSUCHE:
Du hast Zugriff auf eine private Wissensbasis mit >21.000 Dokumenten (Vertraege, Versicherungen, Rente, Finanzamt, Familiendokumente, Anleitungen, Buecher, persoenliche Unterlagen).
Nutze rag_search wenn der User nach Dokumenten, Vertraegen, persoenlichen Unterlagen oder Informationen aus seinen Dateien fragt.
Die Suchanfrage sollte kurze Keywords sein, KEINE ganzen Saetze. Beispiele:
- "Familienbuch Opa Oma"
- "Grundsteuer Erklaerung"
- "Nürnberger Versicherung"
- "Allianz Beitraege"
Bei schlechten Ergebnissen: andere Keywords versuchen oder Dokumentnamen direkt suchen."""
|
||||
|
||||
|
||||
def _api_call(path, method="GET", body=None):
    """Send one JSON request to the RAGFlow HTTP API and return the decoded reply.

    Args:
        path: Endpoint path appended verbatim to ``RAGFLOW_URL``
            (for example ``"/retrieval"``).
        method: HTTP verb handed to ``urllib.request.Request``.
        body: JSON-serialisable payload, or ``None`` to send no request body.

    Returns:
        The response body parsed with ``json.loads``.

    Raises:
        urllib.error.URLError: If the request fails; ``urlopen`` raises its
            ``HTTPError`` subclass for HTTP error statuses.
        ValueError: If the response body is not valid JSON.
    """
    url = f"{RAGFLOW_URL}{path}"
    # Compare against None rather than truthiness so that an intentionally
    # empty payload ({} or []) is still transmitted as a JSON body.
    data = None if body is None else json.dumps(body).encode("utf-8")
    req = urllib.request.Request(
        url,
        data=data,
        headers={
            "Authorization": f"Bearer {RAGFLOW_TOKEN}",
            "Content-Type": "application/json",
        },
        method=method,
    )
    # The timeout keeps an unreachable RAGFlow host from blocking the caller
    # indefinitely.
    with urllib.request.urlopen(req, timeout=30) as resp:
        return json.loads(resp.read())
|
||||
|
||||
|
||||
def _format_chunks(chunks, query, top_k):
    """Render at most ``top_k`` distinct documents from ``chunks`` as Markdown."""
    lines = [f"**{len(chunks)} Treffer fuer '{query}'** (Top {top_k}):\n"]
    seen_docs = set()

    for chunk in chunks:
        if len(seen_docs) >= top_k:
            break

        doc_name = chunk.get("document_keyword") or chunk.get("document_name") or "?"
        # Several chunks may come from the same document; only the first one
        # encountered for each document is shown.
        if doc_name in seen_docs:
            continue
        seen_docs.add(doc_name)

        # "or" guards against explicit null values in the response.
        sim = chunk.get("similarity") or 0
        content = (chunk.get("content") or "")[:400].strip()

        lines.append(f"---\n**{len(seen_docs)}. {doc_name}** (Relevanz: {sim:.0%})")
        if content:
            lines.append(f"```\n{content}\n```")

    return "\n".join(lines)


def handle_rag_search(query: str, top_k: int = 5, **kw) -> str:
    """Search the RAGFlow dataset and return the hits as a Markdown string.

    Args:
        query: Search text. A missing, blank or non-string value yields the
            "query fehlt" message without contacting the server.
        top_k: Maximum number of distinct documents to list. Clamped to
            1..10; values that cannot be converted to an integer fall back
            to 5.
        **kw: Accepted and ignored, so unexpected tool arguments do not
            raise.

    Returns:
        A Markdown summary of the hits, a "no results" message, or an error
        message. Failures while querying or formatting are logged and
        reported in the returned string rather than raised.
    """
    if not isinstance(query, str) or not query.strip():
        return "rag_search: query fehlt."

    # Tool arguments are produced by a model, so do not trust their type.
    try:
        limit = int(top_k or 5)
    except (TypeError, ValueError, OverflowError):
        limit = 5
    limit = max(1, min(limit, 10))

    try:
        result = _api_call("/retrieval", method="POST", body={
            "question": query,
            "dataset_ids": [DATASET_ID],
            # NOTE(review): per the RAGFlow API reference this is the size of
            # the candidate pool used for vector scoring, not the number of
            # results returned; the result count is capped locally - confirm.
            "top_k": 2048,
            "similarity_threshold": 0.1,
            "vector_similarity_weight": 0.3,
        })

        # RAGFlow reports request-level failures in the JSON body through a
        # non-zero "code"; surface its message instead of failing later on a
        # missing "data" object.
        code = result.get("code", 0)
        if code not in (0, None):
            message = result.get("message") or f"RAGFlow code {code}"
            log.error("RAG search rejected: %s", message)
            return f"Fehler bei der Dokumentensuche: {message}"

        data = result.get("data")
        chunks = data.get("chunks") if isinstance(data, dict) else None
        if not chunks:
            return f"Keine Ergebnisse fuer '{query}' in der Wissensbasis gefunden."

        return _format_chunks(chunks, query, limit)

    except Exception as e:
        # Top-level tool boundary: keep the traceback in the log and hand a
        # readable message back to the caller instead of raising.
        log.exception("RAG search error: %s", e)
        return f"Fehler bei der Dokumentensuche: {e}"
|
||||
|
||||
|
||||
# Maps each tool name declared in TOOLS to the callable that implements it.
HANDLERS = {
    "rag_search": handle_rag_search,
}
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
# Infrastruktur — Live State
|
||||
> Auto-generiert: 2026-03-26 12:15
|
||||
> Auto-generiert: 2026-03-26 12:30
|
||||
|
||||
## pve-hetzner Disk
|
||||
| Mount | Belegt |
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
# Smart Home Muldenstein — Live State
|
||||
> Auto-generiert: 2026-03-26 12:15
|
||||
> Auto-generiert: 2026-03-26 12:30
|
||||
|
||||
## Backup-Status
|
||||
- Letztes Backup: 589MB, 2026-03-26 04:01
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue