diff --git a/homelab-ai-bot/llm.py b/homelab-ai-bot/llm.py index 67af0a58..3f3e4bf8 100644 --- a/homelab-ai-bot/llm.py +++ b/homelab-ai-bot/llm.py @@ -450,7 +450,7 @@ def ask_with_tools(question: str, tool_handlers: dict, session_id: str = None, d question + " Versicherung Beitrag Beitragsrechnung Jahresbetrag" ) - _rag_res = _rag_fn(query=_rag_q, top_k=25) + _rag_res = _rag_fn(query=_rag_q, top_k=60) if _rag_res and not _rag_res.startswith("Keine"): log.info("RAG-Pflicht: %d Zeichen — loesche Session-History", len(str(_rag_res))) messages = [ @@ -467,9 +467,9 @@ def ask_with_tools(question: str, tool_handlers: dict, session_id: str = None, d {"role": "assistant", "content": None, "tool_calls": [{"id": "forced_rag", "type": "function", "function": {"name": "rag_search", - "arguments": json.dumps({"query": _rag_q, "top_k": 25})}}]}, + "arguments": json.dumps({"query": _rag_q, "top_k": 60})}}]}, {"role": "tool", "tool_call_id": "forced_rag", - "content": str(_rag_res)[:32000]}, + "content": str(_rag_res)[:100000]}, {"role": "user", "content": question}, ] except Exception as e: diff --git a/homelab-ai-bot/tools/rag.py b/homelab-ai-bot/tools/rag.py index 7213df44..ca9b9fa7 100644 --- a/homelab-ai-bot/tools/rag.py +++ b/homelab-ai-bot/tools/rag.py @@ -21,10 +21,10 @@ OLLAMA_EMBED_URL = "http://100.84.255.83:11434/api/embeddings" EMBED_MODEL = "nomic-embed-text" MIN_TOP_K = 5 -# Breite Übersichten: mehr ES-Runden, mehr distinct Treffer -MAX_TOP_K_NORMAL = 15 -MAX_TOP_K_WIDE = 25 -ES_SIZE_CAP = 150 +# Breite Übersichten: mehr ES-Runden, mehr distinct Treffer (pro vollem Pfad docnm_kwd) +MAX_TOP_K_NORMAL = 25 +MAX_TOP_K_WIDE = 60 +ES_SIZE_CAP = 200 TOOLS = [ { @@ -146,6 +146,11 @@ def _dedup_key(name: str) -> str: return key.lower() +def _dedup_key_full_doc(name: str) -> str: + """Ein Chunk pro vollem docnm_kwd — gleicher Dateiname in verschiedenen Ordnern bleibt getrennt.""" + return re.sub(r"\s+", " ", (name or "").strip().lower()) + + def _es_hybrid_search(query: str, es_size: int) -> dict: qvec = _ollama_embed(query) if not qvec: @@ -273,16 +278,25 @@ _WIDE_SUBQUERIES = [ ] -def _merge_hits_from_queries(queries: list[str], es_size: int, pool_cap: int) -> tuple[list, str | None]: - """Führt mehrere Hybrid-Suchen aus; pro Dokument höchster Score.""" +def _merge_hits_from_queries( + queries: list[str], + es_size: int, + pool_cap: int, + *, + full_path_dedup: bool = False, +) -> tuple[list, str | None]: + """Führt mehrere Hybrid-Suchen aus; pro Dedup-Key höchster Score.""" best: dict[str, dict] = {} last_err: str | None = None + def dkey(dn: str) -> str: + return _dedup_key_full_doc(dn) if full_path_dedup else _dedup_key(dn) + def absorb(hits: list) -> None: for h in hits: src = h.get("_source") or {} dn = src.get("docnm_kwd") or "?" - dk = _dedup_key(dn) + dk = dkey(dn) sc = float(h.get("_score") or 0.0) old = best.get(dk) if old is None or sc > float(old.get("_score") or 0.0): @@ -318,15 +332,20 @@ def handle_rag_search(query: str, top_k: int = 8, **kw): for sq in _WIDE_SUBQUERIES: if sq.lower() not in qstrip.lower(): subqs.append(sq) - pool_cap = max(top_k * 3, 45) - hits, err = _merge_hits_from_queries(subqs[:16], es_size, pool_cap=pool_cap) + pool_cap = max(top_k * 5, 120) + hits, err = _merge_hits_from_queries( + subqs[:22], + es_size, + pool_cap=pool_cap, + full_path_dedup=True, + ) if err and not hits: return f"Fehler bei der Dokumentensuche: {err}" header = ( - f"**Breitensuche ({len(subqs[:16])} Anfragen gemerged) fuer '{qstrip}' — " - f"{len(hits)} Kandidaten, zeige bis {top_k} distinct:**\n" + f"**Breitensuche ({len(subqs[:22])} Anfragen, Dedup=voller Pfad) '{qstrip}' — " + f"{len(hits)} Kandidaten, zeige bis {top_k}:**\n" ) - snip_len = 750 + snip_len = 400 else: data = _es_hybrid_search(qstrip, es_size) if "_error" in data: @@ -342,12 +361,15 @@ def handle_rag_search(query: str, top_k: int = 8, **kw): lines: list[str] = [] count = 0 + def out_dkey(doc_name: str) -> str: + return _dedup_key_full_doc(doc_name) if wide else _dedup_key(doc_name) + for h in hits: if count >= top_k: break src = h.get("_source") or {} doc_name = src.get("docnm_kwd") or "?" - dk = _dedup_key(doc_name) + dk = out_dkey(doc_name) if dk in seen_docs: continue seen_docs.add(dk)