fix(rag): RRF fusion, 512-char snippets, 15 candidates — speed+quality
This commit is contained in:
parent
3c455e7ad7
commit
a3735bf265
1 changed file with 26 additions and 10 deletions
|
|
@@ -22,8 +22,9 @@ EMBED_MODEL = "nomic-embed-text"
|
||||||
|
|
||||||
# Cross-Encoder Reranking (CT 123, pve-hetzner LAN)
|
# Cross-Encoder Reranking (CT 123, pve-hetzner LAN)
|
||||||
RERANKER_URL = "http://10.10.10.123:8099"
|
RERANKER_URL = "http://10.10.10.123:8099"
|
||||||
RERANK_CANDIDATES = 30
|
RERANK_CANDIDATES = 15
|
||||||
RERANK_TIMEOUT = 120
|
RERANK_TIMEOUT = 30
|
||||||
|
RERANK_SNIPPET_CHARS = 512
|
||||||
|
|
||||||
MIN_TOP_K = 5
|
MIN_TOP_K = 5
|
||||||
# Breite Übersichten: mehr ES-Runden, mehr distinct Treffer (pro vollem Pfad docnm_kwd)
|
# Breite Übersichten: mehr ES-Runden, mehr distinct Treffer (pro vollem Pfad docnm_kwd)
|
||||||
|
|
@@ -207,12 +208,14 @@ def _es_hybrid_search(query: str, es_size: int) -> dict:
|
||||||
|
|
||||||
|
|
||||||
def _snippet_for_rerank(src: dict) -> str:
|
def _snippet_for_rerank(src: dict) -> str:
|
||||||
|
doc_name = src.get("docnm_kwd") or ""
|
||||||
raw = src.get("content_with_weight") or src.get("content_de") or ""
|
raw = src.get("content_with_weight") or src.get("content_de") or ""
|
||||||
return raw[:4000]
|
prefix = doc_name[:120] + "\n" if doc_name else ""
|
||||||
|
return prefix + raw[:RERANK_SNIPPET_CHARS]
|
||||||
|
|
||||||
|
|
||||||
def _rerank_hits(query: str, hits: list) -> tuple[list, bool]:
|
def _rerank_hits(query: str, hits: list) -> tuple[list, bool]:
|
||||||
"""Sortiert die ersten RERANK_CANDIDATES Treffer per Cross-Encoder neu."""
|
"""Rerankt mit Cross-Encoder, kombiniert Score mit ES-Rang (RRF)."""
|
||||||
if not hits or not RERANKER_URL:
|
if not hits or not RERANKER_URL:
|
||||||
return hits, False
|
return hits, False
|
||||||
to_score = hits[:RERANK_CANDIDATES]
|
to_score = hits[:RERANK_CANDIDATES]
|
||||||
|
|
@@ -241,17 +244,27 @@ def _rerank_hits(query: str, hits: list) -> tuple[list, bool]:
|
||||||
len(to_score),
|
len(to_score),
|
||||||
)
|
)
|
||||||
return hits, False
|
return hits, False
|
||||||
indexed = list(zip(scores, range(len(to_score))))
|
|
||||||
indexed.sort(key=lambda x: x[0], reverse=True)
|
k = 60
|
||||||
|
combined: list[tuple[float, int]] = []
|
||||||
|
for idx, (h, rr_score) in enumerate(zip(to_score, scores)):
|
||||||
|
es_rank = idx + 1
|
||||||
|
rr_sorted = sorted(scores, reverse=True)
|
||||||
|
rr_rank = rr_sorted.index(rr_score) + 1
|
||||||
|
rrf = 1.0 / (k + es_rank) + 1.0 / (k + rr_rank)
|
||||||
|
combined.append((rrf, idx))
|
||||||
|
combined.sort(key=lambda x: x[0], reverse=True)
|
||||||
|
|
||||||
new_order: list = []
|
new_order: list = []
|
||||||
for sc, idx in indexed:
|
for rrf, idx in combined:
|
||||||
h = to_score[idx]
|
h = to_score[idx]
|
||||||
h["_rerank_score"] = float(sc)
|
h["_rerank_score"] = float(scores[idx])
|
||||||
|
h["_rrf_score"] = float(rrf)
|
||||||
new_order.append(h)
|
new_order.append(h)
|
||||||
rest = hits[RERANK_CANDIDATES:]
|
rest = hits[RERANK_CANDIDATES:]
|
||||||
return new_order + rest, True
|
return new_order + rest, True
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
log.warning("rerank failed: %s", e)
|
log.warning("rerank failed (fallback to ES): %s", e)
|
||||||
return hits, False
|
return hits, False
|
||||||
|
|
||||||
|
|
||||||
|
|
@@ -435,7 +448,10 @@ def handle_rag_search(query: str, top_k: int = 8, **kw):
|
||||||
continue
|
continue
|
||||||
seen_docs.add(dk)
|
seen_docs.add(dk)
|
||||||
|
|
||||||
if "_rerank_score" in h:
|
if "_rrf_score" in h:
|
||||||
|
score = float(h["_rrf_score"])
|
||||||
|
score_label = "RRF"
|
||||||
|
elif "_rerank_score" in h:
|
||||||
score = float(h["_rerank_score"])
|
score = float(h["_rerank_score"])
|
||||||
score_label = "Rerank"
|
score_label = "Rerank"
|
||||||
else:
|
else:
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue