fix(rag): raise rerank timeout from 30s to 45s, fix RRF rank-map bug
This commit is contained in:
parent
a3735bf265
commit
3b6ee14b37
1 changed file with 6 additions and 4 deletions
|
|
@@ -23,7 +23,7 @@ EMBED_MODEL = "nomic-embed-text"
|
||||||
# Cross-Encoder Reranking (CT 123, pve-hetzner LAN)
|
# Cross-Encoder Reranking (CT 123, pve-hetzner LAN)
|
||||||
RERANKER_URL = "http://10.10.10.123:8099"
|
RERANKER_URL = "http://10.10.10.123:8099"
|
||||||
RERANK_CANDIDATES = 15
|
RERANK_CANDIDATES = 15
|
||||||
RERANK_TIMEOUT = 30
|
RERANK_TIMEOUT = 45
|
||||||
RERANK_SNIPPET_CHARS = 512
|
RERANK_SNIPPET_CHARS = 512
|
||||||
|
|
||||||
MIN_TOP_K = 5
|
MIN_TOP_K = 5
|
||||||
|
|
@@ -246,11 +246,13 @@ def _rerank_hits(query: str, hits: list) -> tuple[list, bool]:
|
||||||
return hits, False
|
return hits, False
|
||||||
|
|
||||||
k = 60
|
k = 60
|
||||||
|
rr_ranked = sorted(range(len(scores)), key=lambda i: scores[i], reverse=True)
|
||||||
|
rr_rank_map = {i: rank + 1 for rank, i in enumerate(rr_ranked)}
|
||||||
|
|
||||||
combined: list[tuple[float, int]] = []
|
combined: list[tuple[float, int]] = []
|
||||||
for idx, (h, rr_score) in enumerate(zip(to_score, scores)):
|
for idx in range(len(to_score)):
|
||||||
es_rank = idx + 1
|
es_rank = idx + 1
|
||||||
rr_sorted = sorted(scores, reverse=True)
|
rr_rank = rr_rank_map[idx]
|
||||||
rr_rank = rr_sorted.index(rr_score) + 1
|
|
||||||
rrf = 1.0 / (k + es_rank) + 1.0 / (k + rr_rank)
|
rrf = 1.0 / (k + es_rank) + 1.0 / (k + rr_rank)
|
||||||
combined.append((rrf, idx))
|
combined.append((rrf, idx))
|
||||||
combined.sort(key=lambda x: x[0], reverse=True)
|
combined.sort(key=lambda x: x[0], reverse=True)
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue