diff --git a/homelab-ai-bot/tools/rag.py b/homelab-ai-bot/tools/rag.py index 154588ff..768cefe4 100644 --- a/homelab-ai-bot/tools/rag.py +++ b/homelab-ai-bot/tools/rag.py @@ -262,6 +262,16 @@ def _is_wide_recall_query(q: str) -> bool: ) return costish and broad + # Immobilien / Wohnungen / Kambodscha + if any(x in ql for x in ("wohnung", "immobilie", "condo", "apartment", "grundstück", "grundstueck")): + if any(x in ql for x in ("welche", "alle", "liste", "habe ich", "übersicht", "uebersicht", "wie viele")): + return True + if any(x in ql for x in ("kambodscha", "cambodia", "takeo", "phnom", "sihanouk")): + if any(x in ql for x in ("welche", "alle", "wohnung", "immobilie", "haus", "condo", "apartment", "mietvertrag")): + return True + + return False + # Zusatzanfragen decken Sparten + Gesellschaften ab (Recall) _WIDE_SUBQUERIES = [ @@ -284,6 +294,23 @@ _WIDE_SUBQUERIES = [ "Kfz Versicherungsschein Beitrag jährlich", ] +_WIDE_SUBQUERIES_IMMOBILIEN = [ + "Arakawa Wohnung Mietvertrag", + "Arakawa Wohnung D1603", + "Arakawa Wohnung G2010", + "Arakawa Wohnung-2", + "Kambodscha Arakawa Kaufvertrag", + "Kambodscha Arakawa Vollmacht", + "Kambodscha Arakawa Überweisung", + "Wohnung Mietvertrag Kambodscha", + "Condo Apartment Cambodia", + "Hard Title Wohnung", + "Wohnungen Kurtzübersicht", + "Mietvertrag Ramirez Antonio", + "Mietvertrag Cheng Qiu", + "Kambodscha Rechnungen Strom Miete", +] + def _merge_hits_from_queries( queries: list[str], @@ -335,9 +362,16 @@ def handle_rag_search(query: str, top_k: int = 8, **kw): es_size = min(ES_SIZE_CAP, max(top_k * 10, 70)) if wide: + ql = qstrip.lower() + _immo_wide = any( + x in ql + for x in ("wohnung", "immobilie", "condo", "apartment", "grundstück", "grundstueck", + "kambodscha", "cambodia", "arakawa") + ) + _sq_pool = _WIDE_SUBQUERIES_IMMOBILIEN if _immo_wide else _WIDE_SUBQUERIES subqs = [qstrip] - for sq in _WIDE_SUBQUERIES: - if sq.lower() not in qstrip.lower(): + for sq in _sq_pool: + if sq.lower() not in ql: subqs.append(sq) pool_cap = max(top_k * 5, 120) hits, err = _merge_hits_from_queries(