diff --git a/homelab-ai-bot/llm.py b/homelab-ai-bot/llm.py
index 8fd6367d..3aad2a56 100644
--- a/homelab-ai-bot/llm.py
+++ b/homelab-ai-bot/llm.py
@@ -332,7 +332,7 @@ def ask(question: str, context: str) -> str:
         return f"LLM-Fehler: {e}"
 
 
-def ask_with_tools(question: str, tool_handlers: dict, session_id: str = None) -> str:
+def ask_with_tools(question: str, tool_handlers: dict, session_id: str = None, document_mode: bool = False) -> str:
     """Freitext-Frage mit automatischem Routing und Tool-Calling.
 
     Routing:
@@ -346,6 +346,10 @@ def ask_with_tools(question: str, tool_handlers: dict, session_id: str = None) -
 
     route = _route_model(question)
 
+    if document_mode and route != "deep_research":
+        route = MODEL_LOCAL
+        log.info("Betriebsart Unterlagen: lokales Modell, keine Web-Suche")
+
     # --- Deep Research: Perplexity Sonar Deep Research ---
     if route == "deep_research":
         log.info("Route: sonar-deep-research")
@@ -426,7 +430,7 @@ def ask_with_tools(question: str, tool_handlers: dict, session_id: str = None) -
         "monatliche kosten", "versicherungskosten", "beitragsrechnung",
     ]
     _q_low = question.lower()
-    if route == MODEL_LOCAL and any(k in _q_low for k in _DOC_KW):
+    if route == MODEL_LOCAL and (document_mode or any(k in _q_low for k in _DOC_KW)):
         _rag_fn = tool_handlers.get("rag_search")
         if _rag_fn:
             try:
diff --git a/homelab-ai-bot/rag_mode.py b/homelab-ai-bot/rag_mode.py
new file mode 100644
index 00000000..7fe514a2
--- /dev/null
+++ b/homelab-ai-bot/rag_mode.py
@@ -0,0 +1,83 @@
+"""Per-chat "documents first" operating mode (RAG before web search)."""
+
+from __future__ import annotations
+
+from typing import Optional, Tuple
+
+# Chats whose document mode is on; a missing key means "off".
+# Plain module-level dict: nothing in this module persists it.
+_active: dict[str, bool] = {}
+
+# Button captions show the CURRENT state, so tapping one requests the opposite.
+BTN_OFF = "📁 Unterlagen: AUS"
+BTN_ON = "📁 Unterlagen: AN"
+
+# Prefixes that force document mode for a single message (matched lowercase).
+_DOC_PREFIXES = ("doku:", "rag:", "#doku")
+
+
+def is_document_mode(channel_key: str) -> bool:
+    """Return True if document mode is enabled for *channel_key*."""
+    return _active.get(channel_key, False)
+
+
+def set_document_mode(channel_key: str, on: bool) -> None:
+    """Enable or disable document mode for *channel_key*."""
+    if on:
+        _active[channel_key] = True
+    else:
+        # Drop the key instead of storing False so only enabled chats remain.
+        _active.pop(channel_key, None)
+
+
+def toggle_document_mode(channel_key: str) -> bool:
+    """Flip document mode for *channel_key* and return the new state."""
+    new_state = not is_document_mode(channel_key)
+    set_document_mode(channel_key, new_state)
+    return new_state
+
+
+def keyboard_label(channel_key: str) -> str:
+    """Return the button caption matching the chat's current mode."""
+    return BTN_ON if is_document_mode(channel_key) else BTN_OFF
+
+
+def is_mode_button(text: str) -> bool:
+    """Return True if *text* (whitespace-stripped) is one of the mode captions."""
+    return (text or "").strip() in (BTN_ON, BTN_OFF)
+
+
+def handle_mode_button(text: str, channel_key: str) -> Optional[bool]:
+    """Apply a mode-button press.
+
+    Returns True if turned ON, False if OFF, None if not a mode button.
+    """
+    t = (text or "").strip()
+    # The caption names the current state, so "AUS" means: switch on.
+    if t == BTN_OFF:
+        set_document_mode(channel_key, True)
+        return True
+    if t == BTN_ON:
+        set_document_mode(channel_key, False)
+        return False
+    return None
+
+
+def strip_document_prefix(text: str) -> Tuple[str, bool]:
+    """Split a leading ``doku:``, ``rag:`` or ``#doku`` marker off *text*.
+
+    Matching is case-insensitive. Returns ``(remaining_text, True)`` when a
+    marker was found, otherwise ``(stripped_text, False)``.
+    """
+    t = (text or "").strip()
+    low = t.lower()
+    for p in _DOC_PREFIXES:
+        if not low.startswith(p):
+            continue
+        rest = t[len(p) :]
+        # "#doku" has no delimiter of its own: require a word boundary so
+        # that e.g. "#dokumente ..." is not cut down to "mente ...".
+        if p[-1].isalnum() and rest[:1].isalnum():
+            continue
+        return rest.lstrip(), True
+    return t, False
diff --git a/homelab-ai-bot/telegram_bot.py b/homelab-ai-bot/telegram_bot.py
index e21c03ad..462f3a9e 100644
--- a/homelab-ai-bot/telegram_bot.py
+++ b/homelab-ai-bot/telegram_bot.py
@@ -83,14 +83,29 @@ BOT_COMMANDS = [
 ]
 
 
-KEYBOARD = ReplyKeyboardMarkup(
-    [
-        [KeyboardButton("📊 Status"), KeyboardButton("❌ Fehler"), KeyboardButton("📰 Feeds")],
-        [KeyboardButton("📋 Report"), KeyboardButton("🔧 Check"), KeyboardButton("🔇 Stille")],
-    ],
-    resize_keyboard=True,
-    is_persistent=True,
-)
+def build_reply_keyboard(channel_key: str) -> ReplyKeyboardMarkup:
+    """Build the reply keyboard including the document-mode (RAG first) switch.
+
+    The last row shows the current document-mode state of *channel_key*, as
+    reported by ``rag_mode.keyboard_label``.
+    """
+    # Imported here, not at module level: ``KEYBOARD`` below calls this
+    # function before the module-level ``import rag_mode`` further down runs.
+    import rag_mode
+
+    doc_btn = KeyboardButton(rag_mode.keyboard_label(channel_key))
+    return ReplyKeyboardMarkup(
+        [
+            [KeyboardButton("📊 Status"), KeyboardButton("❌ Fehler"), KeyboardButton("📰 Feeds")],
+            [KeyboardButton("📋 Report"), KeyboardButton("🔧 Check"), KeyboardButton("🔇 Stille")],
+            [doc_btn],
+        ],
+        resize_keyboard=True,
+        is_persistent=True,
+    )
+
+
+KEYBOARD = build_reply_keyboard("")
 
 BUTTON_MAP = {
     "📊 Status": "status",
@@ -106,6 +113,7 @@ import requests as _req
 import llm
 import memory_client
 import action_guard
+import rag_mode
 import monitor
 import voice
 from core import config
@@ -155,8 +163,8 @@ async def cmd_start(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
         "/feeds — Feed-Status & Artikel\n"
         "/memory — Gedaechtnis anzeigen\n\n"
         "📷 Foto senden = Bilderkennung\n\n"
-        "Oder einfach eine Frage stellen!",
-        reply_markup=KEYBOARD,
+        "📁 Unterlagen: Schalter in der Tastatur — AN = Dokumente zuerst (RAG).\n\nOder einfach eine Frage stellen!",
+        reply_markup=build_reply_keyboard(str(update.effective_chat.id)),
     )
 
 
@@ -382,7 +390,7 @@ async def cmd_memory(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
             lines.append(f"  • {i['content'][:90]}{conf}{exp_str}{src_tag}")
         lines.append("")
     text = "\n".join(lines)
-    await update.message.reply_text(text[:4000], reply_markup=KEYBOARD)
+    await update.message.reply_text(text[:4000], reply_markup=build_reply_keyboard(str(update.effective_chat.id)))
 
 
 
@@ -405,17 +413,42 @@ async def handle_voice(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
             await update.message.reply_text("Konnte die Nachricht nicht verstehen.")
             return
 
+        channel_key = str(update.effective_chat.id)
+
+        mode_change = rag_mode.handle_mode_button(text, channel_key)
+        if mode_change is not None:
+            state = "eingeschaltet" if mode_change else "ausgeschaltet"
+            await update.message.reply_text(
+                f"🗣 \"{text}\"\n\nBetriebsart Unterlagen zuerst: {state}.",
+                reply_markup=build_reply_keyboard(channel_key),
+            )
+            return
+
+        work_text, doc_prefix = rag_mode.strip_document_prefix(text)
+        if doc_prefix and not work_text.strip():
+            await update.message.reply_text(
+                f"🗣 \"{text}\"\n\nSchreib die Frage nach doku: oder rag:, z.B. doku: Jahreskosten",
+                reply_markup=build_reply_keyboard(channel_key),
+            )
+            return
+        document_mode = doc_prefix or rag_mode.is_document_mode(channel_key)
+
         log.info("Voice transkribiert: %s", text[:100])
         await update.message.reply_text(f"🗣 \"{text}\"\n\n🤔 Denke nach...")
 
-        channel_key = str(update.effective_chat.id)
         session_id = memory_client.get_or_create_session(channel_key, source="telegram")
 
         context.last_suggest_result = {"type": None}
         context.set_source_type("telegram_voice")
         handlers = context.get_tool_handlers(session_id=session_id)
         llm_task = asyncio.create_task(
-            asyncio.to_thread(llm.ask_with_tools, text, handlers, session_id=session_id)
+            asyncio.to_thread(
+                llm.ask_with_tools,
+                work_text,
+                handlers,
+                session_id=session_id,
+                document_mode=document_mode,
+            )
         )
         ACTIVE_LLM_TASKS[update.effective_chat.id] = llm_task
 
@@ -432,7 +465,7 @@ async def handle_voice(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
             memory_client.log_message(session_id, "user", text)
             memory_client.log_message(session_id, "assistant", answer)
 
-        await update.message.reply_text(answer[:4000], reply_markup=KEYBOARD)
+        await update.message.reply_text(answer[:4000], reply_markup=build_reply_keyboard(str(update.effective_chat.id)))
 
         audio_out = voice.synthesize(answer[:4000])
         if audio_out:
@@ -481,7 +514,7 @@ async def handle_photo(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
             memory_client.log_message(session_id, "user", user_msg)
             memory_client.log_message(session_id, "assistant", answer)
 
-        await update.message.reply_text(answer[:4000], reply_markup=KEYBOARD)
+        await update.message.reply_text(answer[:4000], reply_markup=build_reply_keyboard(str(update.effective_chat.id)))
     except Exception as e:
         log.exception("Fehler bei Foto-Analyse")
         await update.message.reply_text(f"Fehler bei Bildanalyse: {e}")
@@ -657,7 +690,7 @@ async def handle_document(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
                 memory_client.log_message(session_id, "user", user_msg)
                 memory_client.log_message(session_id, "assistant", answer)
 
-            await update.message.reply_text(answer[:4000], reply_markup=KEYBOARD)
+            await update.message.reply_text(answer[:4000], reply_markup=build_reply_keyboard(str(update.effective_chat.id)))
         except Exception as e:
             log.exception("Fehler bei Bild-Dokument")
             await update.message.reply_text(f"Fehler bei Bildanalyse: {e}")
@@ -694,7 +727,7 @@ async def handle_document(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
                 memory_client.log_message(session_id, "user", user_msg)
                 memory_client.log_message(session_id, "assistant", answer)
 
-            await update.message.reply_text(answer[:4000], reply_markup=KEYBOARD)
+            await update.message.reply_text(answer[:4000], reply_markup=build_reply_keyboard(str(update.effective_chat.id)))
         except Exception as e:
             log.exception("Fehler bei PDF-Analyse")
             await update.message.reply_text(f"Fehler bei PDF: {e}")
@@ -766,8 +799,28 @@ async def handle_message(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
     channel_key = str(update.effective_chat.id)
     session_id = memory_client.get_or_create_session(channel_key, source="telegram")
 
+    mode_change = rag_mode.handle_mode_button(text, channel_key)
+    if mode_change is not None:
+        state = "eingeschaltet" if mode_change else "ausgeschaltet"
+        await update.message.reply_text(
+            f"Betriebsart Unterlagen zuerst: {state}.\n"
+            "Solange AN: Fragen laufen zuerst gegen deine Dokumente (lokal), "
+            "nicht gegen Web/Preis-Suche.",
+            reply_markup=build_reply_keyboard(channel_key),
+        )
+        return
+
+    work_text, doc_prefix = rag_mode.strip_document_prefix(text)
+    if doc_prefix and not work_text.strip():
+        await update.message.reply_text(
+            "Schreib die Frage nach dem Doppelpunkt, z.B. doku: Jahreskosten Versicherung",
+            reply_markup=build_reply_keyboard(channel_key),
+        )
+        return
+    document_mode = doc_prefix or rag_mode.is_document_mode(channel_key)
+
     await update.message.reply_text("🤔 Denke nach...")
-    if _likely_deep_research_request(text):
+    if _likely_deep_research_request(work_text):
         await update.message.reply_text("🔎 Deep Research gestartet. Das dauert meist 2-5 Minuten.")
     try:
         context.last_suggest_result = {"type": None}
@@ -776,7 +829,13 @@ async def handle_message(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
             context.get_tool_handlers(session_id=session_id), channel_key
         )
         llm_task = asyncio.create_task(
-            asyncio.to_thread(llm.ask_with_tools, text, handlers, session_id=session_id)
+            asyncio.to_thread(
+                llm.ask_with_tools,
+                work_text,
+                handlers,
+                session_id=session_id,
+                document_mode=document_mode,
+            )
         )
         ACTIVE_LLM_TASKS[update.effective_chat.id] = llm_task
 
@@ -796,7 +855,7 @@ async def handle_message(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
         suggest = context.last_suggest_result
         log.info("suggest_result: type=%s", suggest.get("type"))
 
-        await update.message.reply_text(answer[:4000], reply_markup=KEYBOARD)
+        await update.message.reply_text(answer[:4000], reply_markup=build_reply_keyboard(str(update.effective_chat.id)))
     except asyncio.CancelledError:
         log.info("Freitext-Lauf abgebrochen")
         return