diff --git a/homelab-ai-bot/llm.py b/homelab-ai-bot/llm.py
index 8180e65f..719d6857 100644
--- a/homelab-ai-bot/llm.py
+++ b/homelab-ai-bot/llm.py
@@ -484,3 +484,82 @@ def ask_with_tools(question: str, tool_handlers: dict, session_id: str = None) -> str:
     except Exception as e:
         return f"LLM-Fehler: {e}"
 
+
+
+def ask_with_image(image_base64: str, caption: str, tool_handlers: dict, session_id: str = None) -> str:
+    """Bild-Analyse mit optionalem Text und Tool-Calling via Vision-faehigem Modell."""
+    api_key = _get_api_key()
+    if not api_key:
+        return "OpenRouter API Key fehlt in homelab.conf"
+
+    try:
+        import memory_client
+        query = caption if caption else "Bild-Analyse"
+        memory_items = memory_client.get_relevant_memory(query, top_k=10)
+        memory_block = memory_client.format_memory_for_prompt(memory_items)
+    except Exception:
+        memory_block = ""
+
+    prompt_text = caption if caption else "Was siehst du auf diesem Bild? Beschreibe was du erkennst."
+    user_content = [
+        {"type": "text", "text": prompt_text},
+        {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_base64}"}},
+    ]
+
+    messages = [
+        {"role": "system", "content": SYSTEM_PROMPT + memory_block},
+    ]
+
+    if session_id:
+        try:
+            import memory_client
+            history = memory_client.get_session_messages(session_id, limit=6)
+            for msg in history:
+                role = msg.get("role", "")
+                content = msg.get("content", "")
+                if content and role in ("user", "assistant"):
+                    messages.append({"role": role, "content": content})
+        except Exception:
+            pass
+
+    messages.append({"role": "user", "content": user_content})
+
+    try:
+        for _round in range(MAX_TOOL_ROUNDS):
+            data = _call_openrouter(messages, api_key, use_tools=True)
+            choice = data["choices"][0]
+            msg = choice["message"]
+
+            tool_calls = msg.get("tool_calls")
+            if not tool_calls:
+                return msg.get("content", "Keine Antwort vom LLM.")
+
+            messages.append(msg)
+
+            for tc in tool_calls:
+                fn_name = tc["function"]["name"]
+                try:
+                    fn_args = json.loads(tc["function"]["arguments"])
+                except (json.JSONDecodeError, KeyError):
+                    fn_args = {}
+
+                handler = tool_handlers.get(fn_name)
+                if handler:
+                    try:
+                        result = handler(**fn_args)
+                    except Exception as e:
+                        result = f"Fehler bei {fn_name}: {e}"
+                else:
+                    result = f"Unbekanntes Tool: {fn_name}"
+
+                messages.append({
+                    "role": "tool",
+                    "tool_call_id": tc["id"],
+                    "content": str(result)[:3000],
+                })
+
+        data = _call_openrouter(messages, api_key, use_tools=False)
+        return data["choices"][0]["message"]["content"]
+
+    except Exception as e:
+        return f"LLM-Fehler: {e}"
diff --git a/homelab-ai-bot/telegram_bot.py b/homelab-ai-bot/telegram_bot.py
index 9d82fc7b..351ef5d1 100644
--- a/homelab-ai-bot/telegram_bot.py
+++ b/homelab-ai-bot/telegram_bot.py
@@ -123,6 +123,7 @@ async def cmd_start(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
         "/check — Monitoring-Check\n"
         "/feeds — Feed-Status & Artikel\n"
         "/memory — Gedaechtnis anzeigen\n\n"
+        "📷 Foto senden = Bilderkennung\n\n"
         "Oder einfach eine Frage stellen!",
         reply_markup=KEYBOARD,
     )
@@ -397,6 +398,43 @@ async def handle_voice(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
         await update.message.reply_text(f"Fehler: {e}")
 
 
+async def handle_photo(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
+    """Foto-Nachricht: Bild analysieren via Vision-LLM."""
+    if not _authorized(update):
+        return
+    photos = update.message.photo
+    if not photos:
+        return
+
+    photo = photos[-1]
+    caption = update.message.caption or ""
+
+    await update.message.reply_text("🔍 Analysiere Bild...")
+    try:
+        import base64
+        tg_file = await ctx.bot.get_file(photo.file_id)
+        image_data = await tg_file.download_as_bytearray()
+        image_base64 = base64.b64encode(bytes(image_data)).decode("utf-8")
+
+        channel_key = str(update.effective_chat.id)
+        session_id = memory_client.get_or_create_session(channel_key, source="telegram")
+
+        context.last_suggest_result = {"type": None}
+        context.set_source_type("telegram_photo")
+        handlers = context.get_tool_handlers(session_id=session_id)
+        answer = llm.ask_with_image(image_base64, caption, handlers, session_id=session_id)
+
+        if session_id:
+            user_msg = f"[Foto] {caption}" if caption else "[Foto gesendet]"
+            memory_client.log_message(session_id, "user", user_msg)
+            memory_client.log_message(session_id, "assistant", answer)
+
+        await update.message.reply_text(answer[:4000], reply_markup=KEYBOARD)
+    except Exception as e:
+        log.exception("Fehler bei Foto-Analyse")
+        await update.message.reply_text(f"Fehler bei Bildanalyse: {e}")
+
+
 async def handle_message(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
     """Button-Presses und Freitext-Fragen verarbeiten."""
     if not _authorized(update):
@@ -466,6 +504,7 @@ def main():
     app.add_handler(CommandHandler("feeds", cmd_feeds))
     app.add_handler(CommandHandler("memory", cmd_memory))
     app.add_handler(MessageHandler(filters.VOICE, handle_voice))
+    app.add_handler(MessageHandler(filters.PHOTO, handle_photo))
     app.add_handler(MessageHandler(filters.TEXT & ~filters.COMMAND, handle_message))
 
     async def post_init(application):