Fotoerkennung: handle_photo + ask_with_image fuer Hausmeister-Bot

This commit is contained in:
root 2026-03-16 09:06:00 +07:00
parent fe94f200e8
commit 89f2c03fa0
2 changed files with 118 additions and 0 deletions

View file

@ -484,3 +484,82 @@ def ask_with_tools(question: str, tool_handlers: dict, session_id: str = None) -
except Exception as e: except Exception as e:
return f"LLM-Fehler: {e}" return f"LLM-Fehler: {e}"
def ask_with_image(image_base64: str, caption: str, tool_handlers: dict, session_id: str = None) -> str:
    """Analyse an image with optional text and tool calling via a vision-capable model.

    Args:
        image_base64: Base64-encoded image bytes. They are embedded in a
            ``data:image/jpeg;base64,...`` URL, so the payload is labelled
            as JPEG regardless of its real format.
        caption: Optional user text sent along with the image. When empty,
            a generic "describe this image" prompt is used instead.
        tool_handlers: Mapping of tool name to a callable that is invoked
            with the JSON-decoded tool arguments as keyword arguments.
        session_id: Optional session identifier. When given, up to six
            previous user/assistant messages are loaded as conversation
            context.

    Returns:
        The model's answer as text. Errors are never raised to the caller;
        they are returned as a German, user-facing error string.
    """
    api_key = _get_api_key()
    if not api_key:
        return "OpenRouter API Key fehlt in homelab.conf"

    fallback_answer = "Keine Antwort vom LLM."

    # Memory is best-effort: if the module is unavailable or any lookup
    # fails, the request continues without memory or history.
    try:
        import memory_client
    except Exception:
        memory_client = None

    memory_block = ""
    if memory_client is not None:
        try:
            query = caption if caption else "Bild-Analyse"
            memory_items = memory_client.get_relevant_memory(query, top_k=10)
            memory_block = memory_client.format_memory_for_prompt(memory_items)
        except Exception:
            memory_block = ""

    prompt_text = caption if caption else "Was siehst du auf diesem Bild? Beschreibe was du erkennst."
    user_content = [
        {"type": "text", "text": prompt_text},
        {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_base64}"}},
    ]
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT + memory_block},
    ]

    if session_id and memory_client is not None:
        try:
            history = memory_client.get_session_messages(session_id, limit=6)
            for entry in history:
                role = entry.get("role", "")
                content = entry.get("content", "")
                # Only plain user/assistant turns with content are replayed.
                if content and role in ("user", "assistant"):
                    messages.append({"role": role, "content": content})
        except Exception:
            # Deliberate best-effort: missing history must not block the
            # image analysis itself.
            pass

    messages.append({"role": "user", "content": user_content})

    try:
        for _round in range(MAX_TOOL_ROUNDS):
            data = _call_openrouter(messages, api_key, use_tools=True)
            msg = data["choices"][0]["message"]
            tool_calls = msg.get("tool_calls")
            if not tool_calls:
                # The API may send "content": null; .get() with a default
                # would hand that None through, so fall back explicitly.
                return msg.get("content") or fallback_answer

            messages.append(msg)
            for tc in tool_calls:
                fn_name = tc["function"]["name"]
                try:
                    fn_args = json.loads(tc["function"]["arguments"])
                except (json.JSONDecodeError, KeyError, TypeError):
                    # TypeError covers null arguments; treat every kind of
                    # unusable argument payload as "no arguments".
                    fn_args = {}
                handler = tool_handlers.get(fn_name)
                if handler:
                    try:
                        result = handler(**fn_args)
                    except Exception as e:
                        result = f"Fehler bei {fn_name}: {e}"
                else:
                    result = f"Unbekanntes Tool: {fn_name}"
                messages.append({
                    "role": "tool",
                    "tool_call_id": tc["id"],
                    # Cap the tool output that is fed back to the model.
                    "content": str(result)[:3000],
                })

        # Tool budget exhausted: ask once more with tools disabled so the
        # model has to produce a final text answer.
        data = _call_openrouter(messages, api_key, use_tools=False)
        return data["choices"][0]["message"]["content"] or fallback_answer
    except Exception as e:
        return f"LLM-Fehler: {e}"

View file

@ -123,6 +123,7 @@ async def cmd_start(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
"/check — Monitoring-Check\n" "/check — Monitoring-Check\n"
"/feeds — Feed-Status & Artikel\n" "/feeds — Feed-Status & Artikel\n"
"/memory — Gedaechtnis anzeigen\n\n" "/memory — Gedaechtnis anzeigen\n\n"
"📷 Foto senden = Bilderkennung\n\n"
"Oder einfach eine Frage stellen!", "Oder einfach eine Frage stellen!",
reply_markup=KEYBOARD, reply_markup=KEYBOARD,
) )
@ -397,6 +398,43 @@ async def handle_voice(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
await update.message.reply_text(f"Fehler: {e}") await update.message.reply_text(f"Fehler: {e}")
async def handle_photo(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
    """Handle a photo message: analyse the image via the vision LLM and reply.

    Args:
        update: Incoming Telegram update; its message carries the photo
            and an optional caption.
        ctx: Handler context; its bot is used to download the photo file.

    Returns:
        None. The answer, or an error text, is sent as a reply to the
        message. Unauthorized updates and messages without a photo are
        ignored silently.
    """
    import base64

    if not _authorized(update):
        return
    photos = update.message.photo
    if not photos:
        return
    # Use the last entry of the photo list.
    # NOTE(review): presumably the largest available size - confirm
    # against the Telegram API documentation.
    photo = photos[-1]
    caption = update.message.caption or ""
    await update.message.reply_text("🔍 Analysiere Bild...")
    try:
        tg_file = await ctx.bot.get_file(photo.file_id)
        image_data = await tg_file.download_as_bytearray()
        image_base64 = base64.b64encode(bytes(image_data)).decode("utf-8")

        channel_key = str(update.effective_chat.id)
        session_id = memory_client.get_or_create_session(channel_key, source="telegram")
        context.last_suggest_result = {"type": None}
        context.set_source_type("telegram_photo")
        handlers = context.get_tool_handlers(session_id=session_id)

        answer = llm.ask_with_image(image_base64, caption, handlers, session_id=session_id)
        if not answer:
            # A None or empty answer would break the slice below or be
            # rejected as an empty message, so substitute a fixed text.
            answer = "Keine Antwort vom LLM."

        if session_id:
            user_msg = f"[Foto] {caption}" if caption else "[Foto gesendet]"
            try:
                memory_client.log_message(session_id, "user", user_msg)
                memory_client.log_message(session_id, "assistant", answer)
            except Exception:
                # Persisting the conversation is secondary; the user must
                # still receive the answer that was already computed.
                log.exception("Fehler beim Speichern des Foto-Verlaufs")

        # Truncate to stay below the message length limit.
        await update.message.reply_text(answer[:4000], reply_markup=KEYBOARD)
    except Exception as e:
        log.exception("Fehler bei Foto-Analyse")
        await update.message.reply_text(f"Fehler bei Bildanalyse: {e}")
async def handle_message(update: Update, ctx: ContextTypes.DEFAULT_TYPE): async def handle_message(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
"""Button-Presses und Freitext-Fragen verarbeiten.""" """Button-Presses und Freitext-Fragen verarbeiten."""
if not _authorized(update): if not _authorized(update):
@ -466,6 +504,7 @@ def main():
app.add_handler(CommandHandler("feeds", cmd_feeds)) app.add_handler(CommandHandler("feeds", cmd_feeds))
app.add_handler(CommandHandler("memory", cmd_memory)) app.add_handler(CommandHandler("memory", cmd_memory))
app.add_handler(MessageHandler(filters.VOICE, handle_voice)) app.add_handler(MessageHandler(filters.VOICE, handle_voice))
app.add_handler(MessageHandler(filters.PHOTO, handle_photo))
app.add_handler(MessageHandler(filters.TEXT & ~filters.COMMAND, handle_message)) app.add_handler(MessageHandler(filters.TEXT & ~filters.COMMAND, handle_message))
async def post_init(application): async def post_init(application):