Fotoerkennung: handle_photo + ask_with_image fuer Hausmeister-Bot
This commit is contained in:
parent
fe94f200e8
commit
89f2c03fa0
2 changed files with 118 additions and 0 deletions
|
|
@ -484,3 +484,82 @@ def ask_with_tools(question: str, tool_handlers: dict, session_id: str = None) -
|
|||
|
||||
except Exception as e:
|
||||
return f"LLM-Fehler: {e}"
|
||||
|
||||
|
||||
def ask_with_image(image_base64: str, caption: str, tool_handlers: dict, session_id: str = None) -> str:
    """Analyse an image with optional text and tool calling via a vision-capable model.

    Args:
        image_base64: Base64-encoded image data. It is embedded verbatim in a
            ``data:image/jpeg;base64,...`` URL.
        caption: Optional user text accompanying the image. When empty, a
            generic "describe this image" prompt is sent instead.
        tool_handlers: Mapping of tool name to callable. Each handler is
            invoked with the JSON-decoded tool-call arguments as keyword
            arguments.
        session_id: Optional session id. When given, up to 6 recent
            user/assistant messages of that session are added as history.

    Returns:
        The model's answer text. If the API key is missing, a fixed error
        text is returned. Exceptions raised while talking to the model or
        processing its response are caught and returned as
        ``"LLM-Fehler: ..."``. A missing, null or empty answer is replaced by
        a fallback text so callers always receive a non-empty string.
    """
    api_key = _get_api_key()
    if not api_key:
        return "OpenRouter API Key fehlt in homelab.conf"

    no_answer = "Keine Antwort vom LLM."

    # Memory lookup is best-effort: any failure simply means no memory block.
    try:
        import memory_client
        query = caption if caption else "Bild-Analyse"
        memory_items = memory_client.get_relevant_memory(query, top_k=10)
        memory_block = memory_client.format_memory_for_prompt(memory_items)
    except Exception:
        memory_block = ""

    prompt_text = caption if caption else "Was siehst du auf diesem Bild? Beschreibe was du erkennst."
    user_content = [
        {"type": "text", "text": prompt_text},
        {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_base64}"}},
    ]

    messages = [
        {"role": "system", "content": SYSTEM_PROMPT + memory_block},
    ]

    if session_id:
        # Session history is optional context; a failure here must not
        # prevent the image from being analysed.
        try:
            import memory_client
            history = memory_client.get_session_messages(session_id, limit=6)
            for msg in history:
                role = msg.get("role", "")
                content = msg.get("content", "")
                if content and role in ("user", "assistant"):
                    messages.append({"role": role, "content": content})
        except Exception:
            pass

    messages.append({"role": "user", "content": user_content})

    try:
        for _round in range(MAX_TOOL_ROUNDS):
            data = _call_openrouter(messages, api_key, use_tools=True)
            choice = data["choices"][0]
            msg = choice["message"]

            tool_calls = msg.get("tool_calls")
            if not tool_calls:
                # "content" may be present but null/empty; dict.get's default
                # only covers a missing key, so fall back explicitly.
                return msg.get("content") or no_answer

            # The assistant message carrying the tool calls must precede the
            # tool results in the conversation.
            messages.append(msg)

            for tc in tool_calls:
                fn_name = tc["function"]["name"]
                try:
                    fn_args = json.loads(tc["function"]["arguments"])
                except (json.JSONDecodeError, KeyError, TypeError):
                    # TypeError: "arguments" was null instead of a JSON string.
                    fn_args = {}

                handler = tool_handlers.get(fn_name)
                if handler:
                    try:
                        result = handler(**fn_args)
                    except Exception as e:
                        # Report the failure to the model instead of aborting.
                        result = f"Fehler bei {fn_name}: {e}"
                else:
                    result = f"Unbekanntes Tool: {fn_name}"

                messages.append({
                    "role": "tool",
                    "tool_call_id": tc["id"],
                    # Cap the tool output passed back to the model.
                    "content": str(result)[:3000],
                })

        # Tool-round budget exhausted: request a final answer without tools.
        data = _call_openrouter(messages, api_key, use_tools=False)
        return data["choices"][0]["message"].get("content") or no_answer

    except Exception as e:
        return f"LLM-Fehler: {e}"
|
||||
|
|
|
|||
|
|
@ -123,6 +123,7 @@ async def cmd_start(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
|
|||
"/check — Monitoring-Check\n"
|
||||
"/feeds — Feed-Status & Artikel\n"
|
||||
"/memory — Gedaechtnis anzeigen\n\n"
|
||||
"📷 Foto senden = Bilderkennung\n\n"
|
||||
"Oder einfach eine Frage stellen!",
|
||||
reply_markup=KEYBOARD,
|
||||
)
|
||||
|
|
@ -397,6 +398,43 @@ async def handle_voice(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
|
|||
await update.message.reply_text(f"Fehler: {e}")
|
||||
|
||||
|
||||
async def handle_photo(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
    """Photo message: analyse the image via the vision LLM.

    Downloads the photo, sends it base64-encoded together with the optional
    caption to ``llm.ask_with_image``, logs the exchange to the session and
    replies with the answer. Unauthorized updates and messages without photo
    sizes are ignored silently. Any exception during processing is logged and
    reported to the user.
    """
    if not _authorized(update):
        return
    photos = update.message.photo
    if not photos:
        return

    # Use the last entry of the size list — presumably the highest-resolution
    # variant; confirm against the Telegram Bot API.
    photo = photos[-1]
    caption = update.message.caption or ""

    await update.message.reply_text("🔍 Analysiere Bild...")
    try:
        import base64
        tg_file = await ctx.bot.get_file(photo.file_id)
        image_data = await tg_file.download_as_bytearray()
        image_base64 = base64.b64encode(bytes(image_data)).decode("utf-8")

        channel_key = str(update.effective_chat.id)
        session_id = memory_client.get_or_create_session(channel_key, source="telegram")

        context.last_suggest_result = {"type": None}
        context.set_source_type("telegram_photo")
        handlers = context.get_tool_handlers(session_id=session_id)
        answer = llm.ask_with_image(image_base64, caption, handlers, session_id=session_id)
        # Guard against a None/empty answer: slicing None raises, and an
        # empty text cannot be sent as a reply.
        if not answer:
            answer = "Keine Antwort vom LLM."

        if session_id:
            user_msg = f"[Foto] {caption}" if caption else "[Foto gesendet]"
            memory_client.log_message(session_id, "user", user_msg)
            memory_client.log_message(session_id, "assistant", answer)

        # Truncate the reply to 4000 characters before sending.
        await update.message.reply_text(answer[:4000], reply_markup=KEYBOARD)
    except Exception as e:
        log.exception("Fehler bei Foto-Analyse")
        await update.message.reply_text(f"Fehler bei Bildanalyse: {e}")
|
||||
|
||||
|
||||
async def handle_message(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
|
||||
"""Button-Presses und Freitext-Fragen verarbeiten."""
|
||||
if not _authorized(update):
|
||||
|
|
@ -466,6 +504,7 @@ def main():
|
|||
app.add_handler(CommandHandler("feeds", cmd_feeds))
|
||||
app.add_handler(CommandHandler("memory", cmd_memory))
|
||||
app.add_handler(MessageHandler(filters.VOICE, handle_voice))
|
||||
app.add_handler(MessageHandler(filters.PHOTO, handle_photo))
|
||||
app.add_handler(MessageHandler(filters.TEXT & ~filters.COMMAND, handle_message))
|
||||
|
||||
async def post_init(application):
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue