Vision: intelligente Bilderkennung + PDF-Support + Dokument-Handler
This commit is contained in:
parent
89f2c03fa0
commit
aed9e6d28a
2 changed files with 102 additions and 0 deletions
|
|
@ -63,6 +63,15 @@ SESSION-RUECKBLICK:
|
||||||
- Optional kurz erwaehnen was sonst noch Thema war.
|
- Optional kurz erwaehnen was sonst noch Thema war.
|
||||||
- session_search nur fuer Stichwort-Suche in ALTEN Sessions (nicht aktuelle).
|
- session_search nur fuer Stichwort-Suche in ALTEN Sessions (nicht aktuelle).
|
||||||
|
|
||||||
|
BILDERKENNUNG:
|
||||||
|
Wenn der User ein Bild oder PDF schickt:
|
||||||
|
- Beschreibe STRUKTURIERT was du siehst.
|
||||||
|
- Bei Flugplaenen/Buchungen: Extrahiere ALLE Daten (Flugnummer, Datum, Abflug/Ankunft Uhrzeit, Airports mit IATA-Codes, Preis, Buchungscode, Airline, Sitzplatz, Gepaeck).
|
||||||
|
- Bei Screenshots von Fehlern/Logs: Identifiziere das Problem, ordne es einem Container/Service zu, schlage Loesung vor.
|
||||||
|
- Bei Rechnungen/Dokumenten: Extrahiere Betrag, Datum, Absender, Faelligkeit.
|
||||||
|
- WICHTIG: Speichere erkannte Reiseplaene, Termine, Buchungen IMMER via memory_suggest (memory_type="plan", mit expires_at).
|
||||||
|
- Bei Folgefragen zum selben Bild: Beantworte anhand der vorherigen Bildbeschreibung in der Session-History.
|
||||||
|
|
||||||
TOOLS:
|
TOOLS:
|
||||||
Nutze Tools fuer Live-Daten. Wenn alles OK: kurz sagen. Bei Problemen: erklaeren + Loesung."""
|
Nutze Tools fuer Live-Daten. Wenn alles OK: kurz sagen. Bei Problemen: erklaeren + Loesung."""
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -435,6 +435,98 @@ async def handle_photo(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
|
||||||
await update.message.reply_text(f"Fehler bei Bildanalyse: {e}")
|
await update.message.reply_text(f"Fehler bei Bildanalyse: {e}")
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_pdf_text(pdf_bytes: bytes, max_pages: int = 10) -> str:
    """Extract the text of a PDF using PyPDF2.

    Only the first ``max_pages`` pages are read. Pages without any
    non-whitespace text are skipped; every remaining page is prefixed with a
    ``--- Seite N ---`` marker and the pages are joined by blank lines.

    The function never raises: any failure is logged as a warning. A page
    whose text extraction fails is skipped on its own, so the text of the
    other pages is still returned.

    Args:
        pdf_bytes: Raw content of the PDF file.
        max_pages: Maximum number of leading pages to read (default 10).

    Returns:
        The extracted text, or an empty string when nothing could be
        extracted (no text layer, unreadable file, PyPDF2 unavailable).
    """
    pages = []
    try:
        import io as _io

        from PyPDF2 import PdfReader

        reader = PdfReader(_io.BytesIO(pdf_bytes))
        for page_no, page in enumerate(reader.pages[:max_pages], start=1):
            # Isolate failures per page: one broken page must not throw away
            # the text that was already extracted from the other pages.
            try:
                text = page.extract_text()
            except Exception as e:
                log.warning("PDF-Extraktion fuer Seite %d fehlgeschlagen: %s", page_no, e)
                continue
            if text and text.strip():
                pages.append(f"--- Seite {page_no} ---\n{text.strip()}")
    except Exception as e:
        # Best effort: opening the file or loading a page failed. Whatever
        # was collected before the failure is still returned below.
        log.warning("PDF-Extraktion fehlgeschlagen: %s", e)
    return "\n\n".join(pages)
|
||||||
|
|
||||||
|
|
||||||
|
async def handle_document(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
    """Handle a document message: analyse image files and PDFs.

    Unauthorized updates and messages without a document are ignored.
    Image documents (MIME type ``image/*``) are downloaded, base64-encoded
    and passed to ``llm.ask_with_image``. PDFs (``application/pdf``) are
    downloaded, their text is extracted with ``_extract_pdf_text`` and the
    first 6000 characters are passed to ``llm.ask_with_tools`` together with
    the caption (or a default question). Any other MIME type is answered
    with an "unsupported" notice.

    When a session id is available, the user message and the answer are
    logged via ``memory_client``. The reply is cut to 4000 characters.
    Errors during analysis are logged and reported back to the chat.
    """
    if not _authorized(update):
        return
    attachment = update.message.document
    if not attachment:
        return

    mime_type = attachment.mime_type or ""
    caption_text = update.message.caption or ""
    chat_key = str(update.effective_chat.id)
    sid = memory_client.get_or_create_session(chat_key, source="telegram")

    is_image = mime_type.startswith("image/")
    is_pdf = mime_type == "application/pdf"

    # Guard: everything that is neither an image nor a PDF is rejected.
    if not (is_image or is_pdf):
        await update.message.reply_text(
            f"Dateityp '{mime_type}' wird nicht unterstuetzt.\n"
            "Unterstuetzt: Bilder (JPG/PNG) und PDFs."
        )
        return

    if is_image:
        await update.message.reply_text("🔍 Analysiere Bild...")
        try:
            import base64

            remote_file = await ctx.bot.get_file(attachment.file_id)
            raw_image = await remote_file.download_as_bytearray()
            encoded_image = base64.b64encode(bytes(raw_image)).decode("utf-8")

            # Reset the suggestion state and tag the source before the tool
            # handlers for this session are built.
            context.last_suggest_result = {"type": None}
            context.set_source_type("telegram_photo")
            tool_handlers = context.get_tool_handlers(session_id=sid)
            reply = llm.ask_with_image(
                encoded_image, caption_text, tool_handlers, session_id=sid
            )

            if sid:
                if caption_text:
                    logged_user_msg = f"[Bild-Datei] {caption_text}"
                else:
                    logged_user_msg = "[Bild-Datei gesendet]"
                memory_client.log_message(sid, "user", logged_user_msg)
                memory_client.log_message(sid, "assistant", reply)

            await update.message.reply_text(reply[:4000], reply_markup=KEYBOARD)
        except Exception as e:
            log.exception("Fehler bei Bild-Dokument")
            await update.message.reply_text(f"Fehler bei Bildanalyse: {e}")
        return

    # Remaining case: PDF document.
    await update.message.reply_text("📄 Lese PDF...")
    try:
        remote_file = await ctx.bot.get_file(attachment.file_id)
        raw_pdf = await remote_file.download_as_bytearray()
        extracted = _extract_pdf_text(bytes(raw_pdf))

        if not extracted:
            # No text could be extracted; suggest the photo route instead.
            await update.message.reply_text(
                "PDF enthält keinen extrahierbaren Text (evtl. gescannt/Bild-PDF).\n"
                "Tipp: Sende einen Screenshot des PDFs als Foto — dann kann ich es per Bilderkennung lesen."
            )
            return

        if caption_text:
            question = caption_text
        else:
            question = "Analysiere dieses Dokument. Was sind die wichtigsten Informationen?"
        # Only the first 6000 characters of the PDF text go into the prompt.
        prompt = f"{question}\n\n--- PDF-INHALT ---\n{extracted[:6000]}"

        context.last_suggest_result = {"type": None}
        context.set_source_type("telegram_pdf")
        tool_handlers = context.get_tool_handlers(session_id=sid)
        reply = llm.ask_with_tools(prompt, tool_handlers, session_id=sid)

        if sid:
            pdf_tag = f"[PDF: {attachment.file_name or 'dokument.pdf'}]"
            logged_user_msg = f"{pdf_tag} {caption_text}" if caption_text else pdf_tag
            memory_client.log_message(sid, "user", logged_user_msg)
            memory_client.log_message(sid, "assistant", reply)

        await update.message.reply_text(reply[:4000], reply_markup=KEYBOARD)
    except Exception as e:
        log.exception("Fehler bei PDF-Analyse")
        await update.message.reply_text(f"Fehler bei PDF: {e}")
|
||||||
|
|
||||||
|
|
||||||
async def handle_message(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
|
async def handle_message(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
|
||||||
"""Button-Presses und Freitext-Fragen verarbeiten."""
|
"""Button-Presses und Freitext-Fragen verarbeiten."""
|
||||||
if not _authorized(update):
|
if not _authorized(update):
|
||||||
|
|
@ -505,6 +597,7 @@ def main():
|
||||||
app.add_handler(CommandHandler("memory", cmd_memory))
|
app.add_handler(CommandHandler("memory", cmd_memory))
|
||||||
app.add_handler(MessageHandler(filters.VOICE, handle_voice))
|
app.add_handler(MessageHandler(filters.VOICE, handle_voice))
|
||||||
app.add_handler(MessageHandler(filters.PHOTO, handle_photo))
|
app.add_handler(MessageHandler(filters.PHOTO, handle_photo))
|
||||||
|
app.add_handler(MessageHandler(filters.Document.ALL, handle_document))
|
||||||
app.add_handler(MessageHandler(filters.TEXT & ~filters.COMMAND, handle_message))
|
app.add_handler(MessageHandler(filters.TEXT & ~filters.COMMAND, handle_message))
|
||||||
|
|
||||||
async def post_init(application):
|
async def post_init(application):
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue