Voice-Modul: Whisper STT + TTS Onyx fuer Hausmeister-Bot
This commit is contained in:
parent
5643182bd7
commit
74e205fe98
3 changed files with 123 additions and 0 deletions
|
|
@ -80,6 +80,7 @@ import requests as _req
|
|||
import llm
|
||||
import memory_client
|
||||
import monitor
|
||||
import voice
|
||||
from core import config
|
||||
|
||||
logging.basicConfig(
|
||||
|
|
@ -383,6 +384,50 @@ async def handle_memory_callback(update: Update, ctx: ContextTypes.DEFAULT_TYPE)
|
|||
await query.edit_message_text("Fehler beim Loeschen.")
|
||||
|
||||
|
||||
async def handle_voice(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
    """Handle a Telegram voice message: speech-to-text -> LLM -> text + speech reply.

    Steps:
      1. Download the voice file from Telegram.
      2. Transcribe it with ``voice.transcribe``.
      3. Pass the transcript to ``llm.ask_with_tools`` using the tool handlers
         for the chat's memory session.
      4. Log both turns through ``memory_client`` when a session id exists.
      5. Reply with the answer as text and, if ``voice.synthesize`` returns
         audio, additionally as a voice message.

    Unauthorized updates and updates without a voice payload are ignored.
    Any exception raised while processing is logged and reported to the chat.
    """
    # Function-scope imports, in line with the local import this handler
    # already used for ``io``.
    import asyncio
    import io as _io

    if not _authorized(update):
        return
    voice_msg = update.message.voice
    if not voice_msg:
        return

    await update.message.reply_text("🎙 Höre zu...")
    try:
        loop = asyncio.get_running_loop()

        tg_file = await ctx.bot.get_file(voice_msg.file_id)
        audio_data = await tg_file.download_as_bytearray()

        # voice.transcribe performs a synchronous HTTP request; run it in the
        # default executor so the event loop is not stalled while it waits.
        text = await loop.run_in_executor(None, voice.transcribe, bytes(audio_data))
        if not text:
            await update.message.reply_text("Konnte die Nachricht nicht verstehen.")
            return

        log.info("Voice transkribiert: %s", text[:100])
        await update.message.reply_text(f"🗣 \"{text}\"\n\n🤔 Denke nach...")

        channel_key = str(update.effective_chat.id)
        session_id = memory_client.get_or_create_session(channel_key, source="telegram")

        # Reset the shared suggestion state before the tool handlers run.
        context.last_suggest_result = {"type": None, "candidate_id": None}
        handlers = context.get_tool_handlers(session_id=session_id)
        # NOTE(review): llm.ask_with_tools and the memory_client calls are also
        # synchronous; they are left inline because their thread-safety is not
        # visible from this handler — confirm before offloading them as well.
        answer = llm.ask_with_tools(text, handlers, session_id=session_id)
        if session_id:
            memory_client.log_message(session_id, "user", text)
            memory_client.log_message(session_id, "assistant", answer)

        # Same 4000-character cut for the text reply and for the TTS input.
        reply = answer[:4000]
        await update.message.reply_text(reply, reply_markup=KEYBOARD)

        # voice.synthesize is a synchronous HTTP request as well.
        audio_out = await loop.run_in_executor(None, voice.synthesize, reply)
        if audio_out:
            await update.message.reply_voice(voice=_io.BytesIO(audio_out))
        else:
            log.warning("TTS fehlgeschlagen — nur Text gesendet")
    except Exception as e:
        log.exception("Fehler bei Voice-Nachricht")
        await update.message.reply_text(f"Fehler: {e}")
|
||||
|
||||
|
||||
async def handle_message(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
|
||||
"""Button-Presses und Freitext-Fragen verarbeiten."""
|
||||
if not _authorized(update):
|
||||
|
|
@ -517,6 +562,7 @@ def main():
|
|||
app.add_handler(CommandHandler("feeds", cmd_feeds))
|
||||
app.add_handler(CommandHandler("memory", cmd_memory))
|
||||
app.add_handler(CallbackQueryHandler(handle_memory_callback, pattern=r"^mem_"))
|
||||
app.add_handler(MessageHandler(filters.VOICE, handle_voice))
|
||||
app.add_handler(MessageHandler(filters.TEXT & ~filters.COMMAND, handle_message))
|
||||
|
||||
async def post_init(application):
|
||||
|
|
|
|||
76
homelab-ai-bot/voice.py
Normal file
76
homelab-ai-bot/voice.py
Normal file
|
|
@ -0,0 +1,76 @@
|
|||
"""Spracheingabe (Whisper) und Sprachausgabe (TTS) fuer den Hausmeister-Bot.
|
||||
|
||||
Nutzt die OpenAI API direkt (nicht OpenRouter).
|
||||
"""
|
||||
|
||||
import io
|
||||
import logging
|
||||
from typing import Optional
|
||||
|
||||
import requests
|
||||
|
||||
from core import config
|
||||
|
||||
log = logging.getLogger("voice")
|
||||
|
||||
_api_key: Optional[str] = None
|
||||
|
||||
TTS_MODEL = "tts-1"
|
||||
TTS_VOICE = "onyx"
|
||||
WHISPER_MODEL = "whisper-1"
|
||||
|
||||
|
||||
def _get_key() -> str:
    """Return the OpenAI API key, loading it from the parsed config on demand.

    A non-empty key is cached in the module-level ``_api_key`` and returned
    directly on later calls. While no key is cached, the config is parsed
    again on every call and an error is logged if ``OPENAI_API_KEY`` is
    still missing or empty.
    """
    global _api_key
    if _api_key:
        return _api_key

    _api_key = config.parse_config().raw.get("OPENAI_API_KEY", "")
    if not _api_key:
        log.error("OPENAI_API_KEY fehlt in homelab.conf")
    return _api_key
|
||||
|
||||
|
||||
def transcribe(audio_bytes: bytes, filename: str = "voice.ogg") -> Optional[str]:
    """Transcribe audio through the OpenAI transcription endpoint.

    Args:
        audio_bytes: Raw audio data; uploaded with MIME type ``audio/ogg``.
        filename: File name reported to the API in the multipart upload.

    Returns:
        The stripped transcript text on success (may be an empty string if
        the API returns no text), or ``None`` when the input is empty, no
        API key is configured, the API answers with an error status, or the
        request raises.
    """
    # Nothing to transcribe: skip the network round-trip, which could only
    # end in an API error anyway.
    if not audio_bytes:
        return None

    key = _get_key()
    if not key:
        return None
    try:
        r = requests.post(
            "https://api.openai.com/v1/audio/transcriptions",
            headers={"Authorization": f"Bearer {key}"},
            files={"file": (filename, io.BytesIO(audio_bytes), "audio/ogg")},
            data={"model": WHISPER_MODEL, "language": "de"},
            timeout=30,
        )
        if r.ok:
            text = r.json().get("text", "").strip()
            log.info("Whisper: %s", text[:80])
            return text
        log.warning("Whisper Fehler: %s %s", r.status_code, r.text[:200])
    except Exception as e:
        # Best effort: callers treat None as "could not understand".
        log.warning("Whisper Exception: %s", e)
    return None
|
||||
|
||||
|
||||
def synthesize(text: str) -> Optional[bytes]:
    """Generate speech for ``text`` through the OpenAI speech endpoint.

    The input is cut to at most 4000 characters before it is sent. The
    request uses ``TTS_MODEL`` / ``TTS_VOICE`` and asks for
    ``response_format`` "opus".

    Args:
        text: The text to speak.

    Returns:
        The raw response body (audio bytes) on success, or ``None`` when the
        text is empty or whitespace-only, no API key is configured, the API
        answers with an error status, or the request raises.
    """
    # Nothing to speak: skip the network round-trip for empty input.
    if not text or not text.strip():
        return None

    key = _get_key()
    if not key:
        return None

    # Slicing is a no-op for shorter strings, so no length check is needed.
    text = text[:4000]
    try:
        r = requests.post(
            "https://api.openai.com/v1/audio/speech",
            headers={"Authorization": f"Bearer {key}", "Content-Type": "application/json"},
            json={"model": TTS_MODEL, "input": text, "voice": TTS_VOICE, "response_format": "opus"},
            timeout=30,
        )
        if r.ok:
            log.info("TTS: %d bytes fuer %d Zeichen", len(r.content), len(text))
            return r.content
        log.warning("TTS Fehler: %s %s", r.status_code, r.text[:200])
    except Exception as e:
        # Best effort: callers fall back to a text-only reply on None.
        log.warning("TTS Exception: %s", e)
    return None
|
||||
|
|
@ -176,6 +176,7 @@ FORGEJO_TOKEN="b874766bdf357bd4c32fa4369d0c588fc6193336"
|
|||
FORGEJO_SYNC_TOKEN="5402da0447b0eb6aede721a8748a08974ddc5c42"
|
||||
GITHUB_PAT="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"
|
||||
OPENROUTER_KEY="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"
|
||||
OPENAI_API_KEY="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"
|
||||
MEMORY_API_TOKEN="Ai8eeQibV6Z1RWc7oNPim4PXB4vILU1nRW2-XgRcX2M"
|
||||
MEMORY_API_URL="http://100.121.192.94:8400"
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue