homelab-brain/homelab-ai-bot/voice.py

76 lines
2.3 KiB
Python

"""Spracheingabe (Whisper) und Sprachausgabe (TTS) fuer den Hausmeister-Bot.
Nutzt die OpenAI API direkt (nicht OpenRouter).
"""
import io
import logging
from typing import Optional
import requests
from core import config
# Module logger; every message emitted by this file goes to the "voice" logger.
log = logging.getLogger("voice")
# Cached OpenAI API key; populated by _get_key() once a non-empty value is found.
_api_key: Optional[str] = None
# Model and voice sent to the /v1/audio/speech endpoint by synthesize().
TTS_MODEL = "tts-1"
TTS_VOICE = "onyx"
# Model sent to the /v1/audio/transcriptions endpoint by transcribe().
WHISPER_MODEL = "whisper-1"
def _get_key() -> str:
    """Return the OpenAI API key, loading it from the configuration on demand.

    A non-empty key is cached in the module-level ``_api_key`` and returned
    directly on later calls. While no key is available, every call parses the
    configuration again and logs an error.

    Returns:
        The cached key, or a falsy value when ``OPENAI_API_KEY`` is not set.
    """
    global _api_key

    # Fast path: a key was already found by an earlier call.
    if _api_key:
        return _api_key

    _api_key = config.parse_config().raw.get("OPENAI_API_KEY", "")
    if not _api_key:
        log.error("OPENAI_API_KEY fehlt in homelab.conf")
    return _api_key
def transcribe(audio_bytes: bytes, filename: str = "voice.ogg") -> Optional[str]:
    """Transcribe audio via the OpenAI Whisper API.

    Args:
        audio_bytes: Raw audio data to upload. It is always sent with the
            content type ``audio/ogg``.
        filename: File name reported to the API for the uploaded data.

    Returns:
        The transcribed text with surrounding whitespace stripped, or ``None``
        when the input is empty, no API key is configured, the API answers
        with a non-success status, or the request raises.
    """
    # An empty upload cannot be transcribed; skip the key lookup and the
    # network round trip instead of waiting for the API to reject it.
    if not audio_bytes:
        return None

    key = _get_key()
    if not key:
        return None

    try:
        r = requests.post(
            "https://api.openai.com/v1/audio/transcriptions",
            headers={"Authorization": f"Bearer {key}"},
            files={"file": (filename, io.BytesIO(audio_bytes), "audio/ogg")},
            # The language is pinned to German rather than auto-detected.
            data={"model": WHISPER_MODEL, "language": "de"},
            timeout=30,
        )
        if r.ok:
            text = r.json().get("text", "").strip()
            # Only a short prefix is logged to keep log lines bounded.
            log.info("Whisper: %s", text[:80])
            return text
        log.warning("Whisper Fehler: %s %s", r.status_code, r.text[:200])
    except Exception as e:
        # Deliberately broad: transcription is best effort, so any failure
        # (network, malformed JSON, ...) is logged and reported as None.
        log.warning("Whisper Exception: %s", e)
    return None
def synthesize(text: str) -> Optional[bytes]:
    """Generate speech for *text* via the OpenAI TTS API.

    Args:
        text: Text to speak. Only the first 4000 characters are sent.

    Returns:
        The audio bytes returned by the API (requested in ``opus`` format),
        or ``None`` when the text is empty, no API key is configured, the API
        answers with a non-success status, or the request raises.
    """
    # Nothing to speak: skip the key lookup and the network round trip. This
    # also keeps a ``None`` argument from raising in the slicing below.
    if not text:
        return None

    key = _get_key()
    if not key:
        return None

    # Cap the input length; presumably this keeps the request below the API's
    # input size limit -- TODO confirm against the current API reference.
    text = text[:4000]

    try:
        r = requests.post(
            "https://api.openai.com/v1/audio/speech",
            headers={"Authorization": f"Bearer {key}", "Content-Type": "application/json"},
            json={"model": TTS_MODEL, "input": text, "voice": TTS_VOICE, "response_format": "opus"},
            timeout=30,
        )
        if r.ok:
            log.info("TTS: %d bytes fuer %d Zeichen", len(r.content), len(text))
            return r.content
        log.warning("TTS Fehler: %s %s", r.status_code, r.text[:200])
    except Exception as e:
        # Deliberately broad: speech output is best effort, so any failure is
        # logged and reported as None instead of propagating to the caller.
        log.warning("TTS Exception: %s", e)
    return None