83 lines
2.5 KiB
Python
83 lines
2.5 KiB
Python
"""Spracheingabe (Whisper) und Sprachausgabe (TTS) fuer den Hausmeister-Bot.
|
|
|
|
Nutzt die OpenAI API direkt (nicht OpenRouter).
|
|
"""
|
|
|
|
import io
|
|
import logging
|
|
from typing import Optional
|
|
|
|
import requests
|
|
|
|
from core import config
|
|
|
|
# Logger for this module, registered under the fixed name "voice".
log = logging.getLogger("voice")

# Cached OpenAI API key; stays None until _get_key() has read it from the config.
_api_key: Optional[str] = None

# Speech-to-text model sent to the transcription endpoint.
WHISPER_MODEL = "whisper-1"

# Text-to-speech request parameters (model, voice and speaking-style prompt).
TTS_MODEL = "gpt-4o-mini-tts"
TTS_VOICE = "onyx"
TTS_INSTRUCTIONS = (
    "Sprich klar und deutlich auf Deutsch mit natuerlichem deutschem Akzent. "
    "Sachlicher, ruhiger Ton."
)
|
|
|
|
|
|
def _get_key() -> str:
    """Return the OpenAI API key, loading it from the config on first use.

    The value is cached in the module-level ``_api_key``. While that cache is
    still empty, each call parses the configuration again and looks up
    ``OPENAI_API_KEY`` (default ``""``). If the lookup yields nothing, an
    error is logged and the empty value is returned.
    """
    global _api_key
    # Fast path: a key found by an earlier call is reused as-is.
    if _api_key:
        return _api_key
    _api_key = config.parse_config().raw.get("OPENAI_API_KEY", "")
    if not _api_key:
        log.error("OPENAI_API_KEY fehlt in homelab.conf")
    return _api_key
|
|
|
|
|
|
def transcribe(audio_bytes: bytes, filename: str = "voice.ogg") -> Optional[str]:
    """Transcribe audio to text via the OpenAI transcription endpoint.

    Args:
        audio_bytes: Raw audio data. It is uploaded as a multipart file with
            the MIME type ``audio/ogg``.
        filename: File name reported to the API for the upload.

    Returns:
        The transcript with surrounding whitespace stripped (this may be an
        empty string), or ``None`` when there is no audio, no API key is
        available, the API answers with a non-OK status, or the request
        raises an exception.
    """
    # Nothing to upload: skip the key lookup and the network round trip.
    if not audio_bytes:
        return None
    key = _get_key()
    if not key:
        return None
    try:
        r = requests.post(
            "https://api.openai.com/v1/audio/transcriptions",
            headers={"Authorization": f"Bearer {key}"},
            files={"file": (filename, io.BytesIO(audio_bytes), "audio/ogg")},
            # The transcription language is fixed to German.
            data={"model": WHISPER_MODEL, "language": "de"},
            timeout=30,
        )
        if r.ok:
            text = r.json().get("text", "").strip()
            # Only the first 80 characters of the transcript go to the log.
            log.info("Whisper: %s", text[:80])
            return text
        log.warning("Whisper Fehler: %s %s", r.status_code, r.text[:200])
    except Exception as e:
        # Best effort: any failure while sending or decoding is logged and
        # reported to the caller as None instead of being raised.
        log.warning("Whisper Exception: %s", e)
    return None
|
|
|
|
|
|
def synthesize(text: str) -> Optional[bytes]:
    """Generate speech for ``text`` via the OpenAI speech endpoint.

    Args:
        text: Text to speak. Only the first 4000 characters are sent.

    Returns:
        The raw response body (audio requested with ``response_format``
        ``"opus"``), or ``None`` when ``text`` is empty or whitespace-only,
        no API key is available, the API answers with a non-OK status, or
        the request raises an exception.
    """
    # Nothing to speak: skip the key lookup and the network round trip. This
    # also keeps a None argument from raising before the try block.
    if not text or not text.strip():
        return None
    key = _get_key()
    if not key:
        return None
    # Hard cap on the request size; presumably chosen to stay below the API's
    # input length limit (verify against the API reference).
    text = text[:4000]
    try:
        r = requests.post(
            "https://api.openai.com/v1/audio/speech",
            headers={"Authorization": f"Bearer {key}", "Content-Type": "application/json"},
            json={
                "model": TTS_MODEL,
                "input": text,
                "voice": TTS_VOICE,
                "instructions": TTS_INSTRUCTIONS,
                "response_format": "opus",
            },
            timeout=30,
        )
        if r.ok:
            log.info("TTS: %d bytes fuer %d Zeichen", len(r.content), len(text))
            return r.content
        log.warning("TTS Fehler: %s %s", r.status_code, r.text[:200])
    except Exception as e:
        # Best effort: any failure while sending is logged and reported to
        # the caller as None instead of being raised.
        log.warning("TTS Exception: %s", e)
    return None
|