diff --git a/homelab-ai-bot/voice.py b/homelab-ai-bot/voice.py index e2e219b8..74d1f095 100644 --- a/homelab-ai-bot/voice.py +++ b/homelab-ai-bot/voice.py @@ -15,8 +15,9 @@ log = logging.getLogger("voice") _api_key: Optional[str] = None -TTS_MODEL = "tts-1" +TTS_MODEL = "gpt-4o-mini-tts" TTS_VOICE = "onyx" +TTS_INSTRUCTIONS = "Sprich klar und deutlich auf Deutsch mit natuerlichem deutschem Akzent. Sachlicher, ruhiger Ton." WHISPER_MODEL = "whisper-1" @@ -64,7 +65,13 @@ def synthesize(text: str) -> Optional[bytes]: r = requests.post( "https://api.openai.com/v1/audio/speech", headers={"Authorization": f"Bearer {key}", "Content-Type": "application/json"}, - json={"model": TTS_MODEL, "input": text, "voice": TTS_VOICE, "response_format": "opus"}, + json={ + "model": TTS_MODEL, + "input": text, + "voice": TTS_VOICE, + "instructions": TTS_INSTRUCTIONS, + "response_format": "opus", + }, timeout=30, ) if r.ok: