diff --git a/bridge/aria_bridge.py b/bridge/aria_bridge.py
index def17b6..dd3f163 100644
--- a/bridge/aria_bridge.py
+++ b/bridge/aria_bridge.py
@@ -199,20 +199,48 @@ class VoiceEngine:
             return None
 
         try:
-            # Piper gibt PCM-Samples zurueck, wir schreiben sie als WAV
+            # Langen Text in Saetze aufteilen (Piper hat Limits bei langen Texten)
+            import re
+            sentences = re.split(r'(?<=[.!?])\s+', text.strip())
+            # Markdown-Formatierung entfernen
+            sentences = [re.sub(r'\*\*([^*]+)\*\*', r'\1', s).strip() for s in sentences if s.strip()]
+
+            if not sentences:
+                return None
+
+            # Jeden Satz einzeln synthetisieren und WAVs zusammenfuegen
+            all_audio = b""
+            sample_rate = None
+            for sentence in sentences:
+                if not sentence:
+                    continue
+                with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
+                    tmp_path = tmp.name
+                with wave.open(tmp_path, "wb") as wav_file:
+                    voice.synthesize_wav(sentence, wav_file)
+                with wave.open(tmp_path, "rb") as wav_file:
+                    if sample_rate is None:
+                        sample_rate = wav_file.getframerate()
+                    all_audio += wav_file.readframes(wav_file.getnframes())
+                Path(tmp_path).unlink(missing_ok=True)
+
+            # Zusammengefuegtes WAV erstellen
             with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
-                tmp_path = tmp.name
+                final_path = tmp.name
+            with wave.open(final_path, "wb") as wav_file:
+                wav_file.setnchannels(1)
+                wav_file.setsampwidth(2)
+                wav_file.setframerate(sample_rate or 22050)
+                wav_file.writeframes(all_audio)
-            with wave.open(tmp_path, "wb") as wav_file:
-                voice.synthesize_wav(text, wav_file)
-
-            audio_data = Path(tmp_path).read_bytes()
-            Path(tmp_path).unlink(missing_ok=True)
+            audio_data = Path(final_path).read_bytes()
+            Path(final_path).unlink(missing_ok=True)
 
             logger.info(
-                "TTS: %d bytes erzeugt mit %s — '%s'",
+                "TTS: %d bytes erzeugt mit %s (%d Saetze) — '%s'",
                 len(audio_data),
                 voice_name,
+                len(sentences),
                 text[:60],
             )
             return audio_data
 