split sentences

2026-03-29 18:42:24 +02:00
parent 4893616a5a
commit 680f7a64e2
1 changed files with 36 additions and 8 deletions
@@ -199,20 +199,48 @@ class VoiceEngine:
            return None

        try:
-            # Piper gibt PCM-Samples zurueck, wir schreiben sie als WAV
+            # Langen Text in Saetze aufteilen (Piper hat Limits bei langen Texten)
+            import re
+            sentences = re.split(r'(?<=[.!?])\s+', text.strip())
+            # Markdown-Formatierung entfernen
+            sentences = [re.sub(r'\*\*([^*]+)\*\*', r'\1', s).strip() for s in sentences if s.strip()]
+
+            if not sentences:
+                return None
+
+            # Jeden Satz einzeln synthetisieren und WAVs zusammenfuegen
+            all_audio = b""
+            sample_rate = None
+            for sentence in sentences:
+                if not sentence:
+                    continue
+                with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
+                    tmp_path = tmp.name
+                with wave.open(tmp_path, "wb") as wav_file:
+                    voice.synthesize_wav(sentence, wav_file)
+                with wave.open(tmp_path, "rb") as wav_file:
+                    if sample_rate is None:
+                        sample_rate = wav_file.getframerate()
+                    all_audio += wav_file.readframes(wav_file.getnframes())
+                Path(tmp_path).unlink(missing_ok=True)
+
+            # Zusammengefuegtes WAV erstellen
            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
-                tmp_path = tmp.name
+                final_path = tmp.name
+            with wave.open(final_path, "wb") as wav_file:
+                wav_file.setnchannels(1)
+                wav_file.setsampwidth(2)
+                wav_file.setframerate(sample_rate or 22050)
+                wav_file.writeframes(all_audio)

-            with wave.open(tmp_path, "wb") as wav_file:
-                voice.synthesize_wav(text, wav_file)
-
-            audio_data = Path(tmp_path).read_bytes()
-            Path(tmp_path).unlink(missing_ok=True)
+            audio_data = Path(final_path).read_bytes()
+            Path(final_path).unlink(missing_ok=True)

            logger.info(
-                "TTS: %d bytes erzeugt mit %s — '%s'",
+                "TTS: %d bytes erzeugt mit %s (%d Saetze) — '%s'",
                len(audio_data),
                voice_name,
+                len(sentences),
                text[:60],
            )
            return audio_data