TTS in Chunks aufgeteilt und STT-Phrasenlimit auf 60 Sekunden erhöht

2025-12-27 03:22:26 +01:00 · 2025-12-27 03:22:26 +01:00 · 10d68d256c
parent 37f89ca76f
commit 10d68d256c
3 changed files with 88 additions and 16 deletions
--- a/python_bridge/config.yaml
+++ b/python_bridge/config.yaml
@ -98,7 +98,8 @@ stt:
  pause_threshold: 0.8

  # Maximale Aufnahmelänge pro Phrase in Sekunden
-  phrase_time_limit: 15
+  # Bei langen Sätzen höher setzen (Google STT unterstützt bis ~60s)
+  phrase_time_limit: 60

 # ============================================================================
 # Termux (Android) Einstellungen
--- a/python_bridge/stt_engine.py
+++ b/python_bridge/stt_engine.py
@ -28,7 +28,7 @@ class STTEngine:
        self,
        energy_threshold: int = 300,
        pause_threshold: float = 0.8,
-        phrase_time_limit: int = 15,
+        phrase_time_limit: int = 60,
        service: str = "google",
        language: str = "de-DE"
    ):
--- a/python_bridge/tts_engine.py
+++ b/python_bridge/tts_engine.py
@ -108,6 +108,10 @@ class Pyttsx3Engine(TTSEngine):
 class GTTSEngine(TTSEngine):
    """TTS using Google Text-to-Speech (online, better quality)"""

+    # Maximale Chunk-Größe für gTTS (Zeichen)
+    # Google hat ein Limit von ~5000 Zeichen; zur Sicherheit nehmen wir weniger
+    MAX_CHUNK_SIZE = 500
+
    def __init__(self, language: str = "de"):
        from gtts import gTTS
        import pygame
@ -122,8 +126,60 @@ class GTTSEngine(TTSEngine):

        logger.info(f"gTTS engine initialized (language: {language})")

+    def _split_text_into_chunks(self, text: str) -> list:
+        """
+        Teilt langen Text in Chunks auf.
+
+        Versucht an Satzenden zu splitten (. ! ?) für natürlichere Pausen.
+        """
+        if len(text) <= self.MAX_CHUNK_SIZE:
+            return [text]
+
+        chunks = []
+        current_chunk = ""
+
+        # Teile nach Sätzen (., !, ?)
+        import re
+        sentences = re.split(r'(?<=[.!?])\s+', text)
+
+        for sentence in sentences:
+            # Wenn Satz selbst zu lang ist, teile nach Kommas oder Wörtern
+            if len(sentence) > self.MAX_CHUNK_SIZE:
+                # Teile nach Kommas
+                parts = re.split(r'(?<=,)\s+', sentence)
+                for part in parts:
+                    if len(part) > self.MAX_CHUNK_SIZE:
+                        # Letzter Ausweg: Teile nach Wörtern
+                        words = part.split()
+                        for word in words:
+                            if len(current_chunk) + len(word) + 1 > self.MAX_CHUNK_SIZE:
+                                if current_chunk:
+                                    chunks.append(current_chunk.strip())
+                                current_chunk = word
+                            else:
+                                current_chunk += " " + word if current_chunk else word
+                    elif len(current_chunk) + len(part) + 1 > self.MAX_CHUNK_SIZE:
+                        if current_chunk:
+                            chunks.append(current_chunk.strip())
+                        current_chunk = part
+                    else:
+                        current_chunk += " " + part if current_chunk else part
+            elif len(current_chunk) + len(sentence) + 1 > self.MAX_CHUNK_SIZE:
+                if current_chunk:
+                    chunks.append(current_chunk.strip())
+                current_chunk = sentence
+            else:
+                current_chunk += " " + sentence if current_chunk else sentence
+
+        # Letzten Chunk hinzufügen
+        if current_chunk:
+            chunks.append(current_chunk.strip())
+
+        logger.debug(f"Text in {len(chunks)} Chunks aufgeteilt ({len(text)} Zeichen)")
+        return chunks
+
    def speak(self, text: str) -> None:
-        """Speak text (blocking)"""
+        """Speak text (blocking) - teilt lange Texte automatisch auf"""
        from gtts import gTTS
        import pygame
        import tempfile
@ -131,23 +187,38 @@ class GTTSEngine(TTSEngine):

        self._speaking = True
        try:
-            # Generate audio file
-            tts = gTTS(text=text, lang=self.language)
+            # Teile langen Text in Chunks
+            chunks = self._split_text_into_chunks(text)

-            with tempfile.NamedTemporaryFile(suffix='.mp3', delete=False) as f:
-                temp_path = f.name
-                tts.save(temp_path)
+            for i, chunk in enumerate(chunks):
+                if self._stop_flag:
+                    break

-            # Play audio
-            pygame.mixer.music.load(temp_path)
-            pygame.mixer.music.play()
+                if not chunk.strip():
+                    continue

-            # Wait for playback to finish
-            while pygame.mixer.music.get_busy():
-                pygame.time.Clock().tick(10)
+                logger.debug(f"Spreche Chunk {i+1}/{len(chunks)}: {chunk[:50]}...")

-            # Cleanup
-            os.unlink(temp_path)
+                # Generate audio file
+                tts = gTTS(text=chunk, lang=self.language)
+
+                with tempfile.NamedTemporaryFile(suffix='.mp3', delete=False) as f:
+                    temp_path = f.name
+                    tts.save(temp_path)
+
+                # Play audio
+                pygame.mixer.music.load(temp_path)
+                pygame.mixer.music.play()
+
+                # Wait for playback to finish
+                while pygame.mixer.music.get_busy() and not self._stop_flag:
+                    pygame.time.Clock().tick(10)
+
+                # Cleanup
+                try:
+                    os.unlink(temp_path)
+                except:
+                    pass

        except Exception as e:
            logger.error(f"gTTS error: {e}")