TTS in Chunks und STT auf 60 Sekunden erhöht

2025-12-27 03:22:26 +01:00 · 2025-12-27 03:22:26 +01:00 · 10d68d256c
parent 37f89ca76f
commit 10d68d256c
3 changed files with 88 additions and 16 deletions
--- a/python_bridge/config.yaml
+++ b/python_bridge/config.yaml
@ -98,7 +98,8 @@ stt:
  pause_threshold: 0.8
  # Maximale Aufnahmelänge pro Phrase in Sekunden
-  phrase_time_limit: 15
+  # Bei langen Sätzen höher setzen (Google STT unterstützt bis ~60s)
  phrase_time_limit: 60
 # ============================================================================
 # Termux (Android) Einstellungen
--- a/python_bridge/stt_engine.py
+++ b/python_bridge/stt_engine.py
@ -28,7 +28,7 @@ class STTEngine:
        self,
        energy_threshold: int = 300,
        pause_threshold: float = 0.8,
-        phrase_time_limit: int = 15,
+        phrase_time_limit: int = 60,
        service: str = "google",
        language: str = "de-DE"
    ):
--- a/python_bridge/tts_engine.py
+++ b/python_bridge/tts_engine.py
@ -108,6 +108,10 @@ class Pyttsx3Engine(TTSEngine):
 class GTTSEngine(TTSEngine):
    """TTS using Google Text-to-Speech (online, better quality)"""
    # Maximale Chunk-Größe für gTTS (Zeichen)
    # Google hat ein Limit von ~5000 Zeichen, wir nehmen weniger für Sicherheit
    MAX_CHUNK_SIZE = 500
    def __init__(self, language: str = "de"):
        from gtts import gTTS
        import pygame
@ -122,8 +126,60 @@ class GTTSEngine(TTSEngine):
        logger.info(f"gTTS engine initialized (language: {language})")
    def _split_text_into_chunks(self, text: str) -> list:
        """
        Split long text into chunks of at most ``MAX_CHUNK_SIZE`` characters.

        Text that already fits is returned unchanged as a single-element list.
        Otherwise the text is broken into pieces at the most natural boundary
        that keeps each piece within the limit:

        1. sentence ends (``.``, ``!``, ``?`` followed by whitespace),
        2. commas (for sentences that are too long on their own),
        3. whitespace between words (for comma-parts that are still too long),
        4. a hard cut every ``MAX_CHUNK_SIZE`` characters (for a single
           whitespace-free token that exceeds the limit, e.g. a long URL).

        The pieces are then packed greedily, joined by single spaces, so that
        no chunk exceeds the limit.

        Args:
            text: The text to split.

        Returns:
            List of chunk strings in original order. May be empty when the
            text is longer than the limit but consists only of whitespace.
        """
        import re

        limit = self.MAX_CHUNK_SIZE
        if len(text) <= limit:
            return [text]

        # Guard the slicing step so a misconfigured limit (<= 0) cannot make
        # range() raise on a zero/negative step.
        hard_cut = max(limit, 1)

        def iter_pieces():
            """Yield pieces no longer than the limit, coarsest boundary first."""
            for sentence in re.split(r'(?<=[.!?])\s+', text):
                if len(sentence) <= limit:
                    yield sentence
                    continue
                # Sentence too long on its own: fall back to comma boundaries.
                for part in re.split(r'(?<=,)\s+', sentence):
                    if len(part) <= limit:
                        yield part
                        continue
                    # Last resort: word boundaries; a single oversized word
                    # is cut into fixed-size slices so the limit always holds.
                    for word in part.split():
                        for start in range(0, len(word), hard_cut):
                            yield word[start:start + hard_cut]

        chunks = []
        current_chunk = ""
        for piece in iter_pieces():
            # +1 accounts for the joining space.
            if len(current_chunk) + len(piece) + 1 > limit:
                if current_chunk:
                    chunks.append(current_chunk.strip())
                current_chunk = piece
            else:
                current_chunk = f"{current_chunk} {piece}" if current_chunk else piece

        # Flush whatever is left after the last piece.
        if current_chunk:
            chunks.append(current_chunk.strip())

        logger.debug(f"Text in {len(chunks)} Chunks aufgeteilt ({len(text)} Zeichen)")
        return chunks
    def speak(self, text: str) -> None:
-        """Speak text (blocking)"""
+        """Speak text (blocking) - teilt lange Texte automatisch auf"""
        from gtts import gTTS
        import pygame
        import tempfile
@ -131,8 +187,20 @@ class GTTSEngine(TTSEngine):
        self._speaking = True
        try:
            # Teile langen Text in Chunks
            chunks = self._split_text_into_chunks(text)
            for i, chunk in enumerate(chunks):
                if self._stop_flag:
                    break
                if not chunk.strip():
                    continue
                logger.debug(f"Spreche Chunk {i+1}/{len(chunks)}: {chunk[:50]}...")
                # Generate audio file
-            tts = gTTS(text=text, lang=self.language)
+                tts = gTTS(text=chunk, lang=self.language)
                with tempfile.NamedTemporaryFile(suffix='.mp3', delete=False) as f:
                    temp_path = f.name
@ -143,11 +211,14 @@ class GTTSEngine(TTSEngine):
                pygame.mixer.music.play()
                # Wait for playback to finish
-            while pygame.mixer.music.get_busy():
+                while pygame.mixer.music.get_busy() and not self._stop_flag:
                    pygame.time.Clock().tick(10)
                # Cleanup
                try:
                    os.unlink(temp_path)
                except:
                    pass
        except Exception as e:
            logger.error(f"gTTS error: {e}")