diff --git a/python_bridge/config.yaml b/python_bridge/config.yaml index 58be18b..0e05e20 100644 --- a/python_bridge/config.yaml +++ b/python_bridge/config.yaml @@ -98,7 +98,8 @@ stt: pause_threshold: 0.8 # Maximale Aufnahmelänge pro Phrase in Sekunden - phrase_time_limit: 15 + # Bei langen Sätzen höher setzen (Google STT unterstützt bis ~60s) + phrase_time_limit: 60 # ============================================================================ # Termux (Android) Einstellungen diff --git a/python_bridge/stt_engine.py b/python_bridge/stt_engine.py index f8c61ef..09d3345 100644 --- a/python_bridge/stt_engine.py +++ b/python_bridge/stt_engine.py @@ -28,7 +28,7 @@ class STTEngine: self, energy_threshold: int = 300, pause_threshold: float = 0.8, - phrase_time_limit: int = 15, + phrase_time_limit: int = 60, service: str = "google", language: str = "de-DE" ): diff --git a/python_bridge/tts_engine.py b/python_bridge/tts_engine.py index 447d72a..ba410a0 100644 --- a/python_bridge/tts_engine.py +++ b/python_bridge/tts_engine.py @@ -108,6 +108,10 @@ class Pyttsx3Engine(TTSEngine): class GTTSEngine(TTSEngine): """TTS using Google Text-to-Speech (online, better quality)""" + # Maximale Chunk-Größe für gTTS (Zeichen) + # Google hat ein Limit von ~5000 Zeichen, wir nehmen weniger für Sicherheit + MAX_CHUNK_SIZE = 500 + def __init__(self, language: str = "de"): from gtts import gTTS import pygame @@ -122,8 +126,60 @@ class GTTSEngine(TTSEngine): logger.info(f"gTTS engine initialized (language: {language})") + def _split_text_into_chunks(self, text: str) -> list: + """ + Teilt langen Text in Chunks auf. + + Versucht an Satzenden zu splitten (. ! ?) für natürlichere Pausen. + """ + if len(text) <= self.MAX_CHUNK_SIZE: + return [text] + + chunks = [] + current_chunk = "" + + # Teile nach Sätzen (., !, ?) + import re + sentences = re.split(r'(?<=[.!?])\s+', text) + + for sentence in sentences: + # Wenn Satz selbst zu lang ist, teile nach Kommas oder Wörtern + if len(sentence) > self.MAX_CHUNK_SIZE: + # Teile nach Kommas + parts = re.split(r'(?<=,)\s+', sentence) + for part in parts: + if len(part) > self.MAX_CHUNK_SIZE: + # Letzter Ausweg: Teile nach Wörtern + words = part.split() + for word in words: + if len(current_chunk) + len(word) + 1 > self.MAX_CHUNK_SIZE: + if current_chunk: + chunks.append(current_chunk.strip()) + current_chunk = word + else: + current_chunk += " " + word if current_chunk else word + elif len(current_chunk) + len(part) + 1 > self.MAX_CHUNK_SIZE: + if current_chunk: + chunks.append(current_chunk.strip()) + current_chunk = part + else: + current_chunk += " " + part if current_chunk else part + elif len(current_chunk) + len(sentence) + 1 > self.MAX_CHUNK_SIZE: + if current_chunk: + chunks.append(current_chunk.strip()) + current_chunk = sentence + else: + current_chunk += " " + sentence if current_chunk else sentence + + # Letzten Chunk hinzufügen + if current_chunk: + chunks.append(current_chunk.strip()) + + logger.debug(f"Text in {len(chunks)} Chunks aufgeteilt ({len(text)} Zeichen)") + return chunks + def speak(self, text: str) -> None: - """Speak text (blocking)""" + """Speak text (blocking) - teilt lange Texte automatisch auf""" from gtts import gTTS import pygame import tempfile @@ -131,23 +187,38 @@ class GTTSEngine(TTSEngine): self._speaking = True try: - # Generate audio file - tts = gTTS(text=text, lang=self.language) + # Teile langen Text in Chunks + chunks = self._split_text_into_chunks(text) - with tempfile.NamedTemporaryFile(suffix='.mp3', delete=False) as f: - temp_path = f.name - tts.save(temp_path) + for i, chunk in enumerate(chunks): + if self._stop_flag: + break - # Play audio - pygame.mixer.music.load(temp_path) - pygame.mixer.music.play() + if not chunk.strip(): + continue - # Wait for playback to finish - while pygame.mixer.music.get_busy(): - pygame.time.Clock().tick(10) + logger.debug(f"Spreche Chunk {i+1}/{len(chunks)}: {chunk[:50]}...") - # Cleanup - os.unlink(temp_path) + # Generate audio file + tts = gTTS(text=chunk, lang=self.language) + + with tempfile.NamedTemporaryFile(suffix='.mp3', delete=False) as f: + temp_path = f.name + tts.save(temp_path) + + # Play audio + pygame.mixer.music.load(temp_path) + pygame.mixer.music.play() + + # Wait for playback to finish + while pygame.mixer.music.get_busy() and not self._stop_flag: + pygame.time.Clock().tick(10) + + # Cleanup + try: + os.unlink(temp_path) + except: + pass except Exception as e: logger.error(f"gTTS error: {e}")