From 727e23d04503398674f6683a939047e000787435 Mon Sep 17 00:00:00 2001
From: duffyduck
Date: Mon, 29 Dec 2025 02:11:08 +0100
Subject: [PATCH] tts speed added

---
 python_bridge/chat_audio_bridge.py |  7 +++++--
 python_bridge/config.yaml          |  7 ++++++-
 python_bridge/tts_engine.py        | 81 ++++++++++++++++++++++++++++++++++++++++++++++++++++-----
 3 files changed, 84 insertions(+), 11 deletions(-)

diff --git a/python_bridge/chat_audio_bridge.py b/python_bridge/chat_audio_bridge.py
index 3f19348..60e56f4 100755
--- a/python_bridge/chat_audio_bridge.py
+++ b/python_bridge/chat_audio_bridge.py
@@ -293,9 +293,12 @@ class ClaudesEyesAudioBridge:
                     engine_type=engine_type,
                     language=tts_config.get("language", "de"),
                     rate=tts_config.get("rate", 150),
-                    volume=tts_config.get("volume", 0.9)
+                    volume=tts_config.get("volume", 0.9),
+                    speed=tts_config.get("speed", 1.0)  # speed factor, only used by gTTS
                 )
-                console.print(f"[green]TTS Engine erstellt ({engine_type})[/green]")
+                tts_speed = tts_config.get("speed", 1.0)
+                speed_info = f", {tts_speed}x" if engine_type == "gtts" and tts_speed != 1.0 else ""
+                console.print(f"[green]TTS Engine erstellt ({engine_type}{speed_info})[/green]")
             except Exception as e:
                 console.print(f"[red]TTS Engine konnte nicht erstellt werden: {e}[/red]")
                 logger.error(f"TTS Engine Fehler: {e}", exc_info=True)
diff --git a/python_bridge/config.yaml b/python_bridge/config.yaml
index a00bb1e..e843d9e 100644
--- a/python_bridge/config.yaml
+++ b/python_bridge/config.yaml
@@ -70,8 +70,13 @@ tts:
   # Sprache
   language: "de"
 
-  # Sprechgeschwindigkeit (nur pyttsx3: Wörter pro Minute, 100-200)
+  # Speech rate
+  # - pyttsx3: `rate` in words per minute (100-200)
+  # - gtts: `speed` factor (1.0 = normal, 1.25 = 25% faster, 1.5 = 50% faster)
+  #   Requires pydub (pip install pydub) and ffmpeg for MP3 decoding/encoding.
+  #   The speed-up also changes the pitch; if it fails, audio plays at normal speed.
   rate: 150
+  speed: 1.25  # gtts only - 1.25x is pleasantly faster
 
   # Lautstärke (nur pyttsx3)
   volume: 0.9
diff --git a/python_bridge/tts_engine.py b/python_bridge/tts_engine.py
index 599fa09..9723723 100644
--- a/python_bridge/tts_engine.py
+++ b/python_bridge/tts_engine.py
@@ -112,19 +112,28 @@ class GTTSEngine(TTSEngine):
     # Google hat ein Limit von ~5000 Zeichen, wir nehmen weniger für Sicherheit
     MAX_CHUNK_SIZE = 500
 
-    def __init__(self, language: str = "de"):
+    def __init__(self, language: str = "de", speed: float = 1.0):
+        """
+        Initialise the pygame mixer and store the playback settings.
+
+        Args:
+            language: Language (e.g. "de", "en")
+            speed: Speed factor (1.0 = normal, 1.5 = 50% faster, 2.0 = twice as fast)
+        """
         from gtts import gTTS
         import pygame
 
         pygame.mixer.init()
 
         self.language = language
+        self.speed = speed  # speed factor, applied in speak() when != 1.0
         self._speaking = False
         self._queue = queue.Queue()
         self._thread: Optional[threading.Thread] = None
         self._stop_flag = False
 
-        logger.info(f"gTTS engine initialized (language: {language})")
+        speed_info = f", speed: {speed}x" if speed != 1.0 else ""
+        logger.info(f"gTTS engine initialized (language: {language}{speed_info})")
 
     def _split_text_into_chunks(self, text: str) -> list:
         """
@@ -178,6 +187,43 @@ class GTTSEngine(TTSEngine):
         logger.debug(f"Text in {len(chunks)} Chunks aufgeteilt ({len(text)} Zeichen)")
         return chunks
 
+    def _speed_up_audio(self, input_path: str, output_path: str) -> bool:
+        """
+        Write a speed-adjusted copy of an MP3 file using pydub.
+
+        The raw samples are re-labelled with the frame rate scaled by
+        ``self.speed`` and then resampled back to the original rate. This
+        changes the pitch together with the tempo.
+
+        Args:
+            input_path: Path to the original MP3
+            output_path: Path where the speed-adjusted MP3 is written
+
+        Returns:
+            True on success, False if pydub is missing or processing fails
+        """
+        try:
+            from pydub import AudioSegment
+
+            audio = AudioSegment.from_mp3(input_path)
+
+            # Declare a scaled frame rate for the same raw data, then
+            # convert back so the exported file keeps the original rate.
+            new_frame_rate = int(audio.frame_rate * self.speed)
+            speedup_audio = audio._spawn(audio.raw_data, overrides={
+                "frame_rate": new_frame_rate
+            }).set_frame_rate(audio.frame_rate)
+
+            speedup_audio.export(output_path, format="mp3")
+            return True
+
+        except ImportError:
+            logger.warning("pydub nicht installiert - Geschwindigkeit nicht änderbar. Installiere mit: pip install pydub")
+            return False
+        except Exception as e:
+            logger.error(f"Audio-Speedup-Fehler: {e}")
+            return False
+
     def speak(self, text: str) -> None:
         """Speak text (blocking) - teilt lange Texte automatisch auf"""
         from gtts import gTTS
@@ -206,8 +252,22 @@ class GTTSEngine(TTSEngine):
                     temp_path = f.name
                 tts.save(temp_path)
 
+                # Adjust the playback speed when it differs from 1.0
+                play_path = temp_path
+                speedup_path = None
+
+                if self.speed != 1.0:
+                    # splitext only touches the extension; str.replace would
+                    # also rewrite ".mp3" inside a directory name
+                    speedup_path = os.path.splitext(temp_path)[0] + "_fast.mp3"
+                    if self._speed_up_audio(temp_path, speedup_path):
+                        play_path = speedup_path
+                    else:
+                        # Fallback: play the original file
+                        logger.debug("Speedup fehlgeschlagen, spiele Original")
+
                 # Play audio
-                pygame.mixer.music.load(temp_path)
+                pygame.mixer.music.load(play_path)
                 pygame.mixer.music.play()
 
                 # Wait for playback to finish
@@ -217,6 +277,10 @@ class GTTSEngine(TTSEngine):
                 # Cleanup
-                try:
-                    os.unlink(temp_path)
-                except:
-                    pass
+                # Remove each file on its own so one failure does not leak the other
+                for path in (temp_path, speedup_path):
+                    if path is None:
+                        continue
+                    try:
+                        os.unlink(path)
+                    except OSError:
+                        pass
 
@@ -276,7 +340,8 @@ def create_tts_engine(engine_type: str = "pyttsx3", **kwargs) -> TTSEngine:
         )
     elif engine_type == "gtts":
         return GTTSEngine(
-            language=kwargs.get("language", "de")
+            language=kwargs.get("language", "de"),
+            speed=kwargs.get("speed", 1.0)
         )
     else:
         raise ValueError(f"Unknown TTS engine: {engine_type}")