TTS in Chunks aufgeteilt und STT-Phrasenlimit auf 60 Sekunden erhöht

This commit is contained in:
duffyduck 2025-12-27 03:22:26 +01:00
parent 37f89ca76f
commit 10d68d256c
3 changed files with 88 additions and 16 deletions

View File

@ -98,7 +98,8 @@ stt:
pause_threshold: 0.8 pause_threshold: 0.8
# Maximale Aufnahmelänge pro Phrase in Sekunden # Maximale Aufnahmelänge pro Phrase in Sekunden
phrase_time_limit: 15 # Bei langen Sätzen höher setzen (Google STT unterstützt bis ~60s)
phrase_time_limit: 60
# ============================================================================ # ============================================================================
# Termux (Android) Einstellungen # Termux (Android) Einstellungen

View File

@ -28,7 +28,7 @@ class STTEngine:
self, self,
energy_threshold: int = 300, energy_threshold: int = 300,
pause_threshold: float = 0.8, pause_threshold: float = 0.8,
phrase_time_limit: int = 15, phrase_time_limit: int = 60,
service: str = "google", service: str = "google",
language: str = "de-DE" language: str = "de-DE"
): ):

View File

@ -108,6 +108,10 @@ class Pyttsx3Engine(TTSEngine):
class GTTSEngine(TTSEngine): class GTTSEngine(TTSEngine):
"""TTS using Google Text-to-Speech (online, better quality)""" """TTS using Google Text-to-Speech (online, better quality)"""
# Maximale Chunk-Größe für gTTS (Zeichen)
# Google hat ein Limit von ~5000 Zeichen, wir nehmen weniger für Sicherheit
MAX_CHUNK_SIZE = 500
def __init__(self, language: str = "de"): def __init__(self, language: str = "de"):
from gtts import gTTS from gtts import gTTS
import pygame import pygame
@ -122,8 +126,60 @@ class GTTSEngine(TTSEngine):
logger.info(f"gTTS engine initialized (language: {language})") logger.info(f"gTTS engine initialized (language: {language})")
def _split_text_into_chunks(self, text: str) -> list:
    """Split ``text`` into chunks of at most ``MAX_CHUNK_SIZE`` characters.

    Splitting prefers natural boundaries so playback pauses sound
    reasonable: first sentence ends (``.``, ``!``, ``?`` followed by
    whitespace), then commas, then single words. A single word that is
    still longer than the limit (e.g. a long URL) is cut hard at the
    limit so that no returned chunk ever exceeds ``MAX_CHUNK_SIZE``.

    Args:
        text: The text to split.

    Returns:
        A list of chunk strings. If ``text`` already fits into one chunk
        it is returned unchanged as the only element; otherwise every
        chunk is stripped of surrounding whitespace.
    """
    import re

    limit = self.MAX_CHUNK_SIZE
    if len(text) <= limit:
        return [text]

    def fragments():
        """Yield pieces no longer than ``limit``, coarsest boundary first."""
        for sentence in re.split(r'(?<=[.!?])\s+', text):
            if len(sentence) <= limit:
                yield sentence
                continue
            # Sentence too long: fall back to comma boundaries.
            for part in re.split(r'(?<=,)\s+', sentence):
                if len(part) <= limit:
                    yield part
                    continue
                # Last resort: single words, hard-cut if a word alone
                # is longer than the limit.
                for word in part.split():
                    for start in range(0, len(word), limit):
                        yield word[start:start + limit]

    chunks = []
    current_chunk = ""
    for fragment in fragments():
        if not fragment:
            # re.split can produce empty strings (e.g. trailing whitespace);
            # they carry no speech and would only add stray spaces.
            continue
        # +1 accounts for the joining space between chunk and fragment.
        if current_chunk and len(current_chunk) + len(fragment) + 1 > limit:
            chunks.append(current_chunk.strip())
            current_chunk = fragment
        elif current_chunk:
            current_chunk = current_chunk + " " + fragment
        else:
            current_chunk = fragment

    # Flush whatever is left after the last fragment.
    if current_chunk:
        chunks.append(current_chunk.strip())

    logger.debug(f"Text in {len(chunks)} Chunks aufgeteilt ({len(text)} Zeichen)")
    return chunks
def speak(self, text: str) -> None: def speak(self, text: str) -> None:
"""Speak text (blocking)""" """Speak text (blocking) - teilt lange Texte automatisch auf"""
from gtts import gTTS from gtts import gTTS
import pygame import pygame
import tempfile import tempfile
@ -131,8 +187,20 @@ class GTTSEngine(TTSEngine):
self._speaking = True self._speaking = True
try: try:
# Teile langen Text in Chunks
chunks = self._split_text_into_chunks(text)
for i, chunk in enumerate(chunks):
if self._stop_flag:
break
if not chunk.strip():
continue
logger.debug(f"Spreche Chunk {i+1}/{len(chunks)}: {chunk[:50]}...")
# Generate audio file # Generate audio file
tts = gTTS(text=text, lang=self.language) tts = gTTS(text=chunk, lang=self.language)
with tempfile.NamedTemporaryFile(suffix='.mp3', delete=False) as f: with tempfile.NamedTemporaryFile(suffix='.mp3', delete=False) as f:
temp_path = f.name temp_path = f.name
@ -143,11 +211,14 @@ class GTTSEngine(TTSEngine):
pygame.mixer.music.play() pygame.mixer.music.play()
# Wait for playback to finish # Wait for playback to finish
while pygame.mixer.music.get_busy(): while pygame.mixer.music.get_busy() and not self._stop_flag:
pygame.time.Clock().tick(10) pygame.time.Clock().tick(10)
# Cleanup # Cleanup
try:
os.unlink(temp_path) os.unlink(temp_path)
except:
pass
except Exception as e: except Exception as e:
logger.error(f"gTTS error: {e}") logger.error(f"gTTS error: {e}")