TTS in Chunks und STT auf 60 Sekunden erhöht
This commit is contained in:
parent
37f89ca76f
commit
10d68d256c
|
|
@ -98,7 +98,8 @@ stt:
|
||||||
pause_threshold: 0.8
|
pause_threshold: 0.8
|
||||||
|
|
||||||
# Maximale Aufnahmelänge pro Phrase in Sekunden
|
# Maximale Aufnahmelänge pro Phrase in Sekunden
|
||||||
phrase_time_limit: 15
|
# Bei langen Sätzen höher setzen (Google STT unterstützt bis ~60s)
|
||||||
|
phrase_time_limit: 60
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
# Termux (Android) Einstellungen
|
# Termux (Android) Einstellungen
|
||||||
|
|
|
||||||
|
|
@ -28,7 +28,7 @@ class STTEngine:
|
||||||
self,
|
self,
|
||||||
energy_threshold: int = 300,
|
energy_threshold: int = 300,
|
||||||
pause_threshold: float = 0.8,
|
pause_threshold: float = 0.8,
|
||||||
phrase_time_limit: int = 15,
|
phrase_time_limit: int = 60,
|
||||||
service: str = "google",
|
service: str = "google",
|
||||||
language: str = "de-DE"
|
language: str = "de-DE"
|
||||||
):
|
):
|
||||||
|
|
|
||||||
|
|
@ -108,6 +108,10 @@ class Pyttsx3Engine(TTSEngine):
|
||||||
class GTTSEngine(TTSEngine):
|
class GTTSEngine(TTSEngine):
|
||||||
"""TTS using Google Text-to-Speech (online, better quality)"""
|
"""TTS using Google Text-to-Speech (online, better quality)"""
|
||||||
|
|
||||||
|
# Maximale Chunk-Größe für gTTS (Zeichen)
|
||||||
|
# Google hat ein Limit von ~5000 Zeichen, wir nehmen weniger für Sicherheit
|
||||||
|
MAX_CHUNK_SIZE = 500
|
||||||
|
|
||||||
def __init__(self, language: str = "de"):
|
def __init__(self, language: str = "de"):
|
||||||
from gtts import gTTS
|
from gtts import gTTS
|
||||||
import pygame
|
import pygame
|
||||||
|
|
@ -122,8 +126,60 @@ class GTTSEngine(TTSEngine):
|
||||||
|
|
||||||
logger.info(f"gTTS engine initialized (language: {language})")
|
logger.info(f"gTTS engine initialized (language: {language})")
|
||||||
|
|
||||||
|
def _split_text_into_chunks(self, text: str) -> list:
    """
    Split long text into chunks of at most ``MAX_CHUNK_SIZE`` characters.

    Splitting is attempted at progressively finer boundaries so that the
    pauses between chunks sound natural:

    1. sentence ends (``.``, ``!``, ``?`` followed by whitespace),
    2. commas, for sentences that are themselves too long,
    3. whitespace between words, for comma-free parts that are too long,
    4. a hard cut every ``MAX_CHUNK_SIZE`` characters, for a single token
       that is longer than the limit (e.g. a URL).

    Args:
        text: The text to split.

    Returns:
        A list of chunks. Text that already fits into one chunk is returned
        unchanged as a single-element list; otherwise every chunk is
        stripped of surrounding whitespace and is never longer than
        ``MAX_CHUNK_SIZE``.
    """
    import re

    limit = self.MAX_CHUNK_SIZE
    if len(text) <= limit:
        return [text]

    chunks = []
    current = ""

    def flush():
        # Move the accumulated text into the result list.
        nonlocal current
        piece = current.strip()
        if piece:
            chunks.append(piece)
        current = ""

    def add(piece):
        # Append `piece` (already <= limit) to the current chunk, starting
        # a new chunk first if the joined text would exceed the limit.
        nonlocal current
        if not piece:
            return
        if current and len(current) + len(piece) + 1 > limit:
            flush()
        current = current + " " + piece if current else piece

    # Split after sentence-ending punctuation.
    for sentence in re.split(r'(?<=[.!?])\s+', text):
        if len(sentence) <= limit:
            add(sentence)
            continue

        # The sentence alone is too long: split after commas.
        for part in re.split(r'(?<=,)\s+', sentence):
            if len(part) <= limit:
                add(part)
                continue

            # Last resort: split on whitespace between words.
            for word in part.split():
                # A single token longer than the limit cannot be placed in
                # any chunk as-is, so cut it into limit-sized slices.
                while len(word) > limit:
                    flush()
                    chunks.append(word[:limit])
                    word = word[limit:]
                add(word)

    # Add the final chunk.
    flush()

    logger.debug(f"Text in {len(chunks)} Chunks aufgeteilt ({len(text)} Zeichen)")
    return chunks
|
||||||
|
|
||||||
def speak(self, text: str) -> None:
|
def speak(self, text: str) -> None:
|
||||||
"""Speak text (blocking)"""
|
"""Speak text (blocking) - teilt lange Texte automatisch auf"""
|
||||||
from gtts import gTTS
|
from gtts import gTTS
|
||||||
import pygame
|
import pygame
|
||||||
import tempfile
|
import tempfile
|
||||||
|
|
@ -131,8 +187,20 @@ class GTTSEngine(TTSEngine):
|
||||||
|
|
||||||
self._speaking = True
|
self._speaking = True
|
||||||
try:
|
try:
|
||||||
|
# Teile langen Text in Chunks
|
||||||
|
chunks = self._split_text_into_chunks(text)
|
||||||
|
|
||||||
|
for i, chunk in enumerate(chunks):
|
||||||
|
if self._stop_flag:
|
||||||
|
break
|
||||||
|
|
||||||
|
if not chunk.strip():
|
||||||
|
continue
|
||||||
|
|
||||||
|
logger.debug(f"Spreche Chunk {i+1}/{len(chunks)}: {chunk[:50]}...")
|
||||||
|
|
||||||
# Generate audio file
|
# Generate audio file
|
||||||
tts = gTTS(text=text, lang=self.language)
|
tts = gTTS(text=chunk, lang=self.language)
|
||||||
|
|
||||||
with tempfile.NamedTemporaryFile(suffix='.mp3', delete=False) as f:
|
with tempfile.NamedTemporaryFile(suffix='.mp3', delete=False) as f:
|
||||||
temp_path = f.name
|
temp_path = f.name
|
||||||
|
|
@ -143,11 +211,14 @@ class GTTSEngine(TTSEngine):
|
||||||
pygame.mixer.music.play()
|
pygame.mixer.music.play()
|
||||||
|
|
||||||
# Wait for playback to finish
|
# Wait for playback to finish
|
||||||
while pygame.mixer.music.get_busy():
|
while pygame.mixer.music.get_busy() and not self._stop_flag:
|
||||||
pygame.time.Clock().tick(10)
|
pygame.time.Clock().tick(10)
|
||||||
|
|
||||||
# Cleanup
|
# Cleanup
|
||||||
|
try:
|
||||||
os.unlink(temp_path)
|
os.unlink(temp_path)
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"gTTS error: {e}")
|
logger.error(f"gTTS error: {e}")
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue