fix: Comprehensive markdown/formatting cleanup for TTS (Piper + XTTS)
- Remove **bold**, *italic*, `code`, code blocks, links, headers, quotes, lists - Replace newlines with natural pauses (period/comma) - Remove quotation marks, empty brackets - Fixes text being swallowed/garbled by TTS engines Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
+16
-4
@@ -201,11 +201,23 @@ class VoiceEngine:
|
||||
return None
|
||||
|
||||
try:
|
||||
# Langen Text in Saetze aufteilen (Piper hat Limits bei langen Texten)
|
||||
# Markdown + Sonderzeichen entfernen fuer natuerliche Sprache
|
||||
import re
|
||||
sentences = re.split(r'(?<=[.!?])\s+', text.strip())
|
||||
# Markdown-Formatierung entfernen
|
||||
sentences = [re.sub(r'\*\*([^*]+)\*\*', r'\1', s).strip() for s in sentences if s.strip()]
|
||||
clean = text.strip()
|
||||
clean = re.sub(r'\*\*([^*]+)\*\*', r'\1', clean) # **fett**
|
||||
clean = re.sub(r'\*([^*]+)\*', r'\1', clean) # *kursiv*
|
||||
clean = re.sub(r'`[^`]+`', '', clean) # `code`
|
||||
clean = re.sub(r'```[\s\S]*?```', '', clean) # Code-Bloecke
|
||||
clean = re.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', clean) # [text](url)
|
||||
clean = re.sub(r'#{1,6}\s*', '', clean) # ### Ueberschriften
|
||||
clean = re.sub(r'>\s*', '', clean) # > Zitate
|
||||
clean = re.sub(r'[-*]\s+', '', clean) # Listen
|
||||
clean = re.sub(r'\n{2,}', '. ', clean) # Absaetze
|
||||
clean = re.sub(r'\n', ', ', clean) # Zeilenumbrueche
|
||||
clean = re.sub(r'\s{2,}', ' ', clean) # Mehrfach-Leerzeichen
|
||||
clean = re.sub(r'["""„]', '', clean) # Anfuehrungszeichen
|
||||
sentences = re.split(r'(?<=[.!?])\s+', clean)
|
||||
sentences = [s.strip() for s in sentences if s.strip()]
|
||||
|
||||
if not sentences:
|
||||
return None
|
||||
|
||||
Reference in New Issue
Block a user