fix: Comprehensive markdown/formatting cleanup for TTS (Piper + XTTS)

- Strip Markdown markers for **bold**, *italic*, links, headers, quotes, and lists (the text itself is kept); drop inline `code` and code blocks entirely
- Replace newlines with natural pauses (paragraph breaks become a period, single line breaks a comma)
- Remove quotation marks, empty brackets
- Fixes text being swallowed/garbled by TTS engines

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-11 11:47:04 +02:00
parent e3a0cfb55a
commit 764619f076
2 changed files with 32 additions and 6 deletions
+16 -4
View File
@@ -201,11 +201,23 @@ class VoiceEngine:
return None
try:
# Langen Text in Saetze aufteilen (Piper hat Limits bei langen Texten)
# Markdown + Sonderzeichen entfernen fuer natuerliche Sprache
import re
sentences = re.split(r'(?<=[.!?])\s+', text.strip())
# Markdown-Formatierung entfernen
sentences = [re.sub(r'\*\*([^*]+)\*\*', r'\1', s).strip() for s in sentences if s.strip()]
clean = text.strip()
clean = re.sub(r'\*\*([^*]+)\*\*', r'\1', clean) # **fett**
clean = re.sub(r'\*([^*]+)\*', r'\1', clean) # *kursiv*
clean = re.sub(r'`[^`]+`', '', clean) # `code`
clean = re.sub(r'```[\s\S]*?```', '', clean) # Code-Bloecke
clean = re.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', clean) # [text](url)
clean = re.sub(r'#{1,6}\s*', '', clean) # ### Ueberschriften
clean = re.sub(r'>\s*', '', clean) # > Zitate
clean = re.sub(r'[-*]\s+', '', clean) # Listen
clean = re.sub(r'\n{2,}', '. ', clean) # Absaetze
clean = re.sub(r'\n', ', ', clean) # Zeilenumbrueche
clean = re.sub(r'\s{2,}', ' ', clean) # Mehrfach-Leerzeichen
clean = re.sub(r'["""„]', '', clean) # Anfuehrungszeichen
sentences = re.split(r'(?<=[.!?])\s+', clean)
sentences = [s.strip() for s in sentences if s.strip()]
if not sentences:
return None