From 764619f076321e75310af85b73f09329952b06a8 Mon Sep 17 00:00:00 2001 From: duffyduck Date: Sat, 11 Apr 2026 11:47:04 +0200 Subject: [PATCH] fix: Comprehensive markdown/formatting cleanup for TTS (Piper + XTTS) - Remove **bold**, *italic*, `code`, code blocks, links, headers, quotes, lists - Replace newlines with natural pauses (period/comma) - Remove quotation marks, empty brackets - Fixes text being swallowed/garbled by TTS engines Co-Authored-By: Claude Opus 4.6 (1M context) --- bridge/aria_bridge.py | 20 ++++++++++++++++---- xtts/bridge.js | 18 ++++++++++++++++-- 2 files changed, 32 insertions(+), 6 deletions(-) diff --git a/bridge/aria_bridge.py b/bridge/aria_bridge.py index 1509c2e..c9d888a 100644 --- a/bridge/aria_bridge.py +++ b/bridge/aria_bridge.py @@ -201,11 +201,23 @@ class VoiceEngine: return None try: - # Langen Text in Saetze aufteilen (Piper hat Limits bei langen Texten) + # Markdown + Sonderzeichen entfernen fuer natuerliche Sprache import re - sentences = re.split(r'(?<=[.!?])\s+', text.strip()) - # Markdown-Formatierung entfernen - sentences = [re.sub(r'\*\*([^*]+)\*\*', r'\1', s).strip() for s in sentences if s.strip()] + clean = text.strip() + clean = re.sub(r'\*\*([^*]+)\*\*', r'\1', clean) # **fett** + clean = re.sub(r'\*([^*]+)\*', r'\1', clean) # *kursiv* + clean = re.sub(r'`[^`]+`', '', clean) # `code` + clean = re.sub(r'```[\s\S]*?```', '', clean) # Code-Bloecke + clean = re.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', clean) # [text](url) + clean = re.sub(r'#{1,6}\s*', '', clean) # ### Ueberschriften + clean = re.sub(r'>\s*', '', clean) # > Zitate + clean = re.sub(r'[-*]\s+', '', clean) # Listen + clean = re.sub(r'\n{2,}', '. ', clean) # Absaetze + clean = re.sub(r'\n', ', ', clean) # Zeilenumbrueche + clean = re.sub(r'\s{2,}', ' ', clean) # Mehrfach-Leerzeichen + clean = re.sub(r'["""„]', '', clean) # Anfuehrungszeichen + sentences = re.split(r'(?<=[.!?])\s+', clean) + sentences = [s.strip() for s in sentences if s.strip()] if not sentences: return None diff --git a/xtts/bridge.js b/xtts/bridge.js index 3978034..54859bf 100644 --- a/xtts/bridge.js +++ b/xtts/bridge.js @@ -97,8 +97,22 @@ async function handleTTSRequest(payload) { const { text, voice, requestId, language } = payload; if (!text) return; - // Markdown entfernen - const cleanText = text.replace(/\*\*([^*]+)\*\*/g, "$1").trim(); + // Markdown + Sonderzeichen entfernen fuer natuerliche Sprache + let cleanText = text + .replace(/\*\*([^*]+)\*\*/g, "$1") // **fett** → fett + .replace(/\*([^*]+)\*/g, "$1") // *kursiv* → kursiv + .replace(/`([^`]+)`/g, "$1") // `code` → code + .replace(/```[\s\S]*?```/g, "") // Code-Bloecke entfernen + .replace(/\[([^\]]+)\]\([^)]+\)/g, "$1") // [text](url) → text + .replace(/#{1,6}\s*/g, "") // ### Ueberschriften → entfernen + .replace(/>\s*/g, "") // > Zitate → entfernen + .replace(/[-*]\s+/g, "") // - Listen → entfernen + .replace(/\n{2,}/g, ". ") // Mehrere Newlines → Punkt + .replace(/\n/g, ", ") // Einzelne Newlines → Komma + .replace(/\s{2,}/g, " ") // Mehrfach-Leerzeichen + .replace(/["""„]/g, "") // Anfuehrungszeichen entfernen + .replace(/\(\)/g, "") // Leere Klammern + .trim(); // Text in Saetze aufteilen, dann zu Chunks von 2-3 Saetzen zusammenfassen // (mehr Kontext = konsistentere Stimme/Lautstaerke, aber nicht zu lang fuer WebSocket)