fix: XTTS bridge splits text into sentences and renders them sequentially

- XTTS-Bridge does sentence splitting (not ARIA-Bridge)
- Sequential rendering: correct order guaranteed
- Each sentence is sent as a separate xtts_response (requestId suffixed with `_<sentence index>`)
- Markdown bold markers (`**…**`) are stripped before splitting
- App starts playback after first sentence (faster UX)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
duffyduck 2026-04-10 02:03:29 +02:00
parent 06bc456221
commit b3d3b8b6bc
2 changed files with 45 additions and 44 deletions

View File

@ -851,27 +851,20 @@ class ARIABridge:
tts_engine = getattr(self, 'tts_engine_type', 'piper') tts_engine = getattr(self, 'tts_engine_type', 'piper')
if tts_engine == "xtts": if tts_engine == "xtts":
# XTTS: Lange Texte satzweise aufteilen (WebSocket-Limit + schnellere Wiedergabe) # XTTS: Ganzen Text senden, XTTS-Bridge teilt satzweise auf
import re as _re
xtts_voice = getattr(self, 'xtts_voice', '') xtts_voice = getattr(self, 'xtts_voice', '')
clean_text = _re.sub(r'\*\*([^*]+)\*\*', r'\1', text).strip()
sentences = _re.split(r'(?<=[.!?])\s+', clean_text)
sentences = [s.strip() for s in sentences if s.strip()]
if not sentences:
sentences = [clean_text]
try: try:
for sentence in sentences: await self._send_to_rvs({
await self._send_to_rvs({ "type": "xtts_request",
"type": "xtts_request", "payload": {
"payload": { "text": text,
"text": sentence, "voice": xtts_voice,
"voice": xtts_voice, "language": "de",
"language": "de", "requestId": str(uuid.uuid4()),
"requestId": str(uuid.uuid4()), },
}, "timestamp": int(asyncio.get_event_loop().time() * 1000),
"timestamp": int(asyncio.get_event_loop().time() * 1000), })
}) logger.info("[core] XTTS-Request gesendet (%s): '%s'", xtts_voice or "default", text[:60])
logger.info("[core] XTTS-Request gesendet (%s, %d Saetze): '%s'", xtts_voice or "default", len(sentences), text[:60])
except Exception as e: except Exception as e:
logger.warning("[core] XTTS-Request fehlgeschlagen: %s — Fallback auf Piper", e) logger.warning("[core] XTTS-Request fehlgeschlagen: %s — Fallback auf Piper", e)
# Fallback auf Piper # Fallback auf Piper

View File

@ -97,39 +97,47 @@ async function handleTTSRequest(payload) {
const { text, voice, requestId, language } = payload; const { text, voice, requestId, language } = payload;
if (!text) return; if (!text) return;
log(`TTS-Request: "${text.slice(0, 60)}..." (voice: ${voice || "default"}, lang: ${language || "de"})`); // Markdown entfernen
const cleanText = text.replace(/\*\*([^*]+)\*\*/g, "$1").trim();
// Text in Saetze aufteilen (sequentiell rendern fuer korrekte Reihenfolge)
const sentences = cleanText.split(/(?<=[.!?])\s+/).map(s => s.trim()).filter(s => s.length > 0);
if (sentences.length === 0) return;
log(`TTS-Request: "${cleanText.slice(0, 60)}..." (${sentences.length} Saetze, voice: ${voice || "default"}, lang: ${language || "de"})`);
try { try {
// Voice-Sample Pfad bestimmen
const voiceSample = voice ? path.join(VOICES_DIR, `${voice}.wav`) : null; const voiceSample = voice ? path.join(VOICES_DIR, `${voice}.wav`) : null;
const hasCustomVoice = voiceSample && fs.existsSync(voiceSample); const hasCustomVoice = voiceSample && fs.existsSync(voiceSample);
// XTTS API aufrufen // Jeden Satz sequentiell rendern und sofort senden
const audioBuffer = await callXTTSAPI(text, language || "de", hasCustomVoice ? voiceSample : null); for (let i = 0; i < sentences.length; i++) {
const sentence = sentences[i];
try {
const audioBuffer = await callXTTSAPI(sentence, language || "de", hasCustomVoice ? voiceSample : null);
if (audioBuffer && audioBuffer.length > 100) { if (audioBuffer && audioBuffer.length > 100) {
const base64 = audioBuffer.toString("base64"); const base64 = audioBuffer.toString("base64");
log(`TTS fertig: ${audioBuffer.length} bytes (${(audioBuffer.length / 1024).toFixed(0)}KB)`); log(`TTS [${i + 1}/${sentences.length}]: ${audioBuffer.length} bytes (${(audioBuffer.length / 1024).toFixed(0)}KB) — "${sentence.slice(0, 40)}..."`);
sendToRVS({ sendToRVS({
type: "xtts_response", type: "xtts_response",
payload: { payload: {
requestId: requestId || "", requestId: `${requestId || ""}_${i}`,
base64, base64,
mimeType: "audio/wav", mimeType: "audio/wav",
voice: voice || "default", voice: voice || "default",
engine: "xtts", engine: "xtts",
}, },
timestamp: Date.now(), timestamp: Date.now(),
}); });
} else { }
log("TTS: Leeres Audio erhalten"); } catch (sentenceErr) {
sendToRVS({ log(`TTS [${i + 1}/${sentences.length}] Fehler: ${sentenceErr.message} — ueberspringe`);
type: "xtts_response", }
payload: { requestId, error: "Leeres Audio" },
timestamp: Date.now(),
});
} }
log(`TTS komplett: ${sentences.length} Saetze gerendert`);
} catch (err) { } catch (err) {
log(`TTS Fehler: ${err.message}`); log(`TTS Fehler: ${err.message}`);
sendToRVS({ sendToRVS({