fix: XTTS bridge splits text into sentences sequentially
- XTTS-Bridge does sentence splitting (not ARIA-Bridge)
- Sequential rendering: correct order guaranteed
- Each sentence sent as separate xtts_response
- Markdown removal before splitting
- App starts playback after first sentence (faster UX)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
06bc456221
commit
b3d3b8b6bc
|
|
@ -851,27 +851,20 @@ class ARIABridge:
|
||||||
tts_engine = getattr(self, 'tts_engine_type', 'piper')
|
tts_engine = getattr(self, 'tts_engine_type', 'piper')
|
||||||
|
|
||||||
if tts_engine == "xtts":
|
if tts_engine == "xtts":
|
||||||
# XTTS: Lange Texte satzweise aufteilen (WebSocket-Limit + schnellere Wiedergabe)
|
# XTTS: Ganzen Text senden, XTTS-Bridge teilt satzweise auf
|
||||||
import re as _re
|
|
||||||
xtts_voice = getattr(self, 'xtts_voice', '')
|
xtts_voice = getattr(self, 'xtts_voice', '')
|
||||||
clean_text = _re.sub(r'\*\*([^*]+)\*\*', r'\1', text).strip()
|
|
||||||
sentences = _re.split(r'(?<=[.!?])\s+', clean_text)
|
|
||||||
sentences = [s.strip() for s in sentences if s.strip()]
|
|
||||||
if not sentences:
|
|
||||||
sentences = [clean_text]
|
|
||||||
try:
|
try:
|
||||||
for sentence in sentences:
|
await self._send_to_rvs({
|
||||||
await self._send_to_rvs({
|
"type": "xtts_request",
|
||||||
"type": "xtts_request",
|
"payload": {
|
||||||
"payload": {
|
"text": text,
|
||||||
"text": sentence,
|
"voice": xtts_voice,
|
||||||
"voice": xtts_voice,
|
"language": "de",
|
||||||
"language": "de",
|
"requestId": str(uuid.uuid4()),
|
||||||
"requestId": str(uuid.uuid4()),
|
},
|
||||||
},
|
"timestamp": int(asyncio.get_event_loop().time() * 1000),
|
||||||
"timestamp": int(asyncio.get_event_loop().time() * 1000),
|
})
|
||||||
})
|
logger.info("[core] XTTS-Request gesendet (%s): '%s'", xtts_voice or "default", text[:60])
|
||||||
logger.info("[core] XTTS-Request gesendet (%s, %d Saetze): '%s'", xtts_voice or "default", len(sentences), text[:60])
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning("[core] XTTS-Request fehlgeschlagen: %s — Fallback auf Piper", e)
|
logger.warning("[core] XTTS-Request fehlgeschlagen: %s — Fallback auf Piper", e)
|
||||||
# Fallback auf Piper
|
# Fallback auf Piper
|
||||||
|
|
|
||||||
|
|
@ -97,39 +97,47 @@ async function handleTTSRequest(payload) {
|
||||||
const { text, voice, requestId, language } = payload;
|
const { text, voice, requestId, language } = payload;
|
||||||
if (!text) return;
|
if (!text) return;
|
||||||
|
|
||||||
log(`TTS-Request: "${text.slice(0, 60)}..." (voice: ${voice || "default"}, lang: ${language || "de"})`);
|
// Markdown entfernen
|
||||||
|
const cleanText = text.replace(/\*\*([^*]+)\*\*/g, "$1").trim();
|
||||||
|
|
||||||
|
// Text in Saetze aufteilen (sequentiell rendern fuer korrekte Reihenfolge)
|
||||||
|
const sentences = cleanText.split(/(?<=[.!?])\s+/).map(s => s.trim()).filter(s => s.length > 0);
|
||||||
|
if (sentences.length === 0) return;
|
||||||
|
|
||||||
|
log(`TTS-Request: "${cleanText.slice(0, 60)}..." (${sentences.length} Saetze, voice: ${voice || "default"}, lang: ${language || "de"})`);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
// Voice-Sample Pfad bestimmen
|
|
||||||
const voiceSample = voice ? path.join(VOICES_DIR, `${voice}.wav`) : null;
|
const voiceSample = voice ? path.join(VOICES_DIR, `${voice}.wav`) : null;
|
||||||
const hasCustomVoice = voiceSample && fs.existsSync(voiceSample);
|
const hasCustomVoice = voiceSample && fs.existsSync(voiceSample);
|
||||||
|
|
||||||
// XTTS API aufrufen
|
// Jeden Satz sequentiell rendern und sofort senden
|
||||||
const audioBuffer = await callXTTSAPI(text, language || "de", hasCustomVoice ? voiceSample : null);
|
for (let i = 0; i < sentences.length; i++) {
|
||||||
|
const sentence = sentences[i];
|
||||||
|
try {
|
||||||
|
const audioBuffer = await callXTTSAPI(sentence, language || "de", hasCustomVoice ? voiceSample : null);
|
||||||
|
|
||||||
if (audioBuffer && audioBuffer.length > 100) {
|
if (audioBuffer && audioBuffer.length > 100) {
|
||||||
const base64 = audioBuffer.toString("base64");
|
const base64 = audioBuffer.toString("base64");
|
||||||
log(`TTS fertig: ${audioBuffer.length} bytes (${(audioBuffer.length / 1024).toFixed(0)}KB)`);
|
log(`TTS [${i + 1}/${sentences.length}]: ${audioBuffer.length} bytes (${(audioBuffer.length / 1024).toFixed(0)}KB) — "${sentence.slice(0, 40)}..."`);
|
||||||
|
|
||||||
sendToRVS({
|
sendToRVS({
|
||||||
type: "xtts_response",
|
type: "xtts_response",
|
||||||
payload: {
|
payload: {
|
||||||
requestId: requestId || "",
|
requestId: `${requestId || ""}_${i}`,
|
||||||
base64,
|
base64,
|
||||||
mimeType: "audio/wav",
|
mimeType: "audio/wav",
|
||||||
voice: voice || "default",
|
voice: voice || "default",
|
||||||
engine: "xtts",
|
engine: "xtts",
|
||||||
},
|
},
|
||||||
timestamp: Date.now(),
|
timestamp: Date.now(),
|
||||||
});
|
});
|
||||||
} else {
|
}
|
||||||
log("TTS: Leeres Audio erhalten");
|
} catch (sentenceErr) {
|
||||||
sendToRVS({
|
log(`TTS [${i + 1}/${sentences.length}] Fehler: ${sentenceErr.message} — ueberspringe`);
|
||||||
type: "xtts_response",
|
}
|
||||||
payload: { requestId, error: "Leeres Audio" },
|
|
||||||
timestamp: Date.now(),
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
log(`TTS komplett: ${sentences.length} Saetze gerendert`);
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
log(`TTS Fehler: ${err.message}`);
|
log(`TTS Fehler: ${err.message}`);
|
||||||
sendToRVS({
|
sendToRVS({
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue