diff --git a/bridge/aria_bridge.py b/bridge/aria_bridge.py index a46146b..1d5655c 100644 --- a/bridge/aria_bridge.py +++ b/bridge/aria_bridge.py @@ -1100,25 +1100,12 @@ class ARIABridge: return elif msg_type == "audio_pcm": - # XTTS-PCM-Stream vom Gaming-PC empfangen → durchleiten zur App. - # Wenn in payload kein messageId (alte XTTS-Bridge), aus requestId auflösen. - error = payload.get("error", "") - if error: - logger.warning("[rvs] XTTS PCM-Fehler: %s", error) - return - linked_message_id = payload.get("messageId", "") - if not linked_message_id: - req_id_full = payload.get("requestId", "") - req_id_base = req_id_full.rsplit("_", 1)[0] if "_" in req_id_full else req_id_full - linked_message_id = self._xtts_request_to_message.get(req_id_base, "") - # Einfach 1:1 weiterleiten mit eingefuellter messageId - forwarded = dict(payload) - forwarded["messageId"] = linked_message_id - await self._send_to_rvs({ - "type": "audio_pcm", - "payload": forwarded, - "timestamp": int(asyncio.get_event_loop().time() * 1000), - }) + # Audio-PCM geht direkt von XTTS-Bridge an die App. + # Die aria-bridge darf es NICHT rebroadcasten — sonst bekommt die App + # jeden Chunk doppelt (einmal direkt von XTTS-Bridge via RVS-Broadcast, + # einmal indirekt via uns). + # Wir ignorieren diese Message hier einfach — messageId wird von + # XTTS-Bridge selbst im Payload mitgeliefert. return elif msg_type == "xtts_response": diff --git a/xtts/bridge.js b/xtts/bridge.js index b20cf82..f05a133 100644 --- a/xtts/bridge.js +++ b/xtts/bridge.js @@ -116,87 +116,58 @@ async function handleTTSRequest(payload) { .replace(/\(\)/g, "") .trim(); - // Satzweise Chunks (XTTS Modell laedt Context pro Call — Saetze gruppieren) - const sentences = cleanText.split(/(?<=[.!?])\s+/) - .map(s => s.trim()) - .filter(s => s.length > 0) - .map(s => s.replace(/[.]+$/, '')); - - const MAX_CHUNK_CHARS = 150; - const chunks = []; - let currentChunk = ''; - for (const sentence of sentences) { - if (currentChunk && (currentChunk.length + sentence.length + 2) > MAX_CHUNK_CHARS) { - chunks.push(currentChunk); - currentChunk = sentence; - } else { - currentChunk = currentChunk ? currentChunk + ', ' + sentence : sentence; - } - } - if (currentChunk) chunks.push(currentChunk); - if (chunks.length === 0) return; - - log(`TTS-Request (streaming): "${cleanText.slice(0, 60)}..." (${chunks.length} Chunks, voice: ${voice || "default"})`); + log(`TTS-Request (streaming): "${cleanText.slice(0, 80)}..." (${cleanText.length} chars, voice: ${voice || "default"})`); try { const voiceSample = voice ? path.join(VOICES_DIR, `${voice}.wav`) : null; const hasCustomVoice = voiceSample && fs.existsSync(voiceSample); let chunkIndex = 0; - // Audio-Format (aus WAV-Header extrahiert, einmal pro Request) let pcmMeta = null; - for (let i = 0; i < chunks.length; i++) { - const chunk = chunks[i]; - const isLastChunk = i === chunks.length - 1; - try { - // Streaming: PCM-Frames werden nacheinander an RVS gepusht, - // sobald sie vom XTTS-Server reinkommen - await streamXTTSAsPCM( - chunk, - language || "de", - hasCustomVoice ? voiceSample : null, - (pcmBase64, meta) => { - if (!pcmMeta) pcmMeta = meta; - sendToRVS({ - type: "audio_pcm", - payload: { - requestId: requestId || "", - messageId: messageId || "", - base64: pcmBase64, - format: "pcm_s16le", - sampleRate: meta.sampleRate, - channels: meta.channels, - voice: voice || "default", - chunk: chunkIndex++, - final: false, - }, - timestamp: Date.now(), - }); + // EIN Request fuer den GANZEN Text — kein Gap zwischen Saetzen. + // XTTS rendert und wir streamen PCM sobald es reinkommt. + await streamXTTSAsPCM( + cleanText, + language || "de", + hasCustomVoice ? voiceSample : null, + (pcmBase64, meta) => { + if (!pcmMeta) pcmMeta = meta; + sendToRVS({ + type: "audio_pcm", + payload: { + requestId: requestId || "", + messageId: messageId || "", + base64: pcmBase64, + format: "pcm_s16le", + sampleRate: meta.sampleRate, + channels: meta.channels, + voice: voice || "default", + chunk: chunkIndex++, + final: false, }, - ); + timestamp: Date.now(), + }); + }, + ); - // Nach letztem Text-Chunk: final-Flag senden damit App weiss "fertig" - if (isLastChunk && pcmMeta) { - sendToRVS({ - type: "audio_pcm", - payload: { - requestId: requestId || "", - messageId: messageId || "", - base64: "", - format: "pcm_s16le", - sampleRate: pcmMeta.sampleRate, - channels: pcmMeta.channels, - voice: voice || "default", - chunk: chunkIndex++, - final: true, - }, - timestamp: Date.now(), - }); - } - } catch (chunkErr) { - log(`TTS [${i + 1}/${chunks.length}] Fehler: ${chunkErr.message} — ueberspringe`); - } + // Am Ende: final-Flag damit App weiss "fertig" und Cache geschrieben werden kann + if (pcmMeta) { + sendToRVS({ + type: "audio_pcm", + payload: { + requestId: requestId || "", + messageId: messageId || "", + base64: "", + format: "pcm_s16le", + sampleRate: pcmMeta.sampleRate, + channels: pcmMeta.channels, + voice: voice || "default", + chunk: chunkIndex++, + final: true, + }, + timestamp: Date.now(), + }); } log(`TTS komplett: ${chunkIndex} PCM-Frames gestreamt (${cleanText.length} chars)`);