diff --git a/xtts/bridge.js b/xtts/bridge.js index 1a07e3f..5b7d673 100644 --- a/xtts/bridge.js +++ b/xtts/bridge.js @@ -187,45 +187,47 @@ async function handleTTSRequest(payload) { const completeWav = Buffer.concat([wavHeader, allPcm]); const base64 = completeWav.toString("base64"); - // Wenn zu gross (>800KB PCM) → in Teile splitten, sonst als Ganzes senden - const MAX_PCM_SIZE = 800 * 1024; // ~800KB PCM pro Nachricht + // In ~8 Sekunden Teile splitten (nahtlos genug fuer Queue, klein genug fuer WebSocket) + const samplesPerSec = sampleRate * channels * (bitsPerSample / 8); + const TARGET_SECS = 8; // ~8 Sekunden pro Teil + const targetBytes = samplesPerSec * TARGET_SECS; + const pcmParts = []; - if (allPcm.length > MAX_PCM_SIZE) { - for (let offset = 0; offset < allPcm.length; offset += MAX_PCM_SIZE) { - pcmParts.push(allPcm.slice(offset, Math.min(offset + MAX_PCM_SIZE, allPcm.length))); - } - } else { - pcmParts.push(allPcm); + for (let offset = 0; offset < allPcm.length; offset += targetBytes) { + pcmParts.push(allPcm.slice(offset, Math.min(offset + targetBytes, allPcm.length))); + } + + function buildWav(pcmData) { + const header = Buffer.alloc(44); + header.write("RIFF", 0); + header.writeUInt32LE(36 + pcmData.length, 4); + header.write("WAVE", 8); + header.write("fmt ", 12); + header.writeUInt32LE(16, 16); + header.writeUInt16LE(1, 20); + header.writeUInt16LE(channels, 22); + header.writeUInt32LE(sampleRate, 24); + header.writeUInt32LE(byteRate, 28); + header.writeUInt16LE(blockAlign, 32); + header.writeUInt16LE(bitsPerSample, 34); + header.write("data", 36); + header.writeUInt32LE(pcmData.length, 40); + return Buffer.concat([header, pcmData]); } for (let p = 0; p < pcmParts.length; p++) { - const partPcm = pcmParts[p]; - const partHeader = Buffer.alloc(44); - partHeader.write("RIFF", 0); - partHeader.writeUInt32LE(36 + partPcm.length, 4); - partHeader.write("WAVE", 8); - partHeader.write("fmt ", 12); - partHeader.writeUInt32LE(16, 16); - partHeader.writeUInt16LE(1, 20); - partHeader.writeUInt16LE(channels, 22); - partHeader.writeUInt32LE(sampleRate, 24); - partHeader.writeUInt32LE(byteRate, 28); - partHeader.writeUInt16LE(blockAlign, 32); - partHeader.writeUInt16LE(bitsPerSample, 34); - partHeader.write("data", 36); - partHeader.writeUInt32LE(partPcm.length, 40); - - const partWav = Buffer.concat([partHeader, partPcm]); - const partBase64 = partWav.toString("base64"); + const partWav = buildWav(pcmParts[p]); sendToRVS({ type: "xtts_response", payload: { requestId: `${requestId || ""}${pcmParts.length > 1 ? '_' + p : ''}`, - base64: partBase64, + base64: partWav.toString("base64"), mimeType: "audio/wav", voice: voice || "default", engine: "xtts", + part: p + 1, + totalParts: pcmParts.length, }, timestamp: Date.now(), });