fix: XTTS splits concatenated audio into ~8s parts (seamless with preload)
- All chunks are rendered and their PCM is concatenated (consistent voice)
- The result is split into ~8-second WAV parts (not per sentence)
- 8 s is long enough for preload overlap and small enough for WebSocket
- Each part includes part/totalParts metadata

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
2c785cb37a
commit
81f7c38383
|
|
@ -187,45 +187,47 @@ async function handleTTSRequest(payload) {
|
||||||
const completeWav = Buffer.concat([wavHeader, allPcm]);
|
const completeWav = Buffer.concat([wavHeader, allPcm]);
|
||||||
const base64 = completeWav.toString("base64");
|
const base64 = completeWav.toString("base64");
|
||||||
|
|
||||||
// Wenn zu gross (>800KB PCM) → in Teile splitten, sonst als Ganzes senden
|
// In ~8 Sekunden Teile splitten (nahtlos genug fuer Queue, klein genug fuer WebSocket)
|
||||||
const MAX_PCM_SIZE = 800 * 1024; // ~800KB PCM pro Nachricht
|
const samplesPerSec = sampleRate * channels * (bitsPerSample / 8);
|
||||||
|
const TARGET_SECS = 8; // ~8 Sekunden pro Teil
|
||||||
|
const targetBytes = samplesPerSec * TARGET_SECS;
|
||||||
|
|
||||||
const pcmParts = [];
|
const pcmParts = [];
|
||||||
if (allPcm.length > MAX_PCM_SIZE) {
|
for (let offset = 0; offset < allPcm.length; offset += targetBytes) {
|
||||||
for (let offset = 0; offset < allPcm.length; offset += MAX_PCM_SIZE) {
|
pcmParts.push(allPcm.slice(offset, Math.min(offset + targetBytes, allPcm.length)));
|
||||||
pcmParts.push(allPcm.slice(offset, Math.min(offset + MAX_PCM_SIZE, allPcm.length)));
|
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
pcmParts.push(allPcm);
|
function buildWav(pcmData) {
|
||||||
|
const header = Buffer.alloc(44);
|
||||||
|
header.write("RIFF", 0);
|
||||||
|
header.writeUInt32LE(36 + pcmData.length, 4);
|
||||||
|
header.write("WAVE", 8);
|
||||||
|
header.write("fmt ", 12);
|
||||||
|
header.writeUInt32LE(16, 16);
|
||||||
|
header.writeUInt16LE(1, 20);
|
||||||
|
header.writeUInt16LE(channels, 22);
|
||||||
|
header.writeUInt32LE(sampleRate, 24);
|
||||||
|
header.writeUInt32LE(byteRate, 28);
|
||||||
|
header.writeUInt16LE(blockAlign, 32);
|
||||||
|
header.writeUInt16LE(bitsPerSample, 34);
|
||||||
|
header.write("data", 36);
|
||||||
|
header.writeUInt32LE(pcmData.length, 40);
|
||||||
|
return Buffer.concat([header, pcmData]);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (let p = 0; p < pcmParts.length; p++) {
|
for (let p = 0; p < pcmParts.length; p++) {
|
||||||
const partPcm = pcmParts[p];
|
const partWav = buildWav(pcmParts[p]);
|
||||||
const partHeader = Buffer.alloc(44);
|
|
||||||
partHeader.write("RIFF", 0);
|
|
||||||
partHeader.writeUInt32LE(36 + partPcm.length, 4);
|
|
||||||
partHeader.write("WAVE", 8);
|
|
||||||
partHeader.write("fmt ", 12);
|
|
||||||
partHeader.writeUInt32LE(16, 16);
|
|
||||||
partHeader.writeUInt16LE(1, 20);
|
|
||||||
partHeader.writeUInt16LE(channels, 22);
|
|
||||||
partHeader.writeUInt32LE(sampleRate, 24);
|
|
||||||
partHeader.writeUInt32LE(byteRate, 28);
|
|
||||||
partHeader.writeUInt16LE(blockAlign, 32);
|
|
||||||
partHeader.writeUInt16LE(bitsPerSample, 34);
|
|
||||||
partHeader.write("data", 36);
|
|
||||||
partHeader.writeUInt32LE(partPcm.length, 40);
|
|
||||||
|
|
||||||
const partWav = Buffer.concat([partHeader, partPcm]);
|
|
||||||
const partBase64 = partWav.toString("base64");
|
|
||||||
|
|
||||||
sendToRVS({
|
sendToRVS({
|
||||||
type: "xtts_response",
|
type: "xtts_response",
|
||||||
payload: {
|
payload: {
|
||||||
requestId: `${requestId || ""}${pcmParts.length > 1 ? '_' + p : ''}`,
|
requestId: `${requestId || ""}${pcmParts.length > 1 ? '_' + p : ''}`,
|
||||||
base64: partBase64,
|
base64: partWav.toString("base64"),
|
||||||
mimeType: "audio/wav",
|
mimeType: "audio/wav",
|
||||||
voice: voice || "default",
|
voice: voice || "default",
|
||||||
engine: "xtts",
|
engine: "xtts",
|
||||||
|
part: p + 1,
|
||||||
|
totalParts: pcmParts.length,
|
||||||
},
|
},
|
||||||
timestamp: Date.now(),
|
timestamp: Date.now(),
|
||||||
});
|
});
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue