|
|
|
@@ -138,31 +138,43 @@ async function _runTTSRequest(payload) {
|
|
|
|
|
let chunkIndex = 0;
|
|
|
|
|
let pcmMeta = null;
|
|
|
|
|
|
|
|
|
|
// EIN Request fuer den GANZEN Text — kein Gap zwischen Saetzen.
|
|
|
|
|
// XTTS rendert und wir streamen PCM sobald es reinkommt.
|
|
|
|
|
await streamXTTSAsPCM(
|
|
|
|
|
cleanText,
|
|
|
|
|
language || "de",
|
|
|
|
|
hasCustomVoice ? voiceSample : null,
|
|
|
|
|
(pcmBase64, meta) => {
|
|
|
|
|
if (!pcmMeta) pcmMeta = meta;
|
|
|
|
|
sendToRVS({
|
|
|
|
|
type: "audio_pcm",
|
|
|
|
|
payload: {
|
|
|
|
|
requestId: requestId || "",
|
|
|
|
|
messageId: messageId || "",
|
|
|
|
|
base64: pcmBase64,
|
|
|
|
|
format: "pcm_s16le",
|
|
|
|
|
sampleRate: meta.sampleRate,
|
|
|
|
|
channels: meta.channels,
|
|
|
|
|
voice: voice || "default",
|
|
|
|
|
chunk: chunkIndex++,
|
|
|
|
|
final: false,
|
|
|
|
|
},
|
|
|
|
|
timestamp: Date.now(),
|
|
|
|
|
});
|
|
|
|
|
},
|
|
|
|
|
);
|
|
|
|
|
const onChunk = (pcmBase64, meta) => {
|
|
|
|
|
if (!pcmMeta) pcmMeta = meta;
|
|
|
|
|
sendToRVS({
|
|
|
|
|
type: "audio_pcm",
|
|
|
|
|
payload: {
|
|
|
|
|
requestId: requestId || "",
|
|
|
|
|
messageId: messageId || "",
|
|
|
|
|
base64: pcmBase64,
|
|
|
|
|
format: "pcm_s16le",
|
|
|
|
|
sampleRate: meta.sampleRate,
|
|
|
|
|
channels: meta.channels,
|
|
|
|
|
voice: voice || "default",
|
|
|
|
|
chunk: chunkIndex++,
|
|
|
|
|
final: false,
|
|
|
|
|
},
|
|
|
|
|
timestamp: Date.now(),
|
|
|
|
|
});
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
// /tts_stream fuer echtes Streaming (funktioniert im XTTS local-Mode).
|
|
|
|
|
// Wenn Server im apiManual/api-Mode laeuft: 400 → Fallback auf /tts_to_audio/.
|
|
|
|
|
try {
|
|
|
|
|
await streamXTTSAsPCM(
|
|
|
|
|
cleanText,
|
|
|
|
|
language || "de",
|
|
|
|
|
hasCustomVoice ? voiceSample : null,
|
|
|
|
|
onChunk,
|
|
|
|
|
);
|
|
|
|
|
} catch (streamErr) {
|
|
|
|
|
log(`/tts_stream fehlgeschlagen (${streamErr.message.slice(0, 100)}) — Fallback /tts_to_audio/`);
|
|
|
|
|
await streamXTTSBatch(
|
|
|
|
|
cleanText,
|
|
|
|
|
language || "de",
|
|
|
|
|
hasCustomVoice ? voiceSample : null,
|
|
|
|
|
onChunk,
|
|
|
|
|
);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Am Ende: final-Flag damit App weiss "fertig" und Cache geschrieben werden kann
|
|
|
|
|
if (pcmMeta) {
|
|
|
|
@@ -195,45 +207,44 @@ async function _runTTSRequest(payload) {
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Ruft /tts_to_audio/ auf und streamt das resultierende WAV bereits waehrend
|
|
|
|
|
* des Empfangs in PCM-Frames an den Callback. Der WAV-Header wird einmal
|
|
|
|
|
* geparst, danach werden nur noch raw PCM-Samples weitergeleitet.
|
|
|
|
|
*
|
|
|
|
|
* Warum nicht echtes /tts_stream/? daswer123 hat den Endpoint, aber die
|
|
|
|
|
* Audio-Quality ist dort niedriger und er produziert beim ersten Chunk
|
|
|
|
|
* oft Artefakte. Pragmatischer Weg: /tts_to_audio/ + Response-Stream
|
|
|
|
|
* chunkweise auslesen. Das ist zwar kein echtes Server-Streaming, aber
|
|
|
|
|
* gibt uns deutlich kleinere Netzwerk-Haeppchen und die App kann via
|
|
|
|
|
* AudioTrack MODE_STREAM sofort nahtlos abspielen.
|
|
|
|
|
* Ruft /tts_stream auf — echter Streaming-Endpoint bei daswer123.
|
|
|
|
|
* Schickt was der Server verlangt (allow: GET), aber mit JSON-Body
|
|
|
|
|
* als POST scheitert mit 405. Manche Versionen wollen GET + Query,
|
|
|
|
|
* andere POST + JSON. Testen was funktioniert.
|
|
|
|
|
*/
|
|
|
|
|
function streamXTTSAsPCM(text, language, speakerWav, onPcmChunk) {
|
|
|
|
|
return new Promise((resolve, reject) => {
|
|
|
|
|
const body = JSON.stringify({
|
|
|
|
|
text,
|
|
|
|
|
language,
|
|
|
|
|
speaker_wav: speakerWav || "",
|
|
|
|
|
});
|
|
|
|
|
// Wichtig: speaker_wav MUSS als Query-Key dabei sein (Pydantic required) —
|
|
|
|
|
// auch bei default-voice mit leerem Wert. Sonst gibt's HTTP 422.
|
|
|
|
|
const qs = new URLSearchParams();
|
|
|
|
|
qs.set("text", text);
|
|
|
|
|
qs.set("language", language || "de");
|
|
|
|
|
qs.set("speaker_wav", speakerWav || "");
|
|
|
|
|
qs.set("stream_chunk_size", "40");
|
|
|
|
|
|
|
|
|
|
const url = new URL(`${XTTS_API_URL}/tts_to_audio/`);
|
|
|
|
|
const url = new URL(XTTS_API_URL);
|
|
|
|
|
const fullPath = `/tts_stream?${qs.toString()}`;
|
|
|
|
|
const options = {
|
|
|
|
|
hostname: url.hostname,
|
|
|
|
|
port: url.port,
|
|
|
|
|
path: url.pathname,
|
|
|
|
|
method: "POST",
|
|
|
|
|
headers: {
|
|
|
|
|
"Content-Type": "application/json",
|
|
|
|
|
"Content-Length": Buffer.byteLength(body),
|
|
|
|
|
},
|
|
|
|
|
port: url.port || 80,
|
|
|
|
|
path: fullPath,
|
|
|
|
|
method: "GET",
|
|
|
|
|
timeout: 60000,
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
log(`TTS GET /tts_stream?text=${text.slice(0, 30)}... (voice=${speakerWav ? "custom" : "default"})`);
|
|
|
|
|
|
|
|
|
|
const req = http.request(options, (res) => {
|
|
|
|
|
if (res.statusCode !== 200) {
|
|
|
|
|
let body = "";
|
|
|
|
|
res.on("data", (d) => { body += d.toString(); });
|
|
|
|
|
res.on("end", () => reject(new Error(`XTTS HTTP ${res.statusCode}: ${body.slice(0, 200)}`)));
|
|
|
|
|
res.on("end", () => {
|
|
|
|
|
log(`XTTS /tts_stream ${res.statusCode}: ${body.slice(0, 300)}`);
|
|
|
|
|
reject(new Error(`XTTS HTTP ${res.statusCode}: ${body.slice(0, 200)}`));
|
|
|
|
|
});
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
log(`TTS stream verbunden, empfange PCM...`);
|
|
|
|
|
|
|
|
|
|
let headerParsed = false;
|
|
|
|
|
let sampleRate = 24000;
|
|
|
|
@@ -285,6 +296,76 @@ function streamXTTSAsPCM(text, language, speakerWav, onPcmChunk) {
|
|
|
|
|
|
|
|
|
|
req.on("error", reject);
|
|
|
|
|
req.on("timeout", () => { req.destroy(); reject(new Error("XTTS API Timeout (60s)")); });
|
|
|
|
|
req.end();
|
|
|
|
|
});
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
 * Fallback for when /tts_stream is unavailable: POSTs the text as JSON to
 * /tts_to_audio/ and forwards the WAV response to the callback as raw PCM.
 *
 * The WAV header is parsed once to obtain sample rate and channel count;
 * afterwards only PCM bytes are forwarded, cut into 8192-byte pieces (the
 * last piece may be shorter).
 *
 * @param {string} text - Text to synthesize.
 * @param {string} [language] - Language code; falls back to "de" when falsy.
 * @param {string|null} [speakerWav] - Speaker sample; sent as "" when falsy.
 * @param {(pcmBase64: string, meta: {sampleRate: number, channels: number}) => void} onPcmChunk
 *   Called synchronously for every PCM piece, in order.
 * @returns {Promise<void>} Resolves once the whole response was forwarded.
 *   Rejects on a non-200 status, a network error, the 60s socket timeout,
 *   a response that ends prematurely, or when `onPcmChunk` throws.
 */
function streamXTTSBatch(text, language, speakerWav, onPcmChunk) {
  return new Promise((resolve, reject) => {
    const PCM_CHUNK_BYTES = 8192;

    const body = JSON.stringify({
      text,
      language: language || "de",
      speaker_wav: speakerWav || "",
    });
    const url = new URL(XTTS_API_URL);
    const options = {
      hostname: url.hostname,
      port: url.port || 80,
      path: "/tts_to_audio/",
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        "Content-Length": Buffer.byteLength(body),
      },
      timeout: 60000,
    };

    // Once settled, no further chunks are forwarded and later events are ignored.
    let settled = false;
    let req = null;
    const fail = (err) => {
      if (settled) return;
      settled = true;
      reject(err);
      // Stop the transfer; nobody is listening for the remaining bytes.
      if (req) req.destroy();
    };

    req = http.request(options, (res) => {
      res.on("error", fail);
      // A connection dropped mid-body does not emit "end"; without this
      // check the promise could stay pending forever.
      res.on("close", () => {
        if (!res.complete) fail(new Error("XTTS Batch: response ended prematurely"));
      });

      if (res.statusCode !== 200) {
        const parts = [];
        res.on("data", (d) => { parts.push(d); });
        res.on("end", () => {
          const rb = Buffer.concat(parts).toString();
          fail(new Error(`XTTS Batch HTTP ${res.statusCode}: ${rb.slice(0, 200)}`));
        });
        return;
      }

      // Defaults are used when the header carries no usable values.
      let sampleRate = 24000;
      let channels = 1;
      let headerParsed = false;
      let headerBuf = Buffer.alloc(0);
      let leftover = Buffer.alloc(0);

      const emit = (buf) => {
        onPcmChunk(buf.toString("base64"), { sampleRate, channels });
      };

      const feed = (data, atEnd) => {
        let pcm = data;
        if (!headerParsed) {
          headerBuf = Buffer.concat([headerBuf, data]);
          const header = _parseWavHeader(headerBuf, atEnd);
          if (!header) return; // header not complete yet
          if (header.channels) channels = header.channels;
          if (header.sampleRate) sampleRate = header.sampleRate;
          headerParsed = true;
          pcm = headerBuf.subarray(header.dataOffset);
          headerBuf = Buffer.alloc(0);
        }
        let pending = leftover.length > 0 ? Buffer.concat([leftover, pcm]) : pcm;
        while (pending.length >= PCM_CHUNK_BYTES) {
          emit(pending.subarray(0, PCM_CHUNK_BYTES));
          pending = pending.subarray(PCM_CHUNK_BYTES);
        }
        leftover = pending;
      };

      res.on("data", (chunk) => {
        if (settled) return;
        try {
          feed(chunk, false);
        } catch (err) {
          // A throwing callback must reject the promise, not crash the process.
          fail(err);
        }
      });

      res.on("end", () => {
        if (settled) return;
        try {
          if (!headerParsed) feed(Buffer.alloc(0), true);
          if (leftover.length > 0) emit(leftover);
        } catch (err) {
          fail(err);
          return;
        }
        settled = true;
        resolve();
      });
    });

    req.on("error", fail);
    req.on("timeout", () => { fail(new Error("XTTS Batch Timeout (60s)")); });
    req.write(body);
    req.end();
  });
}

/**
 * Locates the start of the PCM payload inside the first bytes of a WAV stream.
 *
 * Walks the RIFF chunk list so that extra chunks in front of "data" (e.g.
 * "LIST", or an extended "fmt ") are skipped instead of being played as audio.
 * Input that is not RIFF/WAVE, or whose "data" chunk cannot be found, is
 * treated as having the canonical 44-byte header.
 *
 * @param {Buffer} buf - All bytes received so far, starting at byte 0.
 * @param {boolean} atEnd - True when no more bytes will arrive.
 * @returns {{channels: (number|undefined), sampleRate: (number|undefined), dataOffset: number}|null}
 *   Header info, or null when more bytes are needed (or, with `atEnd`, when
 *   the input is too short to contain a header at all).
 */
function _parseWavHeader(buf, atEnd) {
  const CANONICAL_HEADER_BYTES = 44;
  // Upper bound for the chunk walk so a corrupt size field cannot make us
  // buffer the entire response while looking for "data".
  const MAX_HEADER_SCAN_BYTES = 64 * 1024;

  const canonical = () => {
    if (buf.length < CANONICAL_HEADER_BYTES) return null;
    return {
      channels: buf.readUInt16LE(22),
      sampleRate: buf.readUInt32LE(24),
      dataOffset: CANONICAL_HEADER_BYTES,
    };
  };

  if (buf.length < 12) return null;
  const isRiffWave =
    buf.toString("latin1", 0, 4) === "RIFF" &&
    buf.toString("latin1", 8, 12) === "WAVE";
  if (!isRiffWave) return canonical();

  let channels;
  let sampleRate;
  let pos = 12;
  while (pos + 8 <= buf.length && pos < MAX_HEADER_SCAN_BYTES) {
    const id = buf.toString("latin1", pos, pos + 4);
    const size = buf.readUInt32LE(pos + 4);
    if (id === "data") {
      return { channels, sampleRate, dataOffset: pos + 8 };
    }
    if (id === "fmt ") {
      if (pos + 16 > buf.length) break; // channel/rate fields not received yet
      channels = buf.readUInt16LE(pos + 10);
      sampleRate = buf.readUInt32LE(pos + 12);
    }
    // RIFF chunks are padded to an even number of bytes.
    pos += 8 + size + (size % 2);
  }

  if (atEnd || pos >= MAX_HEADER_SCAN_BYTES) return canonical();
  return null;
}
|
|
|
|
|