diff --git a/xtts/bridge.js b/xtts/bridge.js index 5b9fc11..0a6da4e 100644 --- a/xtts/bridge.js +++ b/xtts/bridge.js @@ -195,32 +195,27 @@ async function _runTTSRequest(payload) { } /** - * Ruft /tts_to_audio/ auf und streamt das Response-Body chunkweise an - * den Callback. Kein echtes Server-Streaming (XTTS rendert komplett - * bevor es antwortet), aber stabil und mit der Queue + grosszuegigem - * AudioTrack-Buffer klingt's akzeptabel. + * Ruft /tts_stream (GET) auf — echter Streaming-Endpoint bei daswer123. + * Samples fliessen waehrend XTTS rendert (chunked transfer). + * Time-to-first-audio ~300-500ms statt 2-4s beim batch-Endpoint. * - * /tts_stream ist elegant, funktioniert aber nicht in allen Versionen - * von daswer123/xtts-api-server. + * Parameter werden als Query-String uebergeben (GET-API). */ function streamXTTSAsPCM(text, language, speakerWav, onPcmChunk) { return new Promise((resolve, reject) => { - const body = JSON.stringify({ + const qs = new URLSearchParams({ text, - language, - speaker_wav: speakerWav || "", + language: language || "de", + speaker_wav: speakerWav ? speakerWav : "", + stream_chunk_size: "40", }); - const url = new URL(`${XTTS_API_URL}/tts_to_audio/`); + const url = new URL(`${XTTS_API_URL}/tts_stream?${qs.toString()}`); const options = { hostname: url.hostname, port: url.port, - path: url.pathname, - method: "POST", - headers: { - "Content-Type": "application/json", - "Content-Length": Buffer.byteLength(body), - }, + path: `${url.pathname}?${url.searchParams.toString()}`, + method: "GET", timeout: 60000, }; @@ -282,7 +277,6 @@ function streamXTTSAsPCM(text, language, speakerWav, onPcmChunk) { req.on("error", reject); req.on("timeout", () => { req.destroy(); reject(new Error("XTTS API Timeout (60s)")); }); - req.write(body); req.end(); }); }