fix: Fade-In auf ersten PCM-Chunk — maskiert XTTS-Warmup-Glitches
XTTS daswer123 hat am Anfang jedes Renders Warmup-Artefakte — die ersten autoregressiv generierten Tokens haben wenig Kontext und klingen verzerrt. Ein 120ms Linear-Fade-In auf den ersten ausgehenden PCM-Chunk blendet das sanft auf und versteckt die Glitches, ohne dass das echte Audio danach leiser klingt. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
32ddac002f
commit
6c8ba5fe2d
|
|
@ -95,6 +95,25 @@ function connectRVS(forcePlain) {
|
||||||
|
|
||||||
// ── TTS Request Handler ─────────────────────────────
|
// ── TTS Request Handler ─────────────────────────────
|
||||||
|
|
||||||
|
/**
 * Applies a linear fade-in to the start of a base64-encoded PCM chunk
 * (signed 16-bit little-endian, interleaved channels).
 *
 * Intended to mask XTTS warm-up glitches at the beginning of a render.
 *
 * The gain ramps linearly from 0 towards 1 across the fade window and is
 * computed per frame, so every channel of a frame is scaled identically.
 * When the chunk is shorter than the requested fade, the ramp is compressed
 * to span the complete frames of the chunk. Trailing bytes that do not form
 * a complete frame are left untouched.
 *
 * @param {string} base64Pcm - Base64-encoded s16le PCM data.
 * @param {number} sampleRate - Sample rate in Hz (frames per second).
 * @param {number} channels - Number of interleaved channels.
 * @param {number} fadeMs - Fade duration in milliseconds.
 * @returns {string} Base64-encoded PCM data with the fade applied. The audio
 *   is returned without gain changes when sampleRate, channels or fadeMs do
 *   not describe a usable fade window.
 */
function applyFadeIn(base64Pcm, sampleRate, channels, fadeMs) {
  const BYTES_PER_SAMPLE = 2; // s16le
  const buf = Buffer.from(base64Pcm, "base64");

  // Missing or malformed metadata must not break the audio stream: skip the
  // fade and pass the chunk through.
  const channelCount = Number(channels);
  const requestedFrames = Math.floor((fadeMs / 1000) * sampleRate);
  if (
    !Number.isInteger(channelCount) ||
    channelCount <= 0 ||
    !Number.isFinite(requestedFrames) ||
    requestedFrames <= 0
  ) {
    return buf.toString("base64");
  }

  // Count only complete frames. An odd trailing byte (or a partial frame)
  // would otherwise push the read offset past the end of the buffer.
  const totalFrames = Math.floor(buf.length / (BYTES_PER_SAMPLE * channelCount));
  const fadeFrames = Math.min(requestedFrames, totalFrames);

  for (let frame = 0; frame < fadeFrames; frame++) {
    // One gain per frame keeps the channels balanced during the ramp.
    const gain = frame / fadeFrames;
    for (let ch = 0; ch < channelCount; ch++) {
      const offset = (frame * channelCount + ch) * BYTES_PER_SAMPLE;
      const sample = buf.readInt16LE(offset);
      buf.writeInt16LE(Math.round(sample * gain), offset);
    }
  }

  return buf.toString("base64");
}
|
||||||
|
|
||||||
// ── TTS-Queue ──────────────────────────────────────
|
// ── TTS-Queue ──────────────────────────────────────
|
||||||
// XTTS verarbeitet Requests sequenziell, damit Streams sich nicht ueberlappen.
|
// XTTS verarbeitet Requests sequenziell, damit Streams sich nicht ueberlappen.
|
||||||
// Ohne Queue wuerden parallele Requests parallel streamen → App bekommt
|
// Ohne Queue wuerden parallele Requests parallel streamen → App bekommt
|
||||||
|
|
@ -137,15 +156,23 @@ async function _runTTSRequest(payload) {
|
||||||
|
|
||||||
let chunkIndex = 0;
|
let chunkIndex = 0;
|
||||||
let pcmMeta = null;
|
let pcmMeta = null;
|
||||||
|
let firstChunkSeen = false;
|
||||||
|
|
||||||
const onChunk = (pcmBase64, meta) => {
|
const onChunk = (pcmBase64, meta) => {
|
||||||
if (!pcmMeta) pcmMeta = meta;
|
if (!pcmMeta) pcmMeta = meta;
|
||||||
|
let outBase64 = pcmBase64;
|
||||||
|
// Fade-In auf den ersten Chunk — maskiert XTTS-Warmup-Glitches
|
||||||
|
// (autoregressiver Generator hat am Anfang wenig Kontext → Artefakte).
|
||||||
|
if (!firstChunkSeen && pcmBase64) {
|
||||||
|
firstChunkSeen = true;
|
||||||
|
outBase64 = applyFadeIn(pcmBase64, meta.sampleRate, meta.channels, 120);
|
||||||
|
}
|
||||||
sendToRVS({
|
sendToRVS({
|
||||||
type: "audio_pcm",
|
type: "audio_pcm",
|
||||||
payload: {
|
payload: {
|
||||||
requestId: requestId || "",
|
requestId: requestId || "",
|
||||||
messageId: messageId || "",
|
messageId: messageId || "",
|
||||||
base64: pcmBase64,
|
base64: outBase64,
|
||||||
format: "pcm_s16le",
|
format: "pcm_s16le",
|
||||||
sampleRate: meta.sampleRate,
|
sampleRate: meta.sampleRate,
|
||||||
channels: meta.channels,
|
channels: meta.channels,
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue