/**
 * ARIA XTTS Bridge — connects the XTTS v2 server to the RVS
 *
 * Receives xtts_request via RVS → renders audio through the XTTS API → sends it back
 * Receives voice_upload → stores the voice sample for cloning
 * Receives xtts_list_voices → lists the available voices
 */

const WebSocket = require("ws");
const http = require("http");
const https = require("https");
const fs = require("fs");
const path = require("path");

// ── Configuration ───────────────────────────────────
// Every setting is read from the environment; an unset or empty variable
// falls back to the default on the right-hand side.

// Base URL of the XTTS HTTP API.
const XTTS_API_URL = process.env.XTTS_API_URL || "http://xtts:8000";
// Host and port of the RVS WebSocket endpoint (port is kept as a string).
const RVS_HOST = process.env.RVS_HOST || "";
const RVS_PORT = process.env.RVS_PORT || "443";
// "true" selects wss://, any other value selects ws://.
const RVS_TLS = process.env.RVS_TLS || "true";
// "true" allows a retry over ws:// when the wss:// connection reports an error.
const RVS_TLS_FALLBACK = process.env.RVS_TLS_FALLBACK || "true";
// Token appended to the RVS URL as the `token` query parameter.
const RVS_TOKEN = process.env.RVS_TOKEN || "";
// Directory holding the voice samples (<name>.wav); overridable like the
// other settings, with the previous hard-coded path as the default.
const VOICES_DIR = process.env.VOICES_DIR || "/voices";

/**
 * Writes one line to stdout, prefixed with the current time as an ISO-8601
 * timestamp in square brackets.
 *
 * @param {*} msg - Text to log; interpolated into the output line.
 */
function log(msg) {
  const stamp = new Date().toISOString();
  console.log(`[${stamp}] ${msg}`);
}

// ── RVS connection ──────────────────────────────────

// Socket of the currently open RVS connection, or null while disconnected.
// sendToRVS() reads this to decide whether a message can be sent.
let rvsWs = null;
// Reconnect back-off in seconds: doubled after every closed connection
// (capped at 30) and reset to 2 once a connection opens.
let retryDelay = 2;

/**
 * Opens the WebSocket connection to the RVS and installs all handlers.
 *
 * - open:    publishes the socket in `rvsWs`, resets the back-off and starts a
 *            25 s keepalive (WebSocket ping plus a JSON "heartbeat" message).
 * - message: parses the frame as JSON and dispatches on `msg.type` to the
 *            TTS, voice-upload and list-voices handlers; handler errors are
 *            logged and never propagate.
 * - close:   stops the keepalive and schedules a reconnect with back-off.
 * - error:   if the TLS connection never opened and RVS_TLS_FALLBACK is
 *            "true", retries immediately over ws://; otherwise only logs.
 *
 * Exits the process with code 1 when RVS_HOST or RVS_TOKEN is not set.
 *
 * @param {boolean} [forcePlain] - When truthy, use ws:// even if RVS_TLS is "true".
 */
function connectRVS(forcePlain) {
  if (!RVS_HOST || !RVS_TOKEN) {
    log("RVS nicht konfiguriert — beende");
    process.exit(1);
  }

  const useTls = RVS_TLS === "true" && !forcePlain;
  const proto = useTls ? "wss" : "ws";
  const endpoint = `${proto}://${RVS_HOST}:${RVS_PORT}`;

  // Only the endpoint is logged; the token stays out of the log output.
  log(`Verbinde zu RVS: ${endpoint}`);

  // The token is percent-encoded so characters such as "+", "&" or "#"
  // cannot corrupt the query string.
  const ws = new WebSocket(`${endpoint}?token=${encodeURIComponent(RVS_TOKEN)}`);

  // Per-connection state: the keepalive timer and whether "open" ever fired.
  let keepaliveTimer = null;
  let everOpened = false;

  const stopKeepalive = () => {
    if (keepaliveTimer !== null) {
      clearInterval(keepaliveTimer);
      keepaliveTimer = null;
    }
  };

  ws.on("open", () => {
    log("RVS verbunden — warte auf TTS-Requests");
    everOpened = true;
    rvsWs = ws;
    retryDelay = 2;

    // Keepalive; cleared again in stopKeepalive() so that reconnects do not
    // accumulate one timer per connection.
    keepaliveTimer = setInterval(() => {
      if (ws.readyState === WebSocket.OPEN) {
        ws.ping();
        ws.send(JSON.stringify({ type: "heartbeat", timestamp: Date.now() }));
      }
    }, 25000);
  });

  ws.on("message", async (raw) => {
    try {
      const msg = JSON.parse(raw.toString());

      switch (msg.type) {
        case "xtts_request":
          await handleTTSRequest(msg.payload);
          break;
        case "voice_upload":
          await handleVoiceUpload(msg.payload);
          break;
        case "xtts_list_voices":
          await handleListVoices();
          break;
        default:
          // Other message types are not meant for this bridge.
          break;
      }
    } catch (err) {
      log(`Fehler: ${err.message}`);
    }
  });

  ws.on("close", () => {
    log("RVS Verbindung geschlossen");
    stopKeepalive();
    // Only clear the shared reference if it still points at this socket.
    if (rvsWs === ws) {
      rvsWs = null;
    }
    setTimeout(() => connectRVS(), Math.min(retryDelay * 1000, 30000));
    retryDelay = Math.min(retryDelay * 2, 30);
  });

  ws.on("error", (err) => {
    log(`RVS Fehler: ${err.message}`);

    // Fall back to plaintext only when the TLS connection never came up.
    // An error on an established session is handled by the "close" handler,
    // which reconnects with TLS instead of silently downgrading the link.
    if (useTls && RVS_TLS_FALLBACK === "true" && !everOpened) {
      log("TLS fehlgeschlagen — Fallback auf ws://");
      stopKeepalive();
      // Detach the handlers so the failed socket's "close" event does not
      // schedule a second reconnect next to the fallback connection.
      ws.removeAllListeners();
      // Defensive: keep a no-op listener so a late "error" event from the
      // discarded socket cannot become an uncaught exception.
      ws.on("error", () => {});
      try {
        ws.close();
      } catch (_) {
        // Best effort: the socket is being discarded anyway.
      }
      connectRVS(true);
    }
  });
}

// ── TTS request handler ─────────────────────────────

/**
 * Renders the text of one TTS request through the XTTS API and sends the
 * result to the RVS as an "xtts_response" message.
 *
 * The reply payload carries `requestId` (empty string when the request had
 * none) and either `{ base64, mimeType, voice, engine }` on success or
 * `{ error }` when the API call failed or returned 100 bytes or fewer.
 * Requests without a non-empty string `text` are ignored without a reply.
 *
 * @param {object} payload - Request payload.
 * @param {string} payload.text - Text to synthesise.
 * @param {string} [payload.voice] - Voice name; mapped to <VOICES_DIR>/<name>.wav.
 * @param {string} [payload.requestId] - Echoed back in the reply.
 * @param {string} [payload.language] - Language code; defaults to "de".
 * @returns {Promise<void>}
 */
async function handleTTSRequest(payload) {
  const { text, voice, requestId, language } = payload || {};
  if (typeof text !== "string" || !text) return;

  const lang = language || "de";
  const replyId = requestId || "";

  // All replies share the same envelope and request id.
  const respond = (fields) =>
    sendToRVS({
      type: "xtts_response",
      payload: Object.assign({ requestId: replyId }, fields),
      timestamp: Date.now(),
    });

  log(`TTS-Request: "${text.slice(0, 60)}..." (voice: ${voice || "default"}, lang: ${lang})`);

  try {
    // Resolve the voice sample path. The name is normalised with the same
    // character whitelist the upload handler uses for file names, so a voice
    // uploaded as "My Voice" is found again, and a name containing path
    // separators cannot point outside VOICES_DIR.
    const voiceKey = voice ? String(voice).replace(/[^a-zA-Z0-9_-]/g, "_") : "";
    const voiceSample = voiceKey ? path.join(VOICES_DIR, `${voiceKey}.wav`) : null;
    const hasCustomVoice = voiceSample !== null && fs.existsSync(voiceSample);

    // Call the XTTS API; without a matching sample no speaker file is passed.
    const audioBuffer = await callXTTSAPI(text, lang, hasCustomVoice ? voiceSample : null);

    // Bodies of 100 bytes or fewer are treated as "no audio".
    if (!audioBuffer || audioBuffer.length <= 100) {
      log("TTS: Leeres Audio erhalten");
      respond({ error: "Leeres Audio" });
      return;
    }

    log(`TTS fertig: ${audioBuffer.length} bytes (${(audioBuffer.length / 1024).toFixed(0)}KB)`);
    respond({
      base64: audioBuffer.toString("base64"),
      mimeType: "audio/wav",
      voice: voice || "default",
      engine: "xtts",
    });
  } catch (err) {
    log(`TTS Fehler: ${err.message}`);
    respond({ error: err.message });
  }
}

/**
 * POSTs a synthesis request to `<XTTS_API_URL>/tts_to_audio/` and resolves
 * with the raw response body.
 *
 * The request body is JSON: `{ text, language, speaker_wav }`, where
 * `speaker_wav` is an empty string when no speaker file is given.
 * The transport (http or https) follows the scheme of XTTS_API_URL.
 *
 * @param {string} text - Text to synthesise.
 * @param {string} language - Language code forwarded to the API.
 * @param {?string} speakerWav - Speaker sample path, or a falsy value for none.
 * @returns {Promise<Buffer>} Response body of a 200 reply.
 * @throws {Error} (as rejection) on a non-200 status, a transport error, an
 *   invalid XTTS_API_URL, or when the 60 s socket timeout fires.
 */
function callXTTSAPI(text, language, speakerWav) {
  return new Promise((resolve, reject) => {
    const body = JSON.stringify({
      text,
      language,
      speaker_wav: speakerWav || "",
    });

    const url = new URL(`${XTTS_API_URL}/tts_to_audio/`);
    // Pick the module matching the URL scheme; http.request rejects https: URLs.
    const transport = url.protocol === "https:" ? https : http;

    const options = {
      hostname: url.hostname,
      port: url.port,
      // Keep a query string if the configured base URL carries one.
      path: `${url.pathname}${url.search}`,
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        "Content-Length": Buffer.byteLength(body),
      },
      timeout: 60000,
    };

    const req = transport.request(options, (res) => {
      const chunks = [];
      res.on("data", (chunk) => chunks.push(chunk));
      // A failure while streaming the body must settle the promise as well.
      res.on("error", reject);
      res.on("end", () => {
        const received = Buffer.concat(chunks);
        if (res.statusCode === 200) {
          resolve(received);
        } else {
          reject(new Error(`XTTS API HTTP ${res.statusCode}: ${received.toString().slice(0, 200)}`));
        }
      });
    });

    req.on("error", reject);
    // The "timeout" event only signals inactivity; the request has to be
    // torn down explicitly.
    req.on("timeout", () => {
      req.destroy();
      reject(new Error("XTTS API Timeout (60s)"));
    });
    req.write(body);
    req.end();
  });
}

// ── Voice upload handler ────────────────────────────

/**
 * Stores an uploaded voice as `<VOICES_DIR>/<sanitised name>.wav` and
 * confirms with an "xtts_voice_saved" message.
 *
 * Characters outside `[a-zA-Z0-9_-]` in the name are replaced by "_" for the
 * file name; the confirmation echoes the name as received, together with the
 * byte size and the file path. Invalid payloads and write errors are logged
 * and produce no confirmation.
 *
 * @param {object} payload - Upload payload.
 * @param {string} payload.name - Voice name.
 * @param {Array<{base64: string}>} payload.samples - Base64-encoded sample data.
 * @returns {Promise<void>}
 */
async function handleVoiceUpload(payload) {
  const { name, samples } = payload || {};

  const hasValidSamples =
    Array.isArray(samples) &&
    samples.length > 0 &&
    samples.every((sample) => Boolean(sample) && typeof sample.base64 === "string");

  if (typeof name !== "string" || !name || !hasValidSamples) {
    log("Voice Upload: Ungueltige Daten");
    return;
  }

  log(`Voice Upload: "${name}" (${samples.length} Samples)`);

  try {
    // Join all samples byte-for-byte.
    // NOTE(review): if every sample is a complete WAV file, the result holds
    // several RIFF headers — confirm what the uploading client sends.
    const combined = Buffer.concat(samples.map((sample) => Buffer.from(sample.base64, "base64")));

    // Store as .wav; the whitelist keeps the name free of path separators.
    const safeName = name.replace(/[^a-zA-Z0-9_-]/g, "_");
    const filePath = path.join(VOICES_DIR, `${safeName}.wav`);

    // Non-blocking I/O so a large upload does not stall other socket traffic.
    await fs.promises.mkdir(VOICES_DIR, { recursive: true });
    await fs.promises.writeFile(filePath, combined);

    log(`Voice gespeichert: ${filePath} (${(combined.length / 1024).toFixed(0)}KB)`);

    sendToRVS({
      type: "xtts_voice_saved",
      payload: { name, size: combined.length, path: filePath },
      timestamp: Date.now(),
    });
  } catch (err) {
    log(`Voice Upload Fehler: ${err.message}`);
  }
}

// ── Voice list handler ──────────────────────────────

/**
 * Sends the list of stored voices to the RVS as an "xtts_voices_list" message.
 *
 * Every regular file in VOICES_DIR whose name ends in ".wav" becomes one
 * entry `{ name, file, size }` (name = file name without the extension,
 * size in bytes). A missing directory yields an empty list. Entries that
 * cannot be inspected (for example removed while listing) are skipped
 * instead of failing the whole listing. Other errors are logged and no
 * message is sent.
 *
 * @returns {Promise<void>}
 */
async function handleListVoices() {
  try {
    let entries;
    try {
      entries = await fs.promises.readdir(VOICES_DIR);
    } catch (err) {
      // No voices directory yet simply means "no voices".
      if (err.code !== "ENOENT") throw err;
      entries = [];
    }

    const wavFiles = entries.filter((file) => file.endsWith(".wav"));

    // Inspect all candidates in parallel; Promise.all keeps directory order.
    const inspected = await Promise.all(
      wavFiles.map((file) =>
        fs.promises.stat(path.join(VOICES_DIR, file)).then(
          (info) =>
            info.isFile()
              ? { name: path.basename(file, ".wav"), file, size: info.size }
              : null,
          () => null
        )
      )
    );
    const voices = inspected.filter((entry) => entry !== null);

    log(`Stimmen: ${voices.length} verfuegbar`);

    sendToRVS({
      type: "xtts_voices_list",
      payload: { voices },
      timestamp: Date.now(),
    });
  } catch (err) {
    log(`Stimmen-Liste Fehler: ${err.message}`);
  }
}

// ── Send to RVS ─────────────────────────────────────

/**
 * Serialises a message as JSON and sends it over the current RVS socket.
 *
 * When no socket is open the message is dropped; the drop is logged so lost
 * replies are visible instead of disappearing silently.
 *
 * @param {object} msg - Message to send; must be JSON-serialisable.
 * @returns {boolean} true if the message was handed to the socket, false if
 *   it was dropped because the RVS is not connected.
 */
function sendToRVS(msg) {
  if (!rvsWs || rvsWs.readyState !== WebSocket.OPEN) {
    log(`RVS nicht verbunden — Nachricht verworfen (${msg && msg.type})`);
    return false;
  }

  rvsWs.send(JSON.stringify(msg));
  return true;
}

// ── Startup ─────────────────────────────────────────

// Announce the start and the effective endpoints before anything connects.
[
  "ARIA XTTS Bridge startet...",
  `XTTS API: ${XTTS_API_URL}`,
  `RVS: ${RVS_HOST}:${RVS_PORT}`,
].forEach((line) => log(line));

/**
 * Waits until the XTTS API is reachable, then invokes `callback`.
 *
 * Probes `<XTTS_API_URL>/docs`; any HTTP response counts as "reachable" (the
 * status code is not inspected). A failed or timed-out probe is repeated
 * after 5 s. Once the attempts are used up the callback is invoked anyway.
 * The callback runs exactly once.
 *
 * @param {Function} callback - Called without arguments when the API answered
 *   or the attempts are exhausted.
 * @param {number} [attempts=24] - Remaining probes.
 */
function waitForXTTS(callback, attempts = 24) {
  // `!(x > 0)` also ends the loop for NaN instead of retrying forever.
  if (!(attempts > 0)) {
    log("XTTS API nicht erreichbar — starte trotzdem");
    callback();
    return;
  }

  const probeUrl = new URL(`${XTTS_API_URL}/docs`);
  // http.get rejects https: URLs, so follow the configured scheme.
  const transport = probeUrl.protocol === "https:" ? https : http;
  // Guards against acting twice, e.g. an error arriving after the response.
  let settled = false;

  const req = transport.get(probeUrl, { timeout: 5000 }, (res) => {
    if (settled) return;
    settled = true;
    // Drain the body so the socket is released.
    res.resume();
    log("XTTS API erreichbar");
    callback();
  });

  // A hanging connection is aborted; destroy(err) surfaces as "error" below.
  req.on("timeout", () => {
    req.destroy(new Error("XTTS API probe timeout"));
  });

  req.on("error", () => {
    if (settled) return;
    settled = true;
    log(`XTTS API noch nicht bereit — warte (${attempts} Versuche uebrig)...`);
    setTimeout(() => waitForXTTS(callback, attempts - 1), 5000);
  });
}

// 24 probes with 5 s pauses: about 2 minutes when connections fail right away.
waitForXTTS(() => connectRVS(), 24);