/**
 * ARIA XTTS Bridge — connects an XTTS v2 server to the RVS.
 *
 * Handled RVS message types:
 *   - xtts_request     → render audio via the XTTS HTTP API and send it back
 *   - voice_upload     → store a voice sample for cloning
 *   - xtts_list_voices → list the stored voice samples
 */

const WebSocket = require("ws");
const http = require("http");
const https = require("https");
const fs = require("fs");
const path = require("path");

const XTTS_API_URL = process.env.XTTS_API_URL || "http://xtts:8000";
const RVS_HOST = process.env.RVS_HOST || "";
const RVS_PORT = process.env.RVS_PORT || "443";
const RVS_TLS = process.env.RVS_TLS || "true";
const RVS_TLS_FALLBACK = process.env.RVS_TLS_FALLBACK || "true";
const RVS_TOKEN = process.env.RVS_TOKEN || "";
const VOICES_DIR = "/voices";

// Base URL without trailing slashes, so appending "/path" never yields "//path".
const XTTS_BASE_URL = XTTS_API_URL.replace(/\/+$/, "");
const XTTS_TIMEOUT_MS = 60000;
const XTTS_PROBE_TIMEOUT_MS = 5000;
const XTTS_PROBE_INTERVAL_MS = 10000; // model loading is slow, so probe every 10s
const KEEPALIVE_INTERVAL_MS = 25000;
// Responses of 100 bytes or fewer are treated as "empty audio".
const MIN_AUDIO_BYTES = 100;

/**
 * Writes a timestamped line to stdout.
 * @param {string} msg - Text to log.
 */
function log(msg) {
  console.log(`[${new Date().toISOString()}] ${msg}`);
}

/**
 * Picks the Node client module matching a URL's scheme.
 * @param {URL} url - Target URL.
 * @returns {typeof http | typeof https} `https` for "https:" URLs, otherwise `http`.
 */
function httpClientFor(url) {
  return url.protocol === "https:" ? https : http;
}

/**
 * Maps a voice name to the file-system-safe form used when storing uploads.
 * @param {string} name - Voice name as supplied by the client.
 * @returns {string} Name with every character outside [a-zA-Z0-9_-] replaced by "_".
 */
function sanitizeVoiceName(name) {
  return String(name).replace(/[^a-zA-Z0-9_-]/g, "_");
}

/**
 * Finds the stored sample file for a requested voice.
 *
 * The exact name is tried first (so files placed in VOICES_DIR by hand keep
 * working), but only when it is a plain file name that cannot escape
 * VOICES_DIR. The sanitized name — the one uploads are stored under — is
 * tried second.
 *
 * @param {*} voice - Requested voice name; falsy means "no custom voice".
 * @returns {string|null} Path of an existing .wav file, or null if none matches.
 */
function resolveVoiceSample(voice) {
  if (!voice) return null;
  const requested = String(voice);
  const candidates = [];
  if (path.basename(requested) === requested) {
    candidates.push(requested);
  }
  candidates.push(sanitizeVoiceName(requested));
  for (const candidate of candidates) {
    const samplePath = path.join(VOICES_DIR, `${candidate}.wav`);
    if (fs.existsSync(samplePath)) return samplePath;
  }
  return null;
}

// ── RVS connection ──────────────────────────────────

let rvsWs = null;
let retryDelay = 2; // seconds; doubled after every close, capped at 30

/**
 * Opens the WebSocket to the RVS and wires up message dispatch, keepalive,
 * reconnect with backoff, and the optional TLS → plain fallback.
 * Exits the process when RVS_HOST or RVS_TOKEN is not configured.
 *
 * @param {boolean} [forcePlain] - When truthy, use ws:// even if RVS_TLS is "true".
 */
function connectRVS(forcePlain) {
  if (!RVS_HOST || !RVS_TOKEN) {
    log("RVS nicht konfiguriert — beende");
    process.exit(1);
  }

  const useTls = RVS_TLS === "true" && !forcePlain;
  const proto = useTls ? "wss" : "ws";
  const url = `${proto}://${RVS_HOST}:${RVS_PORT}?token=${RVS_TOKEN}`;

  // The token is deliberately left out of the log line.
  log(`Verbinde zu RVS: ${proto}://${RVS_HOST}:${RVS_PORT}`);
  const ws = new WebSocket(url);

  let keepaliveTimer = null;
  let wasOpen = false;

  const stopKeepalive = () => {
    if (keepaliveTimer !== null) {
      clearInterval(keepaliveTimer);
      keepaliveTimer = null;
    }
  };

  ws.on("open", () => {
    log("RVS verbunden — warte auf TTS-Requests");
    rvsWs = ws;
    retryDelay = 2;
    wasOpen = true;

    // Keepalive: protocol-level ping plus an application-level heartbeat.
    keepaliveTimer = setInterval(() => {
      if (ws.readyState === WebSocket.OPEN) {
        ws.ping();
        ws.send(JSON.stringify({ type: "heartbeat", timestamp: Date.now() }));
      }
    }, KEEPALIVE_INTERVAL_MS);
  });

  ws.on("message", async (raw) => {
    try {
      const msg = JSON.parse(raw.toString());
      const payload = msg.payload || {};
      if (msg.type === "xtts_request") {
        await handleTTSRequest(payload);
      } else if (msg.type === "voice_upload") {
        await handleVoiceUpload(payload);
      } else if (msg.type === "xtts_list_voices") {
        await handleListVoices();
      }
    } catch (err) {
      log(`Fehler: ${err.message}`);
    }
  });

  ws.on("close", () => {
    // Without this, every reconnect would leak one interval timer.
    stopKeepalive();
    log("RVS Verbindung geschlossen");
    // Only clear the shared reference if it still points at this socket.
    if (rvsWs === ws) rvsWs = null;
    setTimeout(() => connectRVS(), Math.min(retryDelay * 1000, 30000));
    retryDelay = Math.min(retryDelay * 2, 30);
  });

  ws.on("error", (err) => {
    log(`RVS Fehler: ${err.message}`);
    // Fall back to plain ws:// only when the TLS connection never came up.
    // An error on an established session is handled by the "close" handler
    // (reconnect with backoff) instead of silently downgrading to plaintext.
    if (!wasOpen && useTls && RVS_TLS_FALLBACK === "true") {
      log("TLS fehlgeschlagen — Fallback auf ws://");
      stopKeepalive();
      ws.removeAllListeners();
      // An "error" event without a listener throws; keep a no-op listener so
      // late errors from the abandoned socket cannot crash the process.
      ws.on("error", () => {});
      try {
        ws.close();
      } catch (_) {
        // Best effort: the socket is being discarded anyway.
      }
      connectRVS(true);
    }
  });
}

// ── TTS request handler ─────────────────────────────

/**
 * Renders the requested text via the XTTS API and sends an "xtts_response"
 * message to the RVS — with base64 WAV audio on success, or an `error` field
 * on failure. Requests without text are ignored.
 *
 * @param {{text?: string, voice?: string, requestId?: string, language?: string}} payload
 * @returns {Promise<void>}
 */
async function handleTTSRequest(payload) {
  const { text, voice, requestId, language } = payload || {};
  if (typeof text !== "string" || text === "") {
    log("TTS-Request ohne Text ignoriert");
    return;
  }

  // Use the same requestId representation in success and error responses.
  const responseId = requestId || "";

  log(`TTS-Request: "${text.slice(0, 60)}..." (voice: ${voice || "default"}, lang: ${language || "de"})`);

  try {
    // A missing or unknown voice falls back to the server's default speaker.
    const voiceSample = resolveVoiceSample(voice);

    const audioBuffer = await callXTTSAPI(text, language || "de", voiceSample);

    if (audioBuffer && audioBuffer.length > MIN_AUDIO_BYTES) {
      const base64 = audioBuffer.toString("base64");
      log(`TTS fertig: ${audioBuffer.length} bytes (${(audioBuffer.length / 1024).toFixed(0)}KB)`);

      sendToRVS({
        type: "xtts_response",
        payload: {
          requestId: responseId,
          base64,
          mimeType: "audio/wav",
          voice: voice || "default",
          engine: "xtts",
        },
        timestamp: Date.now(),
      });
    } else {
      log("TTS: Leeres Audio erhalten");
      sendToRVS({
        type: "xtts_response",
        payload: { requestId: responseId, error: "Leeres Audio" },
        timestamp: Date.now(),
      });
    }
  } catch (err) {
    log(`TTS Fehler: ${err.message}`);
    sendToRVS({
      type: "xtts_response",
      payload: { requestId: responseId, error: err.message },
      timestamp: Date.now(),
    });
  }
}

/**
 * POSTs a synthesis request to `<XTTS_API_URL>/tts_to_audio/`.
 *
 * @param {string} text - Text to synthesize.
 * @param {string} language - Language code forwarded to the API.
 * @param {string|null} speakerWav - Path of the speaker sample; sent as "" when null.
 * @returns {Promise<Buffer>} Resolves with the raw response body on HTTP 200.
 *   Rejects on any other status, on a network/stream error, or after 60s of
 *   socket inactivity.
 */
function callXTTSAPI(text, language, speakerWav) {
  return new Promise((resolve, reject) => {
    const body = JSON.stringify({
      text,
      language,
      speaker_wav: speakerWav || "",
    });

    const url = new URL(`${XTTS_BASE_URL}/tts_to_audio/`);
    const options = {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        "Content-Length": Buffer.byteLength(body),
      },
      timeout: XTTS_TIMEOUT_MS,
    };

    // Choose http or https from the configured URL instead of assuming http.
    const req = httpClientFor(url).request(url, options, (res) => {
      const chunks = [];
      res.on("data", (chunk) => chunks.push(chunk));
      // If the response stream breaks, "end" never fires; reject instead of hanging.
      res.on("error", reject);
      res.on("end", () => {
        const responseBody = Buffer.concat(chunks);
        if (res.statusCode === 200) {
          resolve(responseBody);
        } else {
          reject(new Error(`XTTS API HTTP ${res.statusCode}: ${responseBody.toString().slice(0, 200)}`));
        }
      });
    });

    req.on("error", reject);
    req.on("timeout", () => {
      // The "timeout" event does not abort the request by itself.
      req.destroy();
      reject(new Error(`XTTS API Timeout (${XTTS_TIMEOUT_MS / 1000}s)`));
    });
    req.write(body);
    req.end();
  });
}

// ── Voice upload handler ────────────────────────────

/**
 * Decodes the uploaded base64 samples, concatenates them and writes the
 * result to `<VOICES_DIR>/<sanitized name>.wav`, then reports
 * "xtts_voice_saved" to the RVS. Invalid payloads are logged and dropped.
 *
 * NOTE(review): the samples are joined byte-for-byte. If each sample is a
 * complete WAV file, only the first header describes the result — confirm
 * that clients send a single sample or header-less chunks.
 *
 * @param {{name?: string, samples?: Array<{base64: string}>}} payload
 * @returns {Promise<void>}
 */
async function handleVoiceUpload(payload) {
  const { name, samples } = payload || {};
  const isValid =
    typeof name === "string" &&
    name !== "" &&
    Array.isArray(samples) &&
    samples.length > 0 &&
    samples.every((sample) => sample && typeof sample.base64 === "string");

  if (!isValid) {
    log("Voice Upload: Ungueltige Daten");
    return;
  }

  log(`Voice Upload: "${name}" (${samples.length} Samples)`);

  try {
    // Join all samples into one buffer.
    const combined = Buffer.concat(samples.map((sample) => Buffer.from(sample.base64, "base64")));

    fs.mkdirSync(VOICES_DIR, { recursive: true });
    // Sanitizing keeps the upload inside VOICES_DIR.
    const filePath = path.join(VOICES_DIR, `${sanitizeVoiceName(name)}.wav`);
    fs.writeFileSync(filePath, combined);

    log(`Voice gespeichert: ${filePath} (${(combined.length / 1024).toFixed(0)}KB)`);

    sendToRVS({
      type: "xtts_voice_saved",
      payload: { name, size: combined.length, path: filePath },
      timestamp: Date.now(),
    });
  } catch (err) {
    log(`Voice Upload Fehler: ${err.message}`);
  }
}

// ── Voice list handler ──────────────────────────────

/**
 * Sends an "xtts_voices_list" message describing every `.wav` file in
 * VOICES_DIR (name without extension, file name, size in bytes). A missing
 * directory yields an empty list; errors are logged and nothing is sent.
 *
 * @returns {Promise<void>}
 */
async function handleListVoices() {
  try {
    const files = fs.existsSync(VOICES_DIR)
      ? fs.readdirSync(VOICES_DIR).filter((file) => file.endsWith(".wav"))
      : [];

    const voices = files.map((file) => ({
      name: path.basename(file, ".wav"),
      file,
      size: fs.statSync(path.join(VOICES_DIR, file)).size,
    }));

    log(`Stimmen: ${voices.length} verfuegbar`);

    sendToRVS({
      type: "xtts_voices_list",
      payload: { voices },
      timestamp: Date.now(),
    });
  } catch (err) {
    log(`Stimmen-Liste Fehler: ${err.message}`);
  }
}

// ── Send to RVS ─────────────────────────────────────

/**
 * Serializes a message as JSON and sends it over the current RVS socket.
 * When no open socket exists the message is dropped and the drop is logged.
 *
 * @param {object} msg - JSON-serializable message.
 * @returns {boolean} true if the message was handed to the socket, false if dropped.
 */
function sendToRVS(msg) {
  if (rvsWs && rvsWs.readyState === WebSocket.OPEN) {
    rvsWs.send(JSON.stringify(msg));
    return true;
  }
  log(`RVS nicht verbunden — Nachricht verworfen (${msg && msg.type})`);
  return false;
}

// ── Startup ─────────────────────────────────────────

log("ARIA XTTS Bridge startet...");
log(`XTTS API: ${XTTS_API_URL}`);
log(`RVS: ${RVS_HOST}:${RVS_PORT}`);

/**
 * Probes the XTTS API root until it answers with any HTTP status, then calls
 * `callback` exactly once. Once the attempts are used up the callback is
 * invoked anyway.
 *
 * @param {Function} callback - Invoked once the API answered or attempts ran out.
 * @param {number} attempts - Remaining probe attempts.
 */
function waitForXTTS(callback, attempts) {
  if (attempts <= 0) {
    log("XTTS API nicht erreichbar — starte trotzdem");
    callback();
    return;
  }

  const url = new URL(`${XTTS_BASE_URL}/`);
  // A probe can report more than once (e.g. timeout followed by the error
  // from destroy()); act only on the first outcome.
  let settled = false;

  const retry = () => {
    if (settled) return;
    settled = true;
    log(`XTTS API noch nicht bereit — warte (${attempts} Versuche uebrig)...`);
    setTimeout(() => waitForXTTS(callback, attempts - 1), XTTS_PROBE_INTERVAL_MS);
  };

  const req = httpClientFor(url).get(url, { timeout: XTTS_PROBE_TIMEOUT_MS }, (res) => {
    // Drain the body so the socket is released; the content is irrelevant.
    res.on("error", () => {});
    res.resume();
    if (settled) return;
    settled = true;
    log(`XTTS API erreichbar (HTTP ${res.statusCode})`);
    callback();
  });

  req.on("error", retry);
  req.on("timeout", () => {
    // A server that accepts the connection but never answers must not stall startup.
    req.destroy();
    retry();
  });
}

// Up to 30 probes, 10s apart, before connecting regardless.
waitForXTTS(() => connectRVS(), 30);