// ARIA-AGENT/xtts/bridge.js
//
// 269 lines
// 7.9 KiB
// JavaScript

/**
 * ARIA XTTS Bridge — connects the XTTS v2 server to the RVS.
 *
 * Receives xtts_request via the RVS → renders audio through the XTTS API → sends it back.
 * Receives voice_upload → stores the voice sample for cloning.
 * Receives xtts_list_voices → lists the available voices.
 */
const WebSocket = require("ws");
const http = require("http");
const https = require("https");
const fs = require("fs");
const path = require("path");
const XTTS_API_URL = process.env.XTTS_API_URL || "http://xtts:8000";
const RVS_HOST = process.env.RVS_HOST || "";
const RVS_PORT = process.env.RVS_PORT || "443";
const RVS_TLS = process.env.RVS_TLS || "true";
const RVS_TLS_FALLBACK = process.env.RVS_TLS_FALLBACK || "true";
const RVS_TOKEN = process.env.RVS_TOKEN || "";
const VOICES_DIR = "/voices";
/**
 * Prints a single line to stdout, prefixed with the current time in
 * ISO-8601 format wrapped in square brackets.
 *
 * @param {string} msg - Text to print after the timestamp.
 */
function log(msg) {
  const stamp = new Date().toISOString();
  const line = `[${stamp}] ${msg}`;
  console.log(line);
}
// ── RVS Verbindung ──────────────────────────────────
/** Socket of the currently established RVS connection; null while disconnected. */
let rvsWs = null;
/** Delay in seconds before the next reconnect; doubled after every close, capped at 30. */
let retryDelay = 2;

/**
 * Opens the WebSocket connection to the RVS and installs all event handlers.
 *
 * - open:    publishes the socket in `rvsWs`, resets the back-off and starts a
 *            25 s keepalive (protocol ping plus a JSON "heartbeat" message).
 * - message: parses the JSON frame and dispatches on `msg.type`.
 * - close:   stops the keepalive and schedules a reconnect with exponential back-off.
 * - error:   if the TLS connection never got established and RVS_TLS_FALLBACK
 *            is "true", retries immediately over plain ws://.
 *
 * Exits the process with code 1 when RVS_HOST or RVS_TOKEN is not configured.
 *
 * @param {boolean} [forcePlain] - When truthy, use ws:// even if RVS_TLS is "true".
 */
function connectRVS(forcePlain) {
  if (!RVS_HOST || !RVS_TOKEN) {
    log("RVS nicht konfiguriert — beende");
    process.exit(1);
  }
  const useTls = RVS_TLS === "true" && !forcePlain;
  const proto = useTls ? "wss" : "ws";
  const url = `${proto}://${RVS_HOST}:${RVS_PORT}?token=${RVS_TOKEN}`;
  // The logged address intentionally omits the token query parameter.
  log(`Verbinde zu RVS: ${proto}://${RVS_HOST}:${RVS_PORT}`);
  const ws = new WebSocket(url);

  // Per-connection state. The keepalive timer belongs to exactly this socket
  // and must die with it, otherwise every reconnect leaks one interval.
  let keepaliveTimer = null;
  let wasOpened = false;

  const releaseSocket = () => {
    if (keepaliveTimer !== null) {
      clearInterval(keepaliveTimer);
      keepaliveTimer = null;
    }
    // Only unpublish the socket if it is still the active one.
    if (rvsWs === ws) {
      rvsWs = null;
    }
  };

  ws.on("open", () => {
    log("RVS verbunden — warte auf TTS-Requests");
    wasOpened = true;
    rvsWs = ws;
    retryDelay = 2;
    // Keepalive
    keepaliveTimer = setInterval(() => {
      if (ws.readyState === WebSocket.OPEN) {
        ws.ping();
        ws.send(JSON.stringify({ type: "heartbeat", timestamp: Date.now() }));
      }
    }, 25000);
  });

  ws.on("message", async (raw) => {
    try {
      const msg = JSON.parse(raw.toString());
      if (msg.type === "xtts_request") {
        await handleTTSRequest(msg.payload);
      } else if (msg.type === "voice_upload") {
        await handleVoiceUpload(msg.payload);
      } else if (msg.type === "xtts_list_voices") {
        await handleListVoices();
      }
    } catch (err) {
      // Malformed frames and handler failures must not take the bridge down.
      log(`Fehler: ${err.message}`);
    }
  });

  ws.on("close", () => {
    log("RVS Verbindung geschlossen");
    releaseSocket();
    setTimeout(() => connectRVS(), Math.min(retryDelay * 1000, 30000));
    retryDelay = Math.min(retryDelay * 2, 30);
  });

  ws.on("error", (err) => {
    log(`RVS Fehler: ${err.message}`);
    // An error on a connection that was already established is not a TLS
    // handshake failure: keep TLS and let the "close" handler reconnect
    // instead of silently downgrading to plaintext.
    if (wasOpened || !useTls || RVS_TLS_FALLBACK !== "true") {
      return;
    }
    log("TLS fehlgeschlagen — Fallback auf ws://");
    ws.removeAllListeners();
    // An EventEmitter without an "error" listener throws on emit; keep a no-op
    // listener so a late error on the abandoned socket cannot crash the process.
    ws.on("error", () => {});
    releaseSocket();
    try {
      ws.close();
    } catch (_) {
      // Best effort: the socket may already be unusable.
    }
    connectRVS(true);
  });
}
// ── TTS Request Handler ─────────────────────────────
/**
 * Maps a requested voice name to an existing sample file inside VOICES_DIR.
 *
 * The name is tried verbatim first and then in the sanitised form that
 * handleVoiceUpload uses for file names, so a voice uploaded as "My Voice" is
 * found under "My_Voice.wav". Candidates that would resolve outside
 * VOICES_DIR (e.g. "../x" or "a/b") are ignored.
 *
 * @param {*} voice - Voice name taken from the request payload.
 * @returns {string|null} Path of the sample file, or null if there is none.
 */
function resolveVoiceSample(voice) {
  if (!voice) return null;
  const requested = String(voice);
  const voicesRoot = path.resolve(VOICES_DIR);
  const candidates = [requested, requested.replace(/[^a-zA-Z0-9_-]/g, "_")];
  for (const name of candidates) {
    const fileName = `${name}.wav`;
    // Containment check: the file must sit directly inside the voices directory.
    if (path.dirname(path.resolve(voicesRoot, fileName)) !== voicesRoot) continue;
    const candidate = path.join(VOICES_DIR, fileName);
    if (fs.existsSync(candidate)) return candidate;
  }
  return null;
}

/**
 * Handles an "xtts_request": renders `text` through the XTTS API and sends the
 * result back to the RVS as an "xtts_response" message.
 *
 * On success the response payload carries the WAV audio base64-encoded; on
 * failure (API error or an audio buffer of 100 bytes or less) it carries an
 * `error` string instead. Requests without text are ignored.
 *
 * @param {object} payload - Request payload.
 * @param {string} payload.text - Text to synthesise.
 * @param {string} [payload.voice] - Name of a stored voice sample; the XTTS
 *   default is used when it is missing or unknown.
 * @param {string} [payload.requestId] - Echoed back in the response ("" if absent).
 * @param {string} [payload.language] - Language code, defaults to "de".
 * @returns {Promise<void>}
 */
async function handleTTSRequest(payload) {
  // A frame without payload must not throw before a reply can be produced.
  const { text, voice, requestId, language } = payload || {};
  if (!text) return;
  if (typeof text !== "string") {
    log("TTS-Request: Ungueltiger Text");
    return;
  }
  // Same normalisation for success and error replies.
  const replyId = requestId || "";
  log(`TTS-Request: "${text.slice(0, 60)}..." (voice: ${voice || "default"}, lang: ${language || "de"})`);
  try {
    // Determine the voice sample path (null -> XTTS default voice).
    const voiceSample = resolveVoiceSample(voice);
    // Call the XTTS API.
    const audioBuffer = await callXTTSAPI(text, language || "de", voiceSample);
    if (audioBuffer && audioBuffer.length > 100) {
      const base64 = audioBuffer.toString("base64");
      log(`TTS fertig: ${audioBuffer.length} bytes (${(audioBuffer.length / 1024).toFixed(0)}KB)`);
      sendToRVS({
        type: "xtts_response",
        payload: {
          requestId: replyId,
          base64,
          mimeType: "audio/wav",
          voice: voice || "default",
          engine: "xtts",
        },
        timestamp: Date.now(),
      });
    } else {
      // Anything this small cannot be a usable WAV file.
      log("TTS: Leeres Audio erhalten");
      sendToRVS({
        type: "xtts_response",
        payload: { requestId: replyId, error: "Leeres Audio" },
        timestamp: Date.now(),
      });
    }
  } catch (err) {
    log(`TTS Fehler: ${err.message}`);
    sendToRVS({
      type: "xtts_response",
      payload: { requestId: replyId, error: err.message },
      timestamp: Date.now(),
    });
  }
}
/**
 * Sends a synthesis request to the XTTS API endpoint `/tts_to_audio/` and
 * resolves with the raw response body.
 *
 * The transport (http or https) follows the scheme of XTTS_API_URL.
 *
 * @param {string} text - Text to synthesise.
 * @param {string} language - Language code passed through to the API.
 * @param {string|null} speakerWav - Path of a voice sample; sent as "" when falsy.
 * @returns {Promise<Buffer>} Response body of a 200 reply.
 *   Rejects with an Error on a non-200 status (message contains the status and
 *   the first 200 characters of the body), on request/response errors, and
 *   when the socket is idle for 60 s.
 */
function callXTTSAPI(text, language, speakerWav) {
  return new Promise((resolve, reject) => {
    const body = JSON.stringify({
      text,
      language,
      speaker_wav: speakerWav || "",
    });
    const url = new URL(`${XTTS_API_URL}/tts_to_audio/`);
    // http.request rejects "https:" URLs, so pick the module matching the scheme.
    const transport = url.protocol === "https:" ? https : http;
    const options = {
      hostname: url.hostname,
      port: url.port,
      path: url.pathname,
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        "Content-Length": Buffer.byteLength(body),
      },
      timeout: 60000,
    };
    const req = transport.request(options, (res) => {
      const chunks = [];
      res.on("data", (chunk) => chunks.push(chunk));
      // Without this an aborted response never emits "end" and the promise
      // would stay pending forever.
      res.on("error", reject);
      res.on("end", () => {
        const received = Buffer.concat(chunks);
        if (res.statusCode === 200) {
          resolve(received);
        } else {
          reject(new Error(`XTTS API HTTP ${res.statusCode}: ${received.toString().slice(0, 200)}`));
        }
      });
    });
    req.on("error", reject);
    // "timeout" only signals idleness; the request has to be torn down manually.
    req.on("timeout", () => {
      req.destroy();
      reject(new Error("XTTS API Timeout (60s)"));
    });
    req.write(body);
    req.end();
  });
}
// ── Voice Upload Handler ────────────────────────────
/**
 * Extracts the format description and audio data from a RIFF/WAVE buffer.
 *
 * @param {Buffer} buf - Candidate WAV file.
 * @returns {{fmt: Buffer, data: Buffer}|null} Contents of the first "fmt "
 *   chunk and of all "data" chunks, or null if `buf` is not a RIFF/WAVE file
 *   containing both.
 */
function readWavChunks(buf) {
  if (buf.length < 12) return null;
  if (buf.toString("latin1", 0, 4) !== "RIFF") return null;
  if (buf.toString("latin1", 8, 12) !== "WAVE") return null;
  let fmt = null;
  const dataParts = [];
  let offset = 12;
  while (offset + 8 <= buf.length) {
    const id = buf.toString("latin1", offset, offset + 4);
    const size = buf.readUInt32LE(offset + 4);
    const start = offset + 8;
    // Tolerate size fields that overshoot the buffer (truncated/streamed files).
    const end = Math.min(start + size, buf.length);
    if (id === "fmt " && fmt === null) {
      fmt = buf.subarray(start, end);
    } else if (id === "data") {
      dataParts.push(buf.subarray(start, end));
    }
    // RIFF chunks are padded to an even number of bytes.
    offset = start + size + (size % 2);
  }
  if (fmt === null || fmt.length < 16 || dataParts.length === 0) return null;
  return { fmt, data: Buffer.concat(dataParts) };
}

/**
 * Merges several complete WAV files into one valid WAV file.
 *
 * Plain byte concatenation of WAV files leaves the first file's header in
 * place, whose size fields cover only the first sample. This builds a fresh
 * header over the joined audio data instead.
 *
 * @param {Buffer[]} buffers - Decoded samples.
 * @returns {Buffer|null} The merged file, or null when merging does not apply
 *   (fewer than two samples, a sample that is not RIFF/WAVE, an unsupported
 *   format tag, or differing formats) and the caller should concatenate raw.
 */
function mergeWavSamples(buffers) {
  if (buffers.length < 2) return null;
  const parsed = [];
  for (const buf of buffers) {
    const chunks = readWavChunks(buf);
    if (chunks === null) return null;
    parsed.push(chunks);
  }
  const fmt = parsed[0].fmt;
  // Only uncompressed formats (PCM, IEEE float, WAVE_FORMAT_EXTENSIBLE) can be
  // joined sample-by-sample without further metadata.
  if (![0x0001, 0x0003, 0xfffe].includes(fmt.readUInt16LE(0))) return null;
  if (!parsed.every((p) => p.fmt.equals(fmt))) return null;
  // Drop trailing partial frames so later samples stay frame-aligned.
  const blockAlign = fmt.readUInt16LE(12) || 1;
  const audio = Buffer.concat(
    parsed.map((p) => p.data.subarray(0, p.data.length - (p.data.length % blockAlign)))
  );
  const u32 = (value) => {
    const out = Buffer.alloc(4);
    out.writeUInt32LE(value, 0);
    return out;
  };
  const pad = (length) => Buffer.alloc(length % 2);
  const body = Buffer.concat([
    Buffer.from("WAVE", "latin1"),
    Buffer.from("fmt ", "latin1"),
    u32(fmt.length),
    fmt,
    pad(fmt.length),
    Buffer.from("data", "latin1"),
    u32(audio.length),
    audio,
    pad(audio.length),
  ]);
  if (body.length > 0xffffffff) return null;
  return Buffer.concat([Buffer.from("RIFF", "latin1"), u32(body.length), body]);
}

/**
 * Handles a "voice_upload": decodes the base64 samples, joins them and stores
 * the result as `<sanitised name>.wav` in VOICES_DIR, then reports the saved
 * file to the RVS as "xtts_voice_saved".
 *
 * Characters outside [a-zA-Z0-9_-] in the name are replaced by "_" for the
 * file name. When every sample is a complete WAV file of the same format the
 * samples are merged into one valid WAV; otherwise their bytes are
 * concatenated unchanged. Failures are logged, not thrown.
 *
 * @param {object} payload - Upload payload.
 * @param {string} payload.name - Display name of the voice.
 * @param {Array<{base64: string}>} payload.samples - Base64-encoded audio parts.
 * @returns {Promise<void>}
 */
async function handleVoiceUpload(payload) {
  const { name, samples } = payload || {};
  if (typeof name !== "string" || name === "" || !Array.isArray(samples) || samples.length === 0) {
    log("Voice Upload: Ungueltige Daten");
    return;
  }
  log(`Voice Upload: "${name}" (${samples.length} Samples)`);
  try {
    // Join all samples.
    const buffers = samples.map((s) => Buffer.from(s.base64, "base64"));
    const combined = mergeWavSamples(buffers) || Buffer.concat(buffers);
    // Store as WAV.
    fs.mkdirSync(VOICES_DIR, { recursive: true });
    const filePath = path.join(VOICES_DIR, `${name.replace(/[^a-zA-Z0-9_-]/g, "_")}.wav`);
    fs.writeFileSync(filePath, combined);
    log(`Voice gespeichert: ${filePath} (${(combined.length / 1024).toFixed(0)}KB)`);
    sendToRVS({
      type: "xtts_voice_saved",
      payload: { name, size: combined.length, path: filePath },
      timestamp: Date.now(),
    });
  } catch (err) {
    log(`Voice Upload Fehler: ${err.message}`);
  }
}
// ── Voice List Handler ──────────────────────────────
/**
 * Handles an "xtts_list_voices": sends the stored voice samples to the RVS as
 * an "xtts_voices_list" message.
 *
 * Every regular file in VOICES_DIR whose name ends in ".wav" is reported with
 * its name (file name without ".wav"), file name and size in bytes. Entries
 * that cannot be inspected (e.g. a dangling symlink or a file deleted in the
 * meantime) and non-files are skipped instead of failing the whole listing.
 * A missing directory yields an empty list.
 *
 * @returns {Promise<void>}
 */
async function handleListVoices() {
  try {
    const voices = [];
    const entries = fs.existsSync(VOICES_DIR) ? fs.readdirSync(VOICES_DIR) : [];
    for (const file of entries) {
      if (!file.endsWith(".wav")) continue;
      let stats;
      try {
        stats = fs.statSync(path.join(VOICES_DIR, file));
      } catch (err) {
        // One broken entry must not hide all other voices from the requester.
        log(`Stimmen-Liste: "${file}" uebersprungen (${err.message})`);
        continue;
      }
      if (!stats.isFile()) continue;
      voices.push({
        name: path.basename(file, ".wav"),
        file,
        size: stats.size,
      });
    }
    log(`Stimmen: ${voices.length} verfuegbar`);
    sendToRVS({
      type: "xtts_voices_list",
      payload: { voices },
      timestamp: Date.now(),
    });
  } catch (err) {
    log(`Stimmen-Liste Fehler: ${err.message}`);
  }
}
// ── RVS senden ──────────────────────────────────────
/**
 * Serialises `msg` as JSON and sends it over the current RVS socket.
 * Does nothing when there is no socket or the socket is not in the OPEN state.
 *
 * @param {object} msg - Message to transmit.
 */
function sendToRVS(msg) {
  if (!rvsWs) {
    return;
  }
  if (rvsWs.readyState !== WebSocket.OPEN) {
    return;
  }
  const frame = JSON.stringify(msg);
  rvsWs.send(frame);
}
// ── Start ───────────────────────────────────────────
// Startup banner: announce the bridge and the two endpoints it will talk to.
const startupLines = [
  "ARIA XTTS Bridge startet...",
  `XTTS API: ${XTTS_API_URL}`,
  `RVS: ${RVS_HOST}:${RVS_PORT}`,
];
for (const line of startupLines) {
  log(line);
}
/**
 * Waits until the XTTS API answers, then invokes `callback` exactly once.
 *
 * Probes `${XTTS_API_URL}/docs`; any HTTP response (regardless of status)
 * counts as "reachable". A failed probe is retried after 5 s. Once `attempts`
 * is used up the callback is invoked anyway so the bridge still starts.
 *
 * @param {Function} callback - Called without arguments when waiting ends.
 * @param {number} attempts - Number of probes left.
 */
function waitForXTTS(callback, attempts) {
  if (attempts <= 0) {
    log("XTTS API nicht erreichbar — starte trotzdem");
    callback();
    return;
  }
  const probeUrl = new URL(`${XTTS_API_URL}/docs`);
  // http.get throws for "https:" URLs, so follow the configured scheme.
  const transport = probeUrl.protocol === "https:" ? https : http;
  // Guards against a late request error after a response was already seen,
  // which would otherwise schedule another probe and start the bridge twice.
  let settled = false;
  transport
    .get(probeUrl, (res) => {
      // The body is irrelevant, but it must be drained so the socket is released.
      res.resume();
      if (settled) return;
      settled = true;
      log("XTTS API erreichbar");
      callback();
    })
    .on("error", () => {
      if (settled) return;
      settled = true;
      log(`XTTS API noch nicht bereit — warte (${attempts} Versuche uebrig)...`);
      setTimeout(() => waitForXTTS(callback, attempts - 1), 5000);
    });
}
// Wait for the XTTS API for at most 24 probes (about 2 minutes), then connect to the RVS.
const XTTS_STARTUP_ATTEMPTS = 24;
waitForXTTS(() => connectRVS(), XTTS_STARTUP_ATTEMPTS);