feat: XTTS v2 integration, auto-update system, TTS engine abstraction

- XTTS v2: Docker setup for Gaming-PC (GPU), bridge via RVS relay - XTTS: Voice cloning UI in Diagnostic (multi-file upload) - XTTS: Engine selectable (Piper local vs XTTS remote) with fallback - Auto-Update: RVS serves APK over WebSocket (no HTTP needed) - Auto-Update: App checks version on start, prompts install - Auto-Update: release.sh copies APK to RVS via scp - Bridge: TTS engine abstraction (piper/xtts), config persistent - Bridge: xtts_response handler, tts_request on-demand - Diagnostic: TTS engine dropdown, XTTS voice panel, voice cloning - App: Play button on ARIA messages, chat search, update service - Wake word: Disabled LiveAudioStream (crash fix), Phase 1 placeholder - Watchdog: Container restart after 8min stuck - Chat backup: on-the-fly to /shared/config/chat_backup.jsonl Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-05 19:42:10 +02:00
parent 81ca3cc7a7
commit a242693751
16 changed files with 826 additions and 13 deletions
@@ -0,0 +1,11 @@
+# ════════════════════════════════════════════════
+#  ARIA XTTS v2 — Konfiguration
+#  Kopieren nach .env und anpassen
+# ════════════════════════════════════════════════
+
+# RVS Verbindung (gleiche Daten wie auf der ARIA-VM)
+RVS_HOST=mobil.hacker-net.de
+RVS_PORT=444
+RVS_TLS=true
+RVS_TLS_FALLBACK=true
+RVS_TOKEN=dein_token_hier
@@ -0,0 +1,5 @@
+FROM node:22-alpine
+WORKDIR /app
+# Install dependencies before copying the source so this layer stays cached
+# when only bridge.js changes.
+COPY package.json ./
+# --omit=dev replaces the deprecated --production flag.
+RUN npm install --omit=dev
+COPY bridge.js ./
+CMD ["node", "bridge.js"]
@@ -0,0 +1,268 @@
+/**
+ * ARIA XTTS Bridge — Verbindet XTTS v2 Server mit dem RVS
+ *
+ * Empfaengt xtts_request ueber RVS → rendert Audio via XTTS API → sendet zurueck
+ * Empfaengt voice_upload → speichert Voice-Sample fuer Cloning
+ * Empfaengt xtts_list_voices → listet verfuegbare Stimmen
+ */
+
+const WebSocket = require("ws");
+const http = require("http");
+const https = require("https");
+const fs = require("fs");
+const path = require("path");
+
+// Base URL of the XTTS HTTP API that renders the audio.
+const XTTS_API_URL = process.env.XTTS_API_URL || "http://xtts:8000";
+// Host name of the RVS relay; the bridge exits at connect time if it is empty.
+const RVS_HOST = process.env.RVS_HOST || "";
+// Port of the RVS relay (kept as a string, it is only interpolated into the URL).
+const RVS_PORT = process.env.RVS_PORT || "443";
+// "true" selects wss://, any other value selects ws://.
+const RVS_TLS = process.env.RVS_TLS || "true";
+// "true" allows falling back to plain ws:// when the wss:// connection fails.
+const RVS_TLS_FALLBACK = process.env.RVS_TLS_FALLBACK || "true";
+// Token sent as the `token` query parameter of the RVS URL; required.
+const RVS_TOKEN = process.env.RVS_TOKEN || "";
+// Directory where uploaded voice samples (.wav) are stored and looked up.
+const VOICES_DIR = "/voices";
+
+function log(msg) {
+  console.log(`[${new Date().toISOString()}] ${msg}`);
+}
+
+// ── RVS Verbindung ──────────────────────────────────
+
+let rvsWs = null;
+let retryDelay = 2;
+
+function connectRVS(forcePlain) {
+  if (!RVS_HOST || !RVS_TOKEN) {
+    log("RVS nicht konfiguriert — beende");
+    process.exit(1);
+  }
+
+  const useTls = RVS_TLS === "true" && !forcePlain;
+  const proto = useTls ? "wss" : "ws";
+  const url = `${proto}://${RVS_HOST}:${RVS_PORT}?token=${RVS_TOKEN}`;
+
+  log(`Verbinde zu RVS: ${proto}://${RVS_HOST}:${RVS_PORT}`);
+
+  const ws = new WebSocket(url);
+
+  ws.on("open", () => {
+    log("RVS verbunden — warte auf TTS-Requests");
+    rvsWs = ws;
+    retryDelay = 2;
+
+    // Keepalive
+    setInterval(() => {
+      if (ws.readyState === WebSocket.OPEN) {
+        ws.ping();
+        ws.send(JSON.stringify({ type: "heartbeat", timestamp: Date.now() }));
+      }
+    }, 25000);
+  });
+
+  ws.on("message", async (raw) => {
+    try {
+      const msg = JSON.parse(raw.toString());
+
+      if (msg.type === "xtts_request") {
+        await handleTTSRequest(msg.payload);
+      } else if (msg.type === "voice_upload") {
+        await handleVoiceUpload(msg.payload);
+      } else if (msg.type === "xtts_list_voices") {
+        await handleListVoices();
+      }
+    } catch (err) {
+      log(`Fehler: ${err.message}`);
+    }
+  });
+
+  ws.on("close", () => {
+    log("RVS Verbindung geschlossen");
+    rvsWs = null;
+    setTimeout(() => connectRVS(), Math.min(retryDelay * 1000, 30000));
+    retryDelay = Math.min(retryDelay * 2, 30);
+  });
+
+  ws.on("error", (err) => {
+    log(`RVS Fehler: ${err.message}`);
+    if (useTls && RVS_TLS_FALLBACK === "true") {
+      log("TLS fehlgeschlagen — Fallback auf ws://");
+      ws.removeAllListeners();
+      try { ws.close(); } catch (_) {}
+      connectRVS(true);
+    }
+  });
+}
+
+// ── TTS Request Handler ─────────────────────────────
+
+async function handleTTSRequest(payload) {
+  const { text, voice, requestId, language } = payload;
+  if (!text) return;
+
+  log(`TTS-Request: "${text.slice(0, 60)}..." (voice: ${voice || "default"}, lang: ${language || "de"})`);
+
+  try {
+    // Voice-Sample Pfad bestimmen
+    const voiceSample = voice ? path.join(VOICES_DIR, `${voice}.wav`) : null;
+    const hasCustomVoice = voiceSample && fs.existsSync(voiceSample);
+
+    // XTTS API aufrufen
+    const audioBuffer = await callXTTSAPI(text, language || "de", hasCustomVoice ? voiceSample : null);
+
+    if (audioBuffer && audioBuffer.length > 100) {
+      const base64 = audioBuffer.toString("base64");
+      log(`TTS fertig: ${audioBuffer.length} bytes (${(audioBuffer.length / 1024).toFixed(0)}KB)`);
+
+      sendToRVS({
+        type: "xtts_response",
+        payload: {
+          requestId: requestId || "",
+          base64,
+          mimeType: "audio/wav",
+          voice: voice || "default",
+          engine: "xtts",
+        },
+        timestamp: Date.now(),
+      });
+    } else {
+      log("TTS: Leeres Audio erhalten");
+      sendToRVS({
+        type: "xtts_response",
+        payload: { requestId, error: "Leeres Audio" },
+        timestamp: Date.now(),
+      });
+    }
+  } catch (err) {
+    log(`TTS Fehler: ${err.message}`);
+    sendToRVS({
+      type: "xtts_response",
+      payload: { requestId, error: err.message },
+      timestamp: Date.now(),
+    });
+  }
+}
+
+function callXTTSAPI(text, language, speakerWav) {
+  return new Promise((resolve, reject) => {
+    const body = JSON.stringify({
+      text,
+      language,
+      speaker_wav: speakerWav || "",
+    });
+
+    const url = new URL(`${XTTS_API_URL}/tts_to_audio/`);
+    const options = {
+      hostname: url.hostname,
+      port: url.port,
+      path: url.pathname,
+      method: "POST",
+      headers: {
+        "Content-Type": "application/json",
+        "Content-Length": Buffer.byteLength(body),
+      },
+      timeout: 60000,
+    };
+
+    const req = http.request(options, (res) => {
+      const chunks = [];
+      res.on("data", (chunk) => chunks.push(chunk));
+      res.on("end", () => {
+        if (res.statusCode === 200) {
+          resolve(Buffer.concat(chunks));
+        } else {
+          reject(new Error(`XTTS API HTTP ${res.statusCode}: ${Buffer.concat(chunks).toString().slice(0, 200)}`));
+        }
+      });
+    });
+
+    req.on("error", reject);
+    req.on("timeout", () => { req.destroy(); reject(new Error("XTTS API Timeout (60s)")); });
+    req.write(body);
+    req.end();
+  });
+}
+
+// ── Voice Upload Handler ────────────────────────────
+
+async function handleVoiceUpload(payload) {
+  const { name, samples } = payload;
+  if (!name || !samples || !Array.isArray(samples) || samples.length === 0) {
+    log("Voice Upload: Ungueltige Daten");
+    return;
+  }
+
+  log(`Voice Upload: "${name}" (${samples.length} Samples)`);
+
+  try {
+    // Alle Samples zusammenfuegen
+    const buffers = samples.map(s => Buffer.from(s.base64, "base64"));
+    const combined = Buffer.concat(buffers);
+
+    // Als WAV speichern
+    fs.mkdirSync(VOICES_DIR, { recursive: true });
+    const filePath = path.join(VOICES_DIR, `${name.replace(/[^a-zA-Z0-9_-]/g, "_")}.wav`);
+    fs.writeFileSync(filePath, combined);
+
+    log(`Voice gespeichert: ${filePath} (${(combined.length / 1024).toFixed(0)}KB)`);
+
+    sendToRVS({
+      type: "xtts_voice_saved",
+      payload: { name, size: combined.length, path: filePath },
+      timestamp: Date.now(),
+    });
+  } catch (err) {
+    log(`Voice Upload Fehler: ${err.message}`);
+  }
+}
+
+// ── Voice List Handler ──────────────────────────────
+
+async function handleListVoices() {
+  try {
+    const files = fs.existsSync(VOICES_DIR)
+      ? fs.readdirSync(VOICES_DIR).filter(f => f.endsWith(".wav"))
+      : [];
+
+    const voices = files.map(f => ({
+      name: path.basename(f, ".wav"),
+      file: f,
+      size: fs.statSync(path.join(VOICES_DIR, f)).size,
+    }));
+
+    log(`Stimmen: ${voices.length} verfuegbar`);
+
+    sendToRVS({
+      type: "xtts_voices_list",
+      payload: { voices },
+      timestamp: Date.now(),
+    });
+  } catch (err) {
+    log(`Stimmen-Liste Fehler: ${err.message}`);
+  }
+}
+
+// ── RVS senden ──────────────────────────────────────
+
+function sendToRVS(msg) {
+  if (rvsWs && rvsWs.readyState === WebSocket.OPEN) {
+    rvsWs.send(JSON.stringify(msg));
+  }
+}
+
+// ── Start ───────────────────────────────────────────
+
+log("ARIA XTTS Bridge startet...");
+log(`XTTS API: ${XTTS_API_URL}`);
+log(`RVS: ${RVS_HOST}:${RVS_PORT}`);
+
+// Warten bis XTTS API erreichbar ist
+function waitForXTTS(callback, attempts) {
+  if (attempts <= 0) { log("XTTS API nicht erreichbar — starte trotzdem"); callback(); return; }
+  http.get(`${XTTS_API_URL}/docs`, (res) => {
+    log("XTTS API erreichbar");
+    callback();
+  }).on("error", () => {
+    log(`XTTS API noch nicht bereit — warte (${attempts} Versuche uebrig)...`);
+    setTimeout(() => waitForXTTS(callback, attempts - 1), 5000);
+  });
+}
+
+waitForXTTS(() => connectRVS(), 24); // Max 2min warten
@@ -0,0 +1,54 @@
+# ════════════════════════════════════════════════
+#  ARIA XTTS v2 — GPU TTS Server
+#  Laeuft auf dem Gaming-PC (RTX 3060)
+#  Verbindet sich zum RVS fuer TTS-Requests
+# ════════════════════════════════════════════════
+#
+#  Voraussetzungen:
+#    - Docker Desktop mit WSL2
+#    - NVIDIA Container Toolkit
+#    - .env mit RVS-Verbindungsdaten
+#
+#  Start: docker compose up -d
+#  Test:  curl http://localhost:8000/docs
+# ════════════════════════════════════════════════
+
+services:
+
+  # ─── XTTS v2 API Server (GPU) ─────────────────
+  xtts:
+    image: ghcr.io/daswer123/xtts-api-server:latest
+    container_name: aria-xtts
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: 1
+              capabilities: [gpu]
+    ports:
+      - "8000:8000"
+    volumes:
+      - xtts-models:/root/.local/share/tts     # Model-Cache (~2GB)
+      - ./voices:/voices                        # Custom Voice Samples
+    environment:
+      - COQUI_TOS_AGREED=1
+    restart: unless-stopped
+
+  # ─── XTTS Bridge (verbindet zu RVS) ───────────
+  xtts-bridge:
+    build: .
+    container_name: aria-xtts-bridge
+    depends_on:
+      - xtts
+    volumes:
+      # Same host directory as the xtts service: the bridge stores uploaded
+      # samples in /voices and passes that path to the XTTS API as speaker_wav,
+      # so both containers must see the same files.
+      - ./voices:/voices
+    environment:
+      - XTTS_API_URL=http://xtts:8000
+      - RVS_HOST=${RVS_HOST}
+      - RVS_PORT=${RVS_PORT:-443}
+      - RVS_TLS=${RVS_TLS:-true}
+      - RVS_TLS_FALLBACK=${RVS_TLS_FALLBACK:-true}
+      - RVS_TOKEN=${RVS_TOKEN}
+    restart: unless-stopped
+
+volumes:
+  xtts-models:
@@ -0,0 +1,8 @@
+{
+  "name": "aria-xtts-bridge",
+  "version": "1.0.0",
+  "private": true,
+  "dependencies": {
+    "ws": "^8.16.0"
+  }
+}