feat: XTTS v2 integration, auto-update system, TTS engine abstraction

- XTTS v2: Docker setup for Gaming-PC (GPU), bridge via RVS relay
- XTTS: Voice cloning UI in Diagnostic (multi-file upload)
- XTTS: Engine selectable (Piper local vs XTTS remote) with fallback
- Auto-Update: RVS serves APK over WebSocket (no HTTP needed)
- Auto-Update: App checks version on start, prompts install
- Auto-Update: release.sh copies APK to RVS via scp
- Bridge: TTS engine abstraction (piper/xtts), config persistent
- Bridge: xtts_response handler, tts_request on-demand
- Diagnostic: TTS engine dropdown, XTTS voice panel, voice cloning
- App: Play button on ARIA messages, chat search, update service
- Wake word: Disabled LiveAudioStream (crash fix), Phase 1 placeholder
- Watchdog: Container restart after 8min stuck
- Chat backup: on-the-fly to /shared/config/chat_backup.jsonl

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-05 19:42:10 +02:00
parent 81ca3cc7a7
commit a242693751
16 changed files with 826 additions and 13 deletions
@@ -401,6 +401,17 @@
    <div class="settings-section">
      <h2>Sprachausgabe</h2>
      <div class="card" style="max-width:500px;">
+        <!-- TTS engine selection: changing it sends the voice config and switches the visible engine panel -->
+        <div style="display:flex;align-items:center;gap:12px;margin-bottom:12px;">
+          <label style="color:#8888AA;font-size:12px;">TTS Engine:</label>
+          <select id="diag-tts-engine" onchange="sendVoiceConfig();toggleXTTSPanel()" style="background:#1E1E2E;color:#fff;border:1px solid #2A2A3E;border-radius:6px;padding:6px 10px;font-size:13px;">
+            <option value="piper">Piper (lokal, CPU, schnell)</option>
+            <option value="xtts">XTTS v2 (remote, GPU, natuerlich)</option>
+          </select>
+        </div>
+
+        <!-- Piper Stimmen (nur bei Engine=piper) -->
+        <div id="piper-panel">
        <div style="display:flex;align-items:center;gap:12px;margin-bottom:12px;">
          <label style="color:#8888AA;font-size:12px;">Standard-Stimme:</label>
          <select id="diag-default-voice" onchange="sendVoiceConfig()" style="background:#1E1E2E;color:#fff;border:1px solid #2A2A3E;border-radius:6px;padding:6px 10px;font-size:13px;">
@@ -441,6 +452,42 @@
            style="flex:1;accent-color:#0096FF;">
          <span style="color:#555570;font-size:11px;">2.0x</span>
        </div>
+        </div><!-- /piper-panel -->
+
+        <!-- XTTS panel: hidden initially (display:none); toggleXTTSPanel() shows it only when the engine is "xtts" -->
+        <div id="xtts-panel" style="display:none;">
+          <div style="display:flex;align-items:center;gap:12px;margin-bottom:12px;">
+            <label style="color:#8888AA;font-size:12px;">XTTS Stimme:</label>
+            <select id="diag-xtts-voice" onchange="sendVoiceConfig()" style="background:#1E1E2E;color:#fff;border:1px solid #2A2A3E;border-radius:6px;padding:6px 10px;font-size:13px;">
+              <option value="">Standard (XTTS Default)</option>
+            </select>
+            <button class="btn secondary" onclick="loadXTTSVoices()" style="padding:4px 10px;font-size:11px;">Laden</button>
+          </div>
+
+          <!-- Voice cloning: voice name + one or more audio files, submitted via uploadVoiceSamples() -->
+          <div style="background:#1E1E2E;border-radius:8px;padding:12px;margin-top:8px;">
+            <div style="color:#0096FF;font-size:13px;font-weight:600;margin-bottom:8px;">Stimme klonen</div>
+            <div style="color:#8888AA;font-size:11px;margin-bottom:8px;">
+              Lade ein oder mehrere Audio-Samples hoch (WAV/MP3, min. 6-10 Sekunden).
+              Mehrere Dateien werden automatisch zusammengefuegt.
+            </div>
+            <div style="margin-bottom:8px;">
+              <input type="text" id="xtts-clone-name" placeholder="Name fuer die Stimme..." style="background:#0D0D1A;border:1px solid #2A2A3E;border-radius:6px;padding:6px 10px;color:#fff;font-size:13px;width:100%;box-sizing:border-box;">
+            </div>
+            <div style="margin-bottom:8px;">
+              <input type="file" id="xtts-clone-files" accept="audio/*" multiple style="color:#8888AA;font-size:12px;">
+            </div>
+            <div style="display:flex;gap:8px;">
+              <button class="btn" onclick="uploadVoiceSamples()" style="flex:1;">Stimme erstellen</button>
+            </div>
+            <!-- Upload progress / result text is written here by the script -->
+            <div id="xtts-clone-status" style="font-size:11px;color:#555570;margin-top:6px;"></div>
+          </div>
+
+          <!-- XTTS status line: initial "not connected" text, overwritten when an xtts_voices_list message arrives -->
+          <div style="margin-top:8px;font-size:11px;color:#555570;" id="xtts-status">
+            XTTS-Server: Nicht verbunden (starte xtts/ auf dem Gaming-PC)
+          </div>
+        </div>
      </div>
    </div>

@@ -665,6 +712,27 @@
          return;
        }

+        if (msg.type === 'xtts_voices_list') {
+          const select = document.getElementById('diag-xtts-voice');
+          // Behalte erste Option (Default)
+          while (select.options.length > 1) select.remove(1);
+          for (const v of (msg.payload?.voices || [])) {
+            const opt = document.createElement('option');
+            opt.value = v.name;
+            opt.textContent = `${v.name} (${(v.size / 1024).toFixed(0)}KB)`;
+            select.appendChild(opt);
+          }
+          document.getElementById('xtts-status').textContent = `XTTS: ${msg.payload?.voices?.length || 0} Stimme(n) verfuegbar`;
+          document.getElementById('xtts-status').style.color = '#34C759';
+          return;
+        }
+        if (msg.type === 'xtts_voice_saved') {
+          document.getElementById('xtts-clone-status').textContent = `Stimme "${msg.payload?.name}" gespeichert!`;
+          document.getElementById('xtts-clone-status').style.color = '#34C759';
+          loadXTTSVoices(); // Liste neu laden
+          return;
+        }
+
        if (msg.type === 'voice_config') {
          document.getElementById('diag-default-voice').value = msg.defaultVoice || 'ramona';
          document.getElementById('diag-highlight-voice').value = msg.highlightVoice || 'thorsten';
@@ -675,6 +743,9 @@
          document.getElementById('speed-ramona-label').textContent = sr + 'x';
          document.getElementById('diag-speed-thorsten').value = st;
          document.getElementById('speed-thorsten-label').textContent = st + 'x';
+          document.getElementById('diag-tts-engine').value = msg.ttsEngine || 'piper';
+          document.getElementById('diag-xtts-voice').value = msg.xttsVoice || '';
+          toggleXTTSPanel();
          return;
        }

@@ -1167,6 +1238,44 @@
      }, 120000);
    }

+    // ── XTTS Panel ─────────────────────────────
+    function toggleXTTSPanel() {
+      const engine = document.getElementById('diag-tts-engine').value;
+      document.getElementById('piper-panel').style.display = engine === 'piper' ? 'block' : 'none';
+      document.getElementById('xtts-panel').style.display = engine === 'xtts' ? 'block' : 'none';
+      if (engine === 'xtts') loadXTTSVoices();
+    }
+
+    function loadXTTSVoices() {
+      sendToRVS_raw({ type: 'xtts_list_voices', payload: {}, timestamp: Date.now() });
+    }
+
+    async function uploadVoiceSamples() {
+      const name = document.getElementById('xtts-clone-name').value.trim();
+      const files = document.getElementById('xtts-clone-files').files;
+      if (!name) { alert('Bitte einen Namen eingeben'); return; }
+      if (!files || files.length === 0) { alert('Bitte Audio-Dateien auswaehlen'); return; }
+
+      document.getElementById('xtts-clone-status').textContent = `Lade ${files.length} Datei(en) hoch...`;
+
+      const samples = [];
+      for (const file of files) {
+        const buffer = await file.arrayBuffer();
+        const base64 = btoa(String.fromCharCode(...new Uint8Array(buffer)));
+        samples.push({ base64, name: file.name, size: file.size });
+      }
+
+      const totalSize = samples.reduce((s, f) => s + f.size, 0);
+      document.getElementById('xtts-clone-status').textContent =
+        `Sende ${samples.length} Sample(s) (${(totalSize / 1024).toFixed(0)}KB) an XTTS-Server...`;
+
+      sendToRVS_raw({
+        type: 'voice_upload',
+        payload: { name, samples },
+        timestamp: Date.now(),
+      });
+    }
+
    // ── Abbrechen ──────────────────────────────
    function cancelRequest() {
      send({ action: 'cancel_request' });
@@ -1181,7 +1290,9 @@
      const ttsEnabled = document.getElementById('diag-tts-enabled').checked;
      const speedRamona = parseFloat(document.getElementById('diag-speed-ramona').value);
      const speedThorsten = parseFloat(document.getElementById('diag-speed-thorsten').value);
-      send({ action: 'send_voice_config', defaultVoice, highlightVoice, ttsEnabled, speedRamona, speedThorsten });
+      const ttsEngine = document.getElementById('diag-tts-engine').value;
+      const xttsVoice = document.getElementById('diag-xtts-voice').value;
+      send({ action: 'send_voice_config', defaultVoice, highlightVoice, ttsEnabled, speedRamona, speedThorsten, ttsEngine, xttsVoice });
    }

    // ── Highlight-Trigger ────────────────────────