feat: XTTS v2 integration, auto-update system, TTS engine abstraction

- XTTS v2: Docker setup for Gaming-PC (GPU), bridge via RVS relay
- XTTS: Voice cloning UI in Diagnostic (multi-file upload)
- XTTS: Engine selectable (Piper local vs XTTS remote) with fallback
- Auto-Update: RVS serves APK over WebSocket (no HTTP needed)
- Auto-Update: App checks version on start, prompts install
- Auto-Update: release.sh copies APK to RVS via scp
- Bridge: TTS engine abstraction (piper/xtts), config is persisted
- Bridge: xtts_response handler, tts_request on-demand
- Diagnostic: TTS engine dropdown, XTTS voice panel, voice cloning
- App: Play button on ARIA messages, chat search, update service
- Wake word: Disabled LiveAudioStream (crash fix), Phase 1 placeholder
- Watchdog: Container restart after 8min stuck
- Chat backup: on-the-fly to /shared/config/chat_backup.jsonl

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-05 19:42:10 +02:00
parent 81ca3cc7a7
commit a242693751
16 changed files with 826 additions and 13 deletions
+112 -1
View File
@@ -401,6 +401,17 @@
<div class="settings-section">
<h2>Sprachausgabe</h2>
<div class="card" style="max-width:500px;">
<!-- TTS Engine Auswahl -->
<div style="display:flex;align-items:center;gap:12px;margin-bottom:12px;">
<label style="color:#8888AA;font-size:12px;">TTS Engine:</label>
<select id="diag-tts-engine" onchange="sendVoiceConfig();toggleXTTSPanel()" style="background:#1E1E2E;color:#fff;border:1px solid #2A2A3E;border-radius:6px;padding:6px 10px;font-size:13px;">
<option value="piper">Piper (lokal, CPU, schnell)</option>
<option value="xtts">XTTS v2 (remote, GPU, natuerlich)</option>
</select>
</div>
<!-- Piper Stimmen (nur bei Engine=piper) -->
<div id="piper-panel">
<div style="display:flex;align-items:center;gap:12px;margin-bottom:12px;">
<label style="color:#8888AA;font-size:12px;">Standard-Stimme:</label>
<select id="diag-default-voice" onchange="sendVoiceConfig()" style="background:#1E1E2E;color:#fff;border:1px solid #2A2A3E;border-radius:6px;padding:6px 10px;font-size:13px;">
@@ -441,6 +452,42 @@
style="flex:1;accent-color:#0096FF;">
<span style="color:#555570;font-size:11px;">2.0x</span>
</div>
</div><!-- /piper-panel -->
<!-- XTTS Panel (nur bei Engine=xtts) -->
<div id="xtts-panel" style="display:none;">
<div style="display:flex;align-items:center;gap:12px;margin-bottom:12px;">
<label style="color:#8888AA;font-size:12px;">XTTS Stimme:</label>
<select id="diag-xtts-voice" onchange="sendVoiceConfig()" style="background:#1E1E2E;color:#fff;border:1px solid #2A2A3E;border-radius:6px;padding:6px 10px;font-size:13px;">
<option value="">Standard (XTTS Default)</option>
</select>
<button class="btn secondary" onclick="loadXTTSVoices()" style="padding:4px 10px;font-size:11px;">Laden</button>
</div>
<!-- Voice Cloning -->
<div style="background:#1E1E2E;border-radius:8px;padding:12px;margin-top:8px;">
<div style="color:#0096FF;font-size:13px;font-weight:600;margin-bottom:8px;">Stimme klonen</div>
<div style="color:#8888AA;font-size:11px;margin-bottom:8px;">
Lade ein oder mehrere Audio-Samples hoch (WAV/MP3, min. 6-10 Sekunden).
Mehrere Dateien werden automatisch zusammengefuegt.
</div>
<div style="margin-bottom:8px;">
<input type="text" id="xtts-clone-name" placeholder="Name fuer die Stimme..." style="background:#0D0D1A;border:1px solid #2A2A3E;border-radius:6px;padding:6px 10px;color:#fff;font-size:13px;width:100%;box-sizing:border-box;">
</div>
<div style="margin-bottom:8px;">
<input type="file" id="xtts-clone-files" accept="audio/*" multiple style="color:#8888AA;font-size:12px;">
</div>
<div style="display:flex;gap:8px;">
<button class="btn" onclick="uploadVoiceSamples()" style="flex:1;">Stimme erstellen</button>
</div>
<div id="xtts-clone-status" style="font-size:11px;color:#555570;margin-top:6px;"></div>
</div>
<!-- XTTS Status -->
<div style="margin-top:8px;font-size:11px;color:#555570;" id="xtts-status">
XTTS-Server: Nicht verbunden (starte xtts/ auf dem Gaming-PC)
</div>
</div>
</div>
</div>
@@ -665,6 +712,27 @@
return;
}
if (msg.type === 'xtts_voices_list') {
const select = document.getElementById('diag-xtts-voice');
// Behalte erste Option (Default)
while (select.options.length > 1) select.remove(1);
for (const v of (msg.payload?.voices || [])) {
const opt = document.createElement('option');
opt.value = v.name;
opt.textContent = `${v.name} (${(v.size / 1024).toFixed(0)}KB)`;
select.appendChild(opt);
}
document.getElementById('xtts-status').textContent = `XTTS: ${msg.payload?.voices?.length || 0} Stimme(n) verfuegbar`;
document.getElementById('xtts-status').style.color = '#34C759';
return;
}
if (msg.type === 'xtts_voice_saved') {
document.getElementById('xtts-clone-status').textContent = `Stimme "${msg.payload?.name}" gespeichert!`;
document.getElementById('xtts-clone-status').style.color = '#34C759';
loadXTTSVoices(); // Liste neu laden
return;
}
if (msg.type === 'voice_config') {
document.getElementById('diag-default-voice').value = msg.defaultVoice || 'ramona';
document.getElementById('diag-highlight-voice').value = msg.highlightVoice || 'thorsten';
@@ -675,6 +743,9 @@
document.getElementById('speed-ramona-label').textContent = sr + 'x';
document.getElementById('diag-speed-thorsten').value = st;
document.getElementById('speed-thorsten-label').textContent = st + 'x';
document.getElementById('diag-tts-engine').value = msg.ttsEngine || 'piper';
document.getElementById('diag-xtts-voice').value = msg.xttsVoice || '';
toggleXTTSPanel();
return;
}
@@ -1167,6 +1238,44 @@
}, 120000);
}
// ── XTTS Panel ─────────────────────────────
/**
 * Show the settings panel that belongs to the selected TTS engine and hide
 * the other one. When the XTTS engine is selected, the voice list is
 * requested as well.
 */
function toggleXTTSPanel() {
  const selectedEngine = document.getElementById('diag-tts-engine').value;
  const xttsSelected = selectedEngine === 'xtts';
  const panelVisibility = [
    ['piper-panel', selectedEngine === 'piper'],
    ['xtts-panel', xttsSelected],
  ];
  for (const [panelId, visible] of panelVisibility) {
    document.getElementById(panelId).style.display = visible ? 'block' : 'none';
  }
  if (xttsSelected) {
    loadXTTSVoices();
  }
}
/**
 * Send an `xtts_list_voices` request (empty payload, current timestamp)
 * through `sendToRVS_raw`.
 */
function loadXTTSVoices() {
  const request = {
    type: 'xtts_list_voices',
    payload: {},
    timestamp: Date.now(),
  };
  sendToRVS_raw(request);
}
/**
 * Base64-encode binary data in fixed-size chunks.
 *
 * Spreading a whole file into a single `String.fromCharCode(...)` call passes
 * one argument per byte and throws a RangeError for typical audio files, so
 * the bytes are converted 32 KiB at a time instead.
 *
 * @param {ArrayBuffer} buffer - Raw bytes to encode.
 * @returns {string} Base64 representation of `buffer`.
 */
function xttsBytesToBase64(buffer) {
  const bytes = new Uint8Array(buffer);
  const CHUNK_SIZE = 0x8000;
  let binary = '';
  for (let offset = 0; offset < bytes.length; offset += CHUNK_SIZE) {
    binary += String.fromCharCode(...bytes.subarray(offset, offset + CHUNK_SIZE));
  }
  return btoa(binary);
}

/**
 * Read the audio files selected in the voice-cloning form, base64-encode them
 * and send them together with the entered voice name as one `voice_upload`
 * message through `sendToRVS_raw`.
 *
 * Alerts and returns early when the name or the file selection is empty.
 * Progress and read errors are written to the `xtts-clone-status` element.
 *
 * @returns {Promise<void>}
 */
async function uploadVoiceSamples() {
  const name = document.getElementById('xtts-clone-name').value.trim();
  const files = document.getElementById('xtts-clone-files').files;
  if (!name) { alert('Bitte einen Namen eingeben'); return; }
  if (!files || files.length === 0) { alert('Bitte Audio-Dateien auswaehlen'); return; }

  const statusEl = document.getElementById('xtts-clone-status');
  // Reset to the neutral colour so a previous success/error colour does not linger.
  statusEl.style.color = '#555570';
  statusEl.textContent = `Lade ${files.length} Datei(en) hoch...`;

  const samples = [];
  try {
    for (const file of files) {
      const buffer = await file.arrayBuffer();
      samples.push({ base64: xttsBytesToBase64(buffer), name: file.name, size: file.size });
    }
  } catch (err) {
    // This function is started from an onclick attribute, so a rejection would
    // otherwise go unhandled and leave the "Lade ..." status on screen.
    statusEl.textContent = `Fehler beim Lesen der Audio-Dateien: ${err?.message ?? err}`;
    statusEl.style.color = '#FF3B30';
    return;
  }

  const totalSize = samples.reduce((sum, sample) => sum + sample.size, 0);
  statusEl.textContent =
    `Sende ${samples.length} Sample(s) (${(totalSize / 1024).toFixed(0)}KB) an XTTS-Server...`;
  sendToRVS_raw({
    type: 'voice_upload',
    payload: { name, samples },
    timestamp: Date.now(),
  });
}
// ── Abbrechen ──────────────────────────────
function cancelRequest() {
send({ action: 'cancel_request' });
@@ -1181,7 +1290,9 @@
const ttsEnabled = document.getElementById('diag-tts-enabled').checked;
const speedRamona = parseFloat(document.getElementById('diag-speed-ramona').value);
const speedThorsten = parseFloat(document.getElementById('diag-speed-thorsten').value);
send({ action: 'send_voice_config', defaultVoice, highlightVoice, ttsEnabled, speedRamona, speedThorsten });
const ttsEngine = document.getElementById('diag-tts-engine').value;
const xttsVoice = document.getElementById('diag-xtts-voice').value;
send({ action: 'send_voice_config', defaultVoice, highlightVoice, ttsEnabled, speedRamona, speedThorsten, ttsEngine, xttsVoice });
}
// ── Highlight-Trigger ────────────────────────