From fc2438be2df2a131b041b61bd1b267f0f7dd0cf3 Mon Sep 17 00:00:00 2001 From: duffyduck Date: Sun, 19 Apr 2026 22:43:26 +0200 Subject: [PATCH] fix/feat: XTTS-Voice korrekt persistiert, Loeschen + Voice-per-Request MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bug-Fix: Voice-Auswahl verschwand nach Page-Load - xtts_voices_list Handler rebuildet das Dropdown — vorheriger select.value ging dabei verloren. Jetzt wird der Wert gemerkt und nach Rebuild wiederhergestellt (falls die Stimme noch existiert). Feature: Stimmen loeschen (Diagnostic) - XTTS-Bridge: neuer handleDeleteVoice — entfernt /voices/{name}.wav und schickt aktualisierte Liste per xtts_voices_list - RVS: xtts_delete_voice in ALLOWED_TYPES - Diagnostic Server: Action xtts_delete_voice forwarded via RVS - Diagnostic UI: renderVoiceList zeigt alle Custom-Voices mit X-Button Bei Loeschen der gerade aktiven Stimme: auf Default zuruecksetzen Feature: Voice-per-Request in Bridge - App kann mit jedem Chat ein voice-Feld mitschicken - Bridge merkt sich _next_voice_override, nutzt es fuer die NAECHSTE ARIA-Antwort (einmalig, dann reset) - tts_request (Play-Button) akzeptiert voice im Payload als Override - Fallback: globale xtts_voice aus voice_config.json - So kann jedes Geraet seine eigene Stimme haben ohne den globalen Default zu aendern Co-Authored-By: Claude Opus 4.7 (1M context) --- bridge/aria_bridge.py | 21 +++++++++++++++++--- diagnostic/index.html | 45 ++++++++++++++++++++++++++++++++++++++++--- diagnostic/server.js | 4 ++++ rvs/server.js | 1 + xtts/bridge.js | 26 +++++++++++++++++++++++++ 5 files changed, 91 insertions(+), 6 deletions(-) diff --git a/bridge/aria_bridge.py b/bridge/aria_bridge.py index 56c2f96..0a31319 100644 --- a/bridge/aria_bridge.py +++ b/bridge/aria_bridge.py @@ -498,6 +498,10 @@ class ARIABridge: self._last_chat_final_at: float = 0.0 # requestId → messageId Map fuer XTTS-Audio-Cache (App-seitige Zuordnung) self._xtts_request_to_message: dict[str, 
str] = {} + # Voice-Override aus letzter Chat-Nachricht einer App. + # Wird fuer die direkt folgende ARIA-Antwort genutzt und dann zurueckgesetzt. + # So kann jedes Geraet seine bevorzugte Stimme bekommen (pro Request). + self._next_voice_override: Optional[str] = None def initialize(self) -> None: """Initialisiert alle Komponenten. @@ -856,14 +860,19 @@ class ARIABridge: logger.info("[core] TTS unterdrueckt (Modus: %s)", self.current_mode.config.name) return - xtts_voice = getattr(self, 'xtts_voice', '') + # Voice bestimmen: App-Override fuer diesen Request > globale Default-Voice + xtts_voice = self._next_voice_override or getattr(self, 'xtts_voice', '') + # Override verbrauchen (gilt nur fuer genau diese naechste Antwort) + if self._next_voice_override: + logger.info("[core] Nutze Voice-Override: %s", self._next_voice_override) + self._next_voice_override = None + tts_text = tts_text_preview or text if not tts_text: logger.info("[core] TTS-Text leer nach Cleanup — uebersprungen") return try: xtts_request_id = str(uuid.uuid4()) - # Map fuer audio_pcm/xtts_response → App-Cache Zuordnung self._xtts_request_to_message[xtts_request_id] = message_id if len(self._xtts_request_to_message) > 100: oldest = next(iter(self._xtts_request_to_message)) @@ -1031,6 +1040,11 @@ class ARIABridge: if sender in ("aria", "stt"): return text = payload.get("text", "") + # Voice-Override fuer die naechste ARIA-Antwort merken + voice_override = payload.get("voice", "") + if voice_override: + self._next_voice_override = voice_override + logger.info("[rvs] Voice-Override fuer naechste Antwort: %s", voice_override) if text: logger.info("[rvs] App-Chat: '%s'", text[:80]) await self.send_to_core(text, source="app") @@ -1096,7 +1110,8 @@ class ARIABridge: if not text: return tts_text = clean_text_for_tts(text) or text - xtts_voice = getattr(self, 'xtts_voice', '') + # Voice aus App-Payload gewinnt, sonst global + xtts_voice = payload.get("voice", "") or getattr(self, 'xtts_voice', '') try: 
xtts_request_id = str(uuid.uuid4()) if message_id: diff --git a/diagnostic/index.html b/diagnostic/index.html index ac36329..14e12df 100644 --- a/diagnostic/index.html +++ b/diagnostic/index.html @@ -419,6 +419,9 @@ + +
+
Stimme klonen
@@ -752,16 +755,23 @@ if (msg.type === 'xtts_voices_list') { const select = document.getElementById('diag-xtts-voice'); - // Behalte erste Option (Default) + // Aktuelle Auswahl merken damit Rebuild sie nicht zerstoert + const previouslySelected = select.value; while (select.options.length > 1) select.remove(1); - for (const v of (msg.payload?.voices || [])) { + const voices = msg.payload?.voices || []; + for (const v of voices) { const opt = document.createElement('option'); opt.value = v.name; opt.textContent = `${v.name} (${(v.size / 1024).toFixed(0)}KB)`; select.appendChild(opt); } - document.getElementById('xtts-status').textContent = `XTTS: ${msg.payload?.voices?.length || 0} Stimme(n) verfuegbar`; + // Wenn die vorherige Auswahl weiter existiert → wiederherstellen + if (previouslySelected && voices.some(v => v.name === previouslySelected)) { + select.value = previouslySelected; + } + document.getElementById('xtts-status').textContent = `XTTS: ${voices.length} Stimme(n) verfuegbar`; document.getElementById('xtts-status').style.color = '#34C759'; + renderVoiceList(voices); return; } if (msg.type === 'xtts_voice_saved') { @@ -1356,6 +1366,35 @@ } // ── XTTS Panel ───────────────────────────── + function renderVoiceList(voices) { + const box = document.getElementById('xtts-voice-list'); + if (!box) return; + if (!voices || voices.length === 0) { + box.innerHTML = '
Noch keine eigenen Stimmen vorhanden.
'; + return; + } + let html = '
Geclonte Stimmen:
'; + html += '
'; + for (const v of voices) { + const esc = (s) => String(s).replace(/[&<>"']/g, c => ({ "&":"&", "<":"<", ">":">", '"':""", "'":"'" }[c])); + html += `
` + + `${esc(v.name)}` + + `${(v.size/1024).toFixed(0)}KB` + + `` + + `
`; + } + html += '
'; + box.innerHTML = html; + } + + function deleteXttsVoice(name) { + if (!confirm(`Stimme "${name}" endgueltig loeschen?`)) return; + send({ action: 'xtts_delete_voice', name }); + // Bei aktueller Auswahl: auf Default zuruecksetzen + const sel = document.getElementById('diag-xtts-voice'); + if (sel.value === name) { sel.value = ''; sendVoiceConfig(); } + } + // Legacy no-op (XTTS ist jetzt die einzige Engine, kein Panel-Toggle noetig) function toggleXTTSPanel() { void 0; diff --git a/diagnostic/server.js b/diagnostic/server.js index ae24b4e..49b0465 100644 --- a/diagnostic/server.js +++ b/diagnostic/server.js @@ -1339,6 +1339,10 @@ wss.on("connection", (ws) => { } else if (msg.action === "xtts_list_voices") { // Frische Verbindung die auf Antwort wartet sendToRVS_withResponse("xtts_list_voices", {}, "xtts_voices_list", ws); + } else if (msg.action === "xtts_delete_voice") { + // Weiterleiten an XTTS-Bridge, die antwortet mit neuer Liste + sendToRVS_raw({ type: "xtts_delete_voice", payload: { name: msg.name }, timestamp: Date.now() }); + log("info", "server", `Voice-Delete '${msg.name}' an XTTS-Bridge gesendet`); } else if (msg.action === "get_voice_config") { handleGetVoiceConfig(ws); } else if (msg.action === "send_voice_config") { diff --git a/rvs/server.js b/rvs/server.js index e381b57..9d51648 100644 --- a/rvs/server.js +++ b/rvs/server.js @@ -18,6 +18,7 @@ const ALLOWED_TYPES = new Set([ "update_check", "update_available", "update_download", "update_data", "agent_activity", "cancel_request", "audio_pcm", + "xtts_delete_voice", ]); // Token-Raum: token -> { clients: Set } diff --git a/xtts/bridge.js b/xtts/bridge.js index 7c2a0db..b20cf82 100644 --- a/xtts/bridge.js +++ b/xtts/bridge.js @@ -67,6 +67,8 @@ function connectRVS(forcePlain) { await handleVoiceUpload(msg.payload); } else if (msg.type === "xtts_list_voices") { await handleListVoices(); + } else if (msg.type === "xtts_delete_voice") { + await handleDeleteVoice(msg.payload); } } catch (err) { 
log(`Fehler: ${err.message}`); @@ -337,6 +339,30 @@ async function handleVoiceUpload(payload) { } } +// ── Voice Delete Handler ──────────────────────────── + +async function handleDeleteVoice(payload) { + const { name } = payload || {}; + if (!name || typeof name !== "string") { + log("Voice Delete: ungueltiger Name"); + return; + } + const safe = name.replace(/[^a-zA-Z0-9_-]/g, "_"); + const filePath = path.join(VOICES_DIR, `${safe}.wav`); + try { + if (fs.existsSync(filePath)) { + fs.unlinkSync(filePath); + log(`Voice geloescht: ${filePath}`); + } else { + log(`Voice Delete: Datei existiert nicht (${filePath})`); + } + // Aktualisierte Liste an alle Clients senden + await handleListVoices(); + } catch (err) { + log(`Voice Delete Fehler: ${err.message}`); + } +} + // ── Voice List Handler ────────────────────────────── async function handleListVoices() {