fix/feat: XTTS-Voice korrekt persistiert, Loeschen + Voice-per-Request

Bug-Fix: Voice-Auswahl verschwand nach Page-Load
- xtts_voices_list Handler rebuildet das Dropdown — vorheriger select.value
  ging dabei verloren. Jetzt wird der Wert gemerkt und nach Rebuild
  wiederhergestellt (falls die Stimme noch existiert).

Feature: Stimmen loeschen (Diagnostic)
- XTTS-Bridge: neuer handleDeleteVoice — entfernt /voices/<name>.wav
  und schickt aktualisierte Liste per xtts_voices_list
- RVS: xtts_delete_voice in ALLOWED_TYPES
- Diagnostic Server: Action xtts_delete_voice forwarded via RVS
- Diagnostic UI: renderVoiceList zeigt alle Custom-Voices mit X-Button
  Bei Loeschen der gerade aktiven Stimme: auf Default zuruecksetzen

Feature: Voice-per-Request in Bridge
- App kann mit jedem Chat ein voice-Feld mitschicken
- Bridge merkt sich _next_voice_override, nutzt es fuer die NAECHSTE
  ARIA-Antwort (einmalig, dann reset)
- tts_request (Play-Button) akzeptiert voice im Payload als Override
- Fallback: globale xtts_voice aus voice_config.json
- So kann jedes Geraet seine eigene Stimme haben ohne den globalen
  Default zu aendern

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
duffyduck 2026-04-19 22:43:26 +02:00
parent 40e48b046b
commit fc2438be2d
5 changed files with 91 additions and 6 deletions

View File

@ -498,6 +498,10 @@ class ARIABridge:
self._last_chat_final_at: float = 0.0
# requestId → messageId Map fuer XTTS-Audio-Cache (App-seitige Zuordnung)
self._xtts_request_to_message: dict[str, str] = {}
# Voice-Override aus letzter Chat-Nachricht einer App.
# Wird fuer die direkt folgende ARIA-Antwort genutzt und dann zurueckgesetzt.
# So kann jedes Geraet seine bevorzugte Stimme bekommen (pro Request).
self._next_voice_override: Optional[str] = None
def initialize(self) -> None:
"""Initialisiert alle Komponenten.
@ -856,14 +860,19 @@ class ARIABridge:
logger.info("[core] TTS unterdrueckt (Modus: %s)", self.current_mode.config.name)
return
xtts_voice = getattr(self, 'xtts_voice', '')
# Voice bestimmen: App-Override fuer diesen Request > globale Default-Voice
xtts_voice = self._next_voice_override or getattr(self, 'xtts_voice', '')
# Override verbrauchen (gilt nur fuer genau diese naechste Antwort)
if self._next_voice_override:
logger.info("[core] Nutze Voice-Override: %s", self._next_voice_override)
self._next_voice_override = None
tts_text = tts_text_preview or text
if not tts_text:
logger.info("[core] TTS-Text leer nach Cleanup — uebersprungen")
return
try:
xtts_request_id = str(uuid.uuid4())
# Map fuer audio_pcm/xtts_response → App-Cache Zuordnung
self._xtts_request_to_message[xtts_request_id] = message_id
if len(self._xtts_request_to_message) > 100:
oldest = next(iter(self._xtts_request_to_message))
@ -1031,6 +1040,11 @@ class ARIABridge:
if sender in ("aria", "stt"):
return
text = payload.get("text", "")
# Voice-Override fuer die naechste ARIA-Antwort merken
voice_override = payload.get("voice", "")
if voice_override:
self._next_voice_override = voice_override
logger.info("[rvs] Voice-Override fuer naechste Antwort: %s", voice_override)
if text:
logger.info("[rvs] App-Chat: '%s'", text[:80])
await self.send_to_core(text, source="app")
@ -1096,7 +1110,8 @@ class ARIABridge:
if not text:
return
tts_text = clean_text_for_tts(text) or text
xtts_voice = getattr(self, 'xtts_voice', '')
# Voice aus App-Payload gewinnt, sonst global
xtts_voice = payload.get("voice", "") or getattr(self, 'xtts_voice', '')
try:
xtts_request_id = str(uuid.uuid4())
if message_id:

View File

@ -419,6 +419,9 @@
<button class="btn secondary" onclick="loadXTTSVoices()" style="padding:4px 10px;font-size:11px;">Laden</button>
</div>
<!-- Geklonte Stimmen — Liste mit Loeschen -->
<div id="xtts-voice-list" style="margin-bottom:12px;"></div>
<!-- Voice Cloning -->
<div style="background:#1E1E2E;border-radius:8px;padding:12px;margin-top:8px;">
<div style="color:#0096FF;font-size:13px;font-weight:600;margin-bottom:8px;">Stimme klonen</div>
@ -752,16 +755,23 @@
if (msg.type === 'xtts_voices_list') {
const select = document.getElementById('diag-xtts-voice');
// Behalte erste Option (Default)
// Aktuelle Auswahl merken damit Rebuild sie nicht zerstoert
const previouslySelected = select.value;
while (select.options.length > 1) select.remove(1);
for (const v of (msg.payload?.voices || [])) {
const voices = msg.payload?.voices || [];
for (const v of voices) {
const opt = document.createElement('option');
opt.value = v.name;
opt.textContent = `${v.name} (${(v.size / 1024).toFixed(0)}KB)`;
select.appendChild(opt);
}
document.getElementById('xtts-status').textContent = `XTTS: ${msg.payload?.voices?.length || 0} Stimme(n) verfuegbar`;
// Wenn die vorherige Auswahl weiter existiert → wiederherstellen
if (previouslySelected && voices.some(v => v.name === previouslySelected)) {
select.value = previouslySelected;
}
document.getElementById('xtts-status').textContent = `XTTS: ${voices.length} Stimme(n) verfuegbar`;
document.getElementById('xtts-status').style.color = '#34C759';
renderVoiceList(voices);
return;
}
if (msg.type === 'xtts_voice_saved') {
@ -1356,6 +1366,35 @@
}
// ── XTTS Panel ─────────────────────────────
/**
 * Render the custom XTTS voices, each with a delete button, into the
 * `#xtts-voice-list` container. Does nothing if the container is missing.
 *
 * @param {Array<{name: string, size: number}>} voices - Voice entries; `size`
 *   is divided by 1024 and displayed with a "KB" suffix.
 * @returns {void}
 */
function renderVoiceList(voices) {
  const box = document.getElementById('xtts-voice-list');
  if (!box) return;
  if (!voices || voices.length === 0) {
    box.innerHTML = '<div style="color:#555570;font-size:11px;">Noch keine eigenen Stimmen vorhanden.</div>';
    return;
  }

  // HTML-escape helper, built once instead of once per loop iteration.
  const HTML_ENTITIES = { "&": "&amp;", "<": "&lt;", ">": "&gt;", '"': "&quot;", "'": "&#39;" };
  const esc = (s) => String(s).replace(/[&<>"']/g, (c) => HTML_ENTITIES[c]);

  let html = '<div style="color:#8888AA;font-size:11px;margin-bottom:4px;">Geclonte Stimmen:</div>';
  html += '<div style="display:flex;flex-direction:column;gap:4px;">';
  for (const v of voices) {
    // The inline handler is decoded twice: first as an HTML attribute, then as
    // JavaScript. So the name is first serialised as a JS string literal
    // (JSON.stringify escapes quotes and backslashes) and only then
    // HTML-escaped. HTML-escaping alone is not enough: the browser turns
    // &#39; back into ' before the script is parsed, which would terminate a
    // single-quoted literal early.
    const nameArg = esc(JSON.stringify(String(v.name)));
    html += `<div style="display:flex;align-items:center;gap:8px;background:#1E1E2E;border-radius:4px;padding:4px 8px;font-size:12px;">`
      + `<span style="flex:1;color:#E0E0F0;">${esc(v.name)}</span>`
      + `<span style="color:#555570;font-size:10px;">${(v.size / 1024).toFixed(0)}KB</span>`
      + `<button class="btn secondary" onclick="deleteXttsVoice(${nameArg})" style="padding:2px 8px;font-size:10px;color:#FF6B6B;" title="Stimme loeschen">X</button>`
      + `</div>`;
  }
  html += '</div>';
  box.innerHTML = html;
}
/**
 * Ask the user for confirmation and, if granted, send an
 * `xtts_delete_voice` action for the given voice. If that voice is the one
 * currently selected in the `#diag-xtts-voice` dropdown, the selection is
 * reset to the empty value and the voice config is sent again.
 *
 * @param {string} name - Name of the voice to delete.
 * @returns {void}
 */
function deleteXttsVoice(name) {
  const confirmed = confirm(`Stimme "${name}" endgueltig loeschen?`);
  if (!confirmed) {
    return;
  }

  send({ action: 'xtts_delete_voice', name: name });

  // Only touch the dropdown when the deleted voice is the active one.
  const dropdown = document.getElementById('diag-xtts-voice');
  if (dropdown.value !== name) {
    return;
  }
  dropdown.value = '';
  sendVoiceConfig();
}
// Legacy no-op (XTTS ist jetzt die einzige Engine, kein Panel-Toggle noetig)
function toggleXTTSPanel() {
void 0;

View File

@ -1339,6 +1339,10 @@ wss.on("connection", (ws) => {
} else if (msg.action === "xtts_list_voices") {
// Frische Verbindung die auf Antwort wartet
sendToRVS_withResponse("xtts_list_voices", {}, "xtts_voices_list", ws);
} else if (msg.action === "xtts_delete_voice") {
// Weiterleiten an XTTS-Bridge, die antwortet mit neuer Liste
sendToRVS_raw({ type: "xtts_delete_voice", payload: { name: msg.name }, timestamp: Date.now() });
log("info", "server", `Voice-Delete '${msg.name}' an XTTS-Bridge gesendet`);
} else if (msg.action === "get_voice_config") {
handleGetVoiceConfig(ws);
} else if (msg.action === "send_voice_config") {

View File

@ -18,6 +18,7 @@ const ALLOWED_TYPES = new Set([
"update_check", "update_available", "update_download", "update_data",
"agent_activity", "cancel_request",
"audio_pcm",
"xtts_delete_voice",
]);
// Token-Raum: token -> { clients: Set<ws> }

View File

@ -67,6 +67,8 @@ function connectRVS(forcePlain) {
await handleVoiceUpload(msg.payload);
} else if (msg.type === "xtts_list_voices") {
await handleListVoices();
} else if (msg.type === "xtts_delete_voice") {
await handleDeleteVoice(msg.payload);
}
} catch (err) {
log(`Fehler: ${err.message}`);
@ -337,6 +339,30 @@ async function handleVoiceUpload(payload) {
}
}
// ── Voice Delete Handler ────────────────────────────
/**
 * Handle an `xtts_delete_voice` message: remove `<name>.wav` from
 * VOICES_DIR if it exists, then trigger handleListVoices().
 *
 * The name is sanitised so that every character outside `[a-zA-Z0-9_-]`
 * becomes `_` before it is used as a file name. Errors raised while deleting
 * or listing are logged and not rethrown.
 *
 * @param {{name?: string}} payload - Message payload carrying the voice name.
 * @returns {Promise<void>}
 */
async function handleDeleteVoice(payload) {
  const name = (payload || {}).name;
  const nameIsValid = typeof name === "string" && Boolean(name);
  if (!nameIsValid) {
    log("Voice Delete: ungueltiger Name");
    return;
  }

  const sanitized = name.replace(/[^a-zA-Z0-9_-]/g, "_");
  const filePath = path.join(VOICES_DIR, `${sanitized}.wav`);

  try {
    const present = fs.existsSync(filePath);
    if (!present) {
      log(`Voice Delete: Datei existiert nicht (${filePath})`);
    } else {
      fs.unlinkSync(filePath);
      log(`Voice geloescht: ${filePath}`);
    }
    // The list handler runs whether or not a file was actually removed.
    await handleListVoices();
  } catch (err) {
    log(`Voice Delete Fehler: ${err.message}`);
  }
}
// ── Voice List Handler ──────────────────────────────
async function handleListVoices() {