diff --git a/android/src/screens/ChatScreen.tsx b/android/src/screens/ChatScreen.tsx index 72f7689..0d214fa 100644 --- a/android/src/screens/ChatScreen.tsx +++ b/android/src/screens/ChatScreen.tsx @@ -18,6 +18,7 @@ import { Image, ScrollView, Modal, + ToastAndroid, } from 'react-native'; import AsyncStorage from '@react-native-async-storage/async-storage'; import RNFS from 'react-native-fs'; @@ -334,6 +335,17 @@ const ChatScreen: React.FC = () => { localXttsVoiceRef.current = newVoice; AsyncStorage.setItem('aria_xtts_voice', newVoice); } + + // XTTS-Bridge meldet Stimme fertig geladen (kurzer Status-Toast) + if (message.type === ('voice_ready' as any)) { + const v = ((message.payload as any).voice as string) ?? ''; + const err = (message.payload as any).error as string | undefined; + if (err) { + ToastAndroid.show(`Stimme "${v}" Fehler: ${err}`, ToastAndroid.LONG); + } else { + ToastAndroid.show(`Stimme "${v || 'Standard'}" bereit`, ToastAndroid.SHORT); + } + } }); const unsubState = rvs.onStateChange((state) => { diff --git a/android/src/screens/SettingsScreen.tsx b/android/src/screens/SettingsScreen.tsx index 26b558c..966d977 100644 --- a/android/src/screens/SettingsScreen.tsx +++ b/android/src/screens/SettingsScreen.tsx @@ -15,6 +15,8 @@ import { StyleSheet, Alert, Platform, + ToastAndroid, + ActivityIndicator, } from 'react-native'; import AsyncStorage from '@react-native-async-storage/async-storage'; import RNFS from 'react-native-fs'; @@ -82,6 +84,7 @@ const SettingsScreen: React.FC = () => { const [ttsPrerollSec, setTtsPrerollSec] = useState(TTS_PREROLL_DEFAULT_SEC); const [editingPath, setEditingPath] = useState(false); const [xttsVoice, setXttsVoice] = useState(''); + const [loadingVoice, setLoadingVoice] = useState(null); const [availableVoices, setAvailableVoices] = useState>([]); const [voiceCloneVisible, setVoiceCloneVisible] = useState(false); const [tempPath, setTempPath] = useState(''); @@ -266,11 +269,29 @@ const SettingsScreen: React.FC = () => { rvs.send('xtts_list_voices' as any, {}); } - // Diagnostic-Voice-Wechsel → lokale App-Stimme auf den neuen Default zuruecksetzen + // Diagnostic-Voice-Wechsel → lokale App-Stimme auf den neuen Default zuruecksetzen. + // Zusaetzlich Preload triggern, damit der User weiss wann's geladen ist. if (message.type === ('config' as any)) { const newVoice = ((message.payload as any).xttsVoice as string) ?? ''; setXttsVoice(newVoice); AsyncStorage.setItem('aria_xtts_voice', newVoice); + if (newVoice) { + setLoadingVoice(newVoice); + } + } + + // XTTS-Bridge meldet: Stimme fertig geladen + if (message.type === ('voice_ready' as any)) { + const v = ((message.payload as any).voice as string) ?? ''; + const err = (message.payload as any).error as string | undefined; + const ms = (message.payload as any).loadMs as number | undefined; + setLoadingVoice(null); + if (err) { + ToastAndroid.show(`Stimme "${v}" konnte nicht geladen werden: ${err}`, ToastAndroid.LONG); + } else { + const suffix = ms ? ` (${(ms / 1000).toFixed(1)}s)` : ''; + ToastAndroid.show(`Stimme "${v || 'Standard'}" bereit${suffix}`, ToastAndroid.SHORT); + } } }); @@ -340,6 +361,13 @@ const SettingsScreen: React.FC = () => { const selectVoice = useCallback((voiceName: string) => { setXttsVoice(voiceName); AsyncStorage.setItem('aria_xtts_voice', voiceName); + // Preload nur fuer Custom-Voices — "Standard" braucht keinen Ladevorgang + if (voiceName) { + setLoadingVoice(voiceName); + rvs.send('voice_preload' as any, { voice: voiceName, source: 'app' }); + } else { + setLoadingVoice(null); + } }, []); const deleteVoice = useCallback((name: string) => { @@ -619,7 +647,10 @@ const SettingsScreen: React.FC = () => { {(v.size / 1024).toFixed(0)} KB - {xttsVoice === v.name && {'\u2713'}} + {loadingVoice === v.name && ( + + )} + {xttsVoice === v.name && loadingVoice !== v.name && {'\u2713'}} deleteVoice(v.name)} style={styles.voiceRowDelete}> X diff --git a/diagnostic/index.html b/diagnostic/index.html index 4dadb76..f98e2a9 100644 --- a/diagnostic/index.html +++ b/diagnostic/index.html @@ -438,13 +438,14 @@ -
+
+
@@ -851,6 +852,25 @@ return; } + if (msg.type === 'voice_ready') { + const v = msg.payload?.voice || ''; + const err = msg.payload?.error; + const ms = msg.payload?.loadMs; + const statusEl = document.getElementById('voice-status'); + if (statusEl) { + if (err) { + statusEl.textContent = `⚠️ Stimme "${v}" Fehler: ${err}`; + statusEl.style.color = '#FF3B30'; + } else { + statusEl.textContent = `✅ Stimme "${v || 'Standard'}" bereit${ms ? ` (${(ms/1000).toFixed(1)}s)` : ''}`; + statusEl.style.color = '#34C759'; + } + setTimeout(() => { if (statusEl) statusEl.textContent = ''; }, 5000); + } + addLog('info', 'xtts', err ? `Voice "${v}": ${err}` : `Voice "${v || 'Standard'}" bereit`); + return; + } + if (msg.type === 'watchdog') { const colors = { warning: '#FFD60A', fixing: '#FF9500', fixed: '#34C759', error: '#FF3B30' }; const color = colors[msg.status] || '#FFD60A'; @@ -1551,6 +1571,11 @@ const xttsVoice = document.getElementById('diag-xtts-voice').value; const whisperModel = document.getElementById('diag-whisper-model').value; send({ action: 'send_voice_config', ttsEnabled, xttsVoice, whisperModel }); + const statusEl = document.getElementById('voice-status'); + if (statusEl && xttsVoice) { + statusEl.textContent = `⏳ Stimme "${xttsVoice}" wird geladen...`; + statusEl.style.color = '#FFD60A'; + } } // ── Passwort-Feld Anzeigen/Verbergen ───────────────────── diff --git a/diagnostic/server.js b/diagnostic/server.js index 56f6287..3ed43e6 100644 --- a/diagnostic/server.js +++ b/diagnostic/server.js @@ -626,6 +626,17 @@ function connectRVS(forcePlain) { // Mode-Broadcast von der Bridge → an Browser-Clients weiterreichen log("info", "rvs", `Mode-Broadcast: ${msg.payload?.mode} (${msg.payload?.name})`); broadcast({ type: "mode", payload: msg.payload }); + } else if (msg.type === "voice_ready") { + // XTTS-Bridge meldet Stimme fertig geladen → an Browser durchreichen + const v = msg.payload?.voice || ""; + const err = msg.payload?.error; + const ms = msg.payload?.loadMs; + if (err) { + log("warn", "rvs", `Voice-Ready Fehler fuer "${v}": ${err}`); + } else { + log("info", "rvs", `Voice "${v || "default"}" geladen${ms ? ` in ${(ms/1000).toFixed(1)}s` : ""}`); + } + broadcast({ type: "voice_ready", payload: msg.payload }); } else { log("debug", "rvs", `Nachricht: ${JSON.stringify(msg).slice(0, 150)}`); } diff --git a/rvs/server.js b/rvs/server.js index 9d51648..42696bc 100644 --- a/rvs/server.js +++ b/rvs/server.js @@ -19,6 +19,7 @@ const ALLOWED_TYPES = new Set([ "agent_activity", "cancel_request", "audio_pcm", "xtts_delete_voice", + "voice_preload", "voice_ready", ]); // Token-Raum: token -> { clients: Set } diff --git a/xtts/bridge.js b/xtts/bridge.js index c1ff8fa..673309c 100644 --- a/xtts/bridge.js +++ b/xtts/bridge.js @@ -69,6 +69,18 @@ function connectRVS(forcePlain) { await handleListVoices(); } else if (msg.type === "xtts_delete_voice") { await handleDeleteVoice(msg.payload); + } else if (msg.type === "voice_preload") { + await handleVoicePreload(msg.payload); + } else if (msg.type === "config") { + // Diagnostic hat globale Voice gewechselt → Preload damit der naechste + // Render ohne Ladewartezeit startet + alle Clients "voice_ready" sehen + const v = msg.payload && msg.payload.xttsVoice; + if (v && v !== lastDiagnosticVoice) { + lastDiagnosticVoice = v; + await handleVoicePreload({ voice: v, source: "diagnostic" }); + } else if (!v) { + lastDiagnosticVoice = ""; + } } } catch (err) { log(`Fehler: ${err.message}`); @@ -120,6 +132,10 @@ function applyFadeIn(base64Pcm, sampleRate, channels, fadeMs) { // interleaved PCM-Chunks aus zwei Rendern → klingt wie Chaos. let ttsQueue = Promise.resolve(); +// Merkt sich die letzte in Diagnostic gewaehlte Voice, damit wir nicht bei jedem +// config-Broadcast (auch ohne Aenderung) einen Preload triggern. +let lastDiagnosticVoice = ""; + function handleTTSRequest(payload) { ttsQueue = ttsQueue.then(() => _runTTSRequest(payload)).catch(err => { log(`TTS-Queue Fehler: ${err.message}`); @@ -470,6 +486,63 @@ async function handleDeleteVoice(payload) { // ── Voice List Handler ────────────────────────────── +/** + * Preload einer Stimme — rendert stumm ein kurzes Dummy-Audio, damit XTTS + * die Speaker-Latents laedt und der naechste echte Request ohne Wartezeit + * loslegen kann. Broadcastet "voice_ready" wenn fertig (oder mit error). + */ +async function handleVoicePreload(payload) { + const voice = (payload && payload.voice) || ""; + const source = (payload && payload.source) || "unknown"; + const requestId = (payload && payload.requestId) || ""; + log(`Voice-Preload angefordert: "${voice}" (source=${source})`); + + try { + let speakerName = ""; + if (voice) { + const voiceFilePath = path.join(VOICES_DIR, `${voice}.wav`); + if (!fs.existsSync(voiceFilePath)) { + sendToRVS({ + type: "voice_ready", + payload: { voice, requestId, error: "voice-file-not-found" }, + timestamp: Date.now(), + }); + log(`Preload abgebrochen: ${voiceFilePath} existiert nicht`); + return; + } + speakerName = voice; + } + + // Dummy-Request via Queue — damit sich Preload nicht mit echtem TTS ueberholt. + const t0 = Date.now(); + await new Promise((resolve, reject) => { + ttsQueue = ttsQueue.then(async () => { + try { + await streamXTTSAsPCM("ja.", "de", speakerName, () => {}); + resolve(); + } catch (err) { + reject(err); + } + }).catch(reject); + }); + const ms = Date.now() - t0; + log(`Voice "${voice || "default"}" geladen in ${ms}ms`); + + sendToRVS({ + type: "voice_ready", + payload: { voice, requestId, loadMs: ms }, + timestamp: Date.now(), + }); + } catch (err) { + log(`Voice-Preload Fehler: ${err.message}`); + sendToRVS({ + type: "voice_ready", + payload: { voice, requestId, error: err.message.slice(0, 200) }, + timestamp: Date.now(), + }); + } +} + async function handleListVoices() { try { const files = fs.existsSync(VOICES_DIR)