feat: voice_preload/voice_ready — Feedback wenn neue Stimme geladen ist
XTTS-Bridge:
- empfaengt neuen voice_preload Type, rendert stumm "ja." fuer die Stimme
via TTS-Queue (damit kein Konflikt mit echtem TTS)
- horcht zusaetzlich auf config-Broadcasts: wenn Diagnostic global die
Stimme wechselt, wird auto-preloaded
- broadcastet voice_ready mit Dauer (loadMs) oder error
RVS: voice_preload + voice_ready zur ALLOWED_TYPES-Liste hinzugefuegt.
App (SettingsScreen): beim Wechsel senden wir voice_preload, zeigen einen
Spinner in der Voice-Row und einen Toast mit "Stimme X bereit (Ns)".
App (ChatScreen): Toast auch hier — falls User gerade nicht in Settings ist.
Diagnostic (server+UI): voice_ready wird an Browser durchgereicht, ein
Status-Text unter dem Voice-Dropdown zeigt "wird geladen" → "bereit".
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
c8881f9e4d
commit
9cbea27455
|
|
@ -18,6 +18,7 @@ import {
|
|||
Image,
|
||||
ScrollView,
|
||||
Modal,
|
||||
ToastAndroid,
|
||||
} from 'react-native';
|
||||
import AsyncStorage from '@react-native-async-storage/async-storage';
|
||||
import RNFS from 'react-native-fs';
|
||||
|
|
@ -334,6 +335,17 @@ const ChatScreen: React.FC = () => {
|
|||
localXttsVoiceRef.current = newVoice;
|
||||
AsyncStorage.setItem('aria_xtts_voice', newVoice);
|
||||
}
|
||||
|
||||
// XTTS-Bridge meldet Stimme fertig geladen (kurzer Status-Toast)
|
||||
if (message.type === ('voice_ready' as any)) {
|
||||
const v = ((message.payload as any).voice as string) ?? '';
|
||||
const err = (message.payload as any).error as string | undefined;
|
||||
if (err) {
|
||||
ToastAndroid.show(`Stimme "${v}" Fehler: ${err}`, ToastAndroid.LONG);
|
||||
} else {
|
||||
ToastAndroid.show(`Stimme "${v || 'Standard'}" bereit`, ToastAndroid.SHORT);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
const unsubState = rvs.onStateChange((state) => {
|
||||
|
|
|
|||
|
|
@ -15,6 +15,8 @@ import {
|
|||
StyleSheet,
|
||||
Alert,
|
||||
Platform,
|
||||
ToastAndroid,
|
||||
ActivityIndicator,
|
||||
} from 'react-native';
|
||||
import AsyncStorage from '@react-native-async-storage/async-storage';
|
||||
import RNFS from 'react-native-fs';
|
||||
|
|
@ -82,6 +84,7 @@ const SettingsScreen: React.FC = () => {
|
|||
const [ttsPrerollSec, setTtsPrerollSec] = useState<number>(TTS_PREROLL_DEFAULT_SEC);
|
||||
const [editingPath, setEditingPath] = useState(false);
|
||||
const [xttsVoice, setXttsVoice] = useState('');
|
||||
const [loadingVoice, setLoadingVoice] = useState<string | null>(null);
|
||||
const [availableVoices, setAvailableVoices] = useState<Array<{name: string, size: number}>>([]);
|
||||
const [voiceCloneVisible, setVoiceCloneVisible] = useState(false);
|
||||
const [tempPath, setTempPath] = useState('');
|
||||
|
|
@ -266,11 +269,29 @@ const SettingsScreen: React.FC = () => {
|
|||
rvs.send('xtts_list_voices' as any, {});
|
||||
}
|
||||
|
||||
// Diagnostic-Voice-Wechsel → lokale App-Stimme auf den neuen Default zuruecksetzen
|
||||
// Diagnostic-Voice-Wechsel → lokale App-Stimme auf den neuen Default zuruecksetzen.
|
||||
// Zusaetzlich Preload triggern, damit der User weiss wann's geladen ist.
|
||||
if (message.type === ('config' as any)) {
|
||||
const newVoice = ((message.payload as any).xttsVoice as string) ?? '';
|
||||
setXttsVoice(newVoice);
|
||||
AsyncStorage.setItem('aria_xtts_voice', newVoice);
|
||||
if (newVoice) {
|
||||
setLoadingVoice(newVoice);
|
||||
}
|
||||
}
|
||||
|
||||
// XTTS-Bridge meldet: Stimme fertig geladen
|
||||
if (message.type === ('voice_ready' as any)) {
|
||||
const v = ((message.payload as any).voice as string) ?? '';
|
||||
const err = (message.payload as any).error as string | undefined;
|
||||
const ms = (message.payload as any).loadMs as number | undefined;
|
||||
setLoadingVoice(null);
|
||||
if (err) {
|
||||
ToastAndroid.show(`Stimme "${v}" konnte nicht geladen werden: ${err}`, ToastAndroid.LONG);
|
||||
} else {
|
||||
const suffix = ms ? ` (${(ms / 1000).toFixed(1)}s)` : '';
|
||||
ToastAndroid.show(`Stimme "${v || 'Standard'}" bereit${suffix}`, ToastAndroid.SHORT);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
|
|
@ -340,6 +361,13 @@ const SettingsScreen: React.FC = () => {
|
|||
// Selects a voice: updates local state, persists the choice, and — for a
// non-empty voice name — asks the bridge to preload it while marking it as
// loading in the UI.
const selectVoice = useCallback((voiceName: string) => {
  setXttsVoice(voiceName);
  AsyncStorage.setItem('aria_xtts_voice', voiceName);

  // An empty name means "Standard": nothing to preload, so make sure no
  // loading marker is left behind.
  if (!voiceName) {
    setLoadingVoice(null);
    return;
  }

  // Custom voice: flag it as loading and send the preload request.
  setLoadingVoice(voiceName);
  rvs.send('voice_preload' as any, { voice: voiceName, source: 'app' });
}, []);
|
||||
|
||||
const deleteVoice = useCallback((name: string) => {
|
||||
|
|
@ -619,7 +647,10 @@ const SettingsScreen: React.FC = () => {
|
|||
</Text>
|
||||
<Text style={styles.voiceRowMeta}>{(v.size / 1024).toFixed(0)} KB</Text>
|
||||
</TouchableOpacity>
|
||||
{xttsVoice === v.name && <Text style={styles.voiceRowCheck}>{'\u2713'}</Text>}
|
||||
{loadingVoice === v.name && (
|
||||
<ActivityIndicator size="small" color="#0096FF" style={{marginRight: 8}} />
|
||||
)}
|
||||
{xttsVoice === v.name && loadingVoice !== v.name && <Text style={styles.voiceRowCheck}>{'\u2713'}</Text>}
|
||||
<TouchableOpacity onPress={() => deleteVoice(v.name)} style={styles.voiceRowDelete}>
|
||||
<Text style={styles.voiceRowDeleteIcon}>X</Text>
|
||||
</TouchableOpacity>
|
||||
|
|
|
|||
|
|
@ -438,13 +438,14 @@
|
|||
</div>
|
||||
|
||||
<!-- XTTS Stimme -->
|
||||
<div style="display:flex;align-items:center;gap:12px;margin-bottom:12px;">
|
||||
<div style="display:flex;align-items:center;gap:12px;margin-bottom:6px;">
|
||||
<label style="color:#8888AA;font-size:12px;">XTTS Stimme:</label>
|
||||
<select id="diag-xtts-voice" onchange="sendVoiceConfig()" style="background:#1E1E2E;color:#fff;border:1px solid #2A2A3E;border-radius:6px;padding:6px 10px;font-size:13px;">
|
||||
<option value="">Standard (XTTS Default)</option>
|
||||
</select>
|
||||
<button class="btn secondary" onclick="loadXTTSVoices()" style="padding:4px 10px;font-size:11px;">Laden</button>
|
||||
</div>
|
||||
<div id="voice-status" style="font-size:11px;min-height:14px;margin-bottom:12px;color:#8888AA;"></div>
|
||||
|
||||
<!-- Geklonte Stimmen — Liste mit Loeschen -->
|
||||
<div id="xtts-voice-list" style="margin-bottom:12px;"></div>
|
||||
|
|
@ -851,6 +852,25 @@
|
|||
return;
|
||||
}
|
||||
|
||||
if (msg.type === 'voice_ready') {
|
||||
const v = msg.payload?.voice || '';
|
||||
const err = msg.payload?.error;
|
||||
const ms = msg.payload?.loadMs;
|
||||
const statusEl = document.getElementById('voice-status');
|
||||
if (statusEl) {
|
||||
if (err) {
|
||||
statusEl.textContent = `⚠️ Stimme "${v}" Fehler: ${err}`;
|
||||
statusEl.style.color = '#FF3B30';
|
||||
} else {
|
||||
statusEl.textContent = `✅ Stimme "${v || 'Standard'}" bereit${ms ? ` (${(ms/1000).toFixed(1)}s)` : ''}`;
|
||||
statusEl.style.color = '#34C759';
|
||||
}
|
||||
setTimeout(() => { if (statusEl) statusEl.textContent = ''; }, 5000);
|
||||
}
|
||||
addLog('info', 'xtts', err ? `Voice "${v}": ${err}` : `Voice "${v || 'Standard'}" bereit`);
|
||||
return;
|
||||
}
|
||||
|
||||
if (msg.type === 'watchdog') {
|
||||
const colors = { warning: '#FFD60A', fixing: '#FF9500', fixed: '#34C759', error: '#FF3B30' };
|
||||
const color = colors[msg.status] || '#FFD60A';
|
||||
|
|
@ -1551,6 +1571,11 @@
|
|||
const xttsVoice = document.getElementById('diag-xtts-voice').value;
|
||||
const whisperModel = document.getElementById('diag-whisper-model').value;
|
||||
send({ action: 'send_voice_config', ttsEnabled, xttsVoice, whisperModel });
|
||||
const statusEl = document.getElementById('voice-status');
|
||||
if (statusEl && xttsVoice) {
|
||||
statusEl.textContent = `⏳ Stimme "${xttsVoice}" wird geladen...`;
|
||||
statusEl.style.color = '#FFD60A';
|
||||
}
|
||||
}
|
||||
|
||||
// ── Passwort-Feld Anzeigen/Verbergen ─────────────────────
|
||||
|
|
|
|||
|
|
@ -626,6 +626,17 @@ function connectRVS(forcePlain) {
|
|||
// Mode-Broadcast von der Bridge → an Browser-Clients weiterreichen
|
||||
log("info", "rvs", `Mode-Broadcast: ${msg.payload?.mode} (${msg.payload?.name})`);
|
||||
broadcast({ type: "mode", payload: msg.payload });
|
||||
} else if (msg.type === "voice_ready") {
|
||||
// XTTS-Bridge meldet Stimme fertig geladen → an Browser durchreichen
|
||||
const v = msg.payload?.voice || "";
|
||||
const err = msg.payload?.error;
|
||||
const ms = msg.payload?.loadMs;
|
||||
if (err) {
|
||||
log("warn", "rvs", `Voice-Ready Fehler fuer "${v}": ${err}`);
|
||||
} else {
|
||||
log("info", "rvs", `Voice "${v || "default"}" geladen${ms ? ` in ${(ms/1000).toFixed(1)}s` : ""}`);
|
||||
}
|
||||
broadcast({ type: "voice_ready", payload: msg.payload });
|
||||
} else {
|
||||
log("debug", "rvs", `Nachricht: ${JSON.stringify(msg).slice(0, 150)}`);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -19,6 +19,7 @@ const ALLOWED_TYPES = new Set([
|
|||
"agent_activity", "cancel_request",
|
||||
"audio_pcm",
|
||||
"xtts_delete_voice",
|
||||
"voice_preload", "voice_ready",
|
||||
]);
|
||||
|
||||
// Token-Raum: token -> { clients: Set<ws> }
|
||||
|
|
|
|||
|
|
@ -69,6 +69,18 @@ function connectRVS(forcePlain) {
|
|||
await handleListVoices();
|
||||
} else if (msg.type === "xtts_delete_voice") {
|
||||
await handleDeleteVoice(msg.payload);
|
||||
} else if (msg.type === "voice_preload") {
|
||||
await handleVoicePreload(msg.payload);
|
||||
} else if (msg.type === "config") {
|
||||
// Diagnostic hat globale Voice gewechselt → Preload damit der naechste
|
||||
// Render ohne Ladewartezeit startet + alle Clients "voice_ready" sehen
|
||||
const v = msg.payload && msg.payload.xttsVoice;
|
||||
if (v && v !== lastDiagnosticVoice) {
|
||||
lastDiagnosticVoice = v;
|
||||
await handleVoicePreload({ voice: v, source: "diagnostic" });
|
||||
} else if (!v) {
|
||||
lastDiagnosticVoice = "";
|
||||
}
|
||||
}
|
||||
} catch (err) {
|
||||
log(`Fehler: ${err.message}`);
|
||||
|
|
@ -120,6 +132,10 @@ function applyFadeIn(base64Pcm, sampleRate, channels, fadeMs) {
|
|||
// interleaved PCM-Chunks aus zwei Rendern → klingt wie Chaos.
|
||||
let ttsQueue = Promise.resolve();
|
||||
|
||||
// Merkt sich die letzte in Diagnostic gewaehlte Voice, damit wir nicht bei jedem
|
||||
// config-Broadcast (auch ohne Aenderung) einen Preload triggern.
|
||||
let lastDiagnosticVoice = "";
|
||||
|
||||
function handleTTSRequest(payload) {
|
||||
ttsQueue = ttsQueue.then(() => _runTTSRequest(payload)).catch(err => {
|
||||
log(`TTS-Queue Fehler: ${err.message}`);
|
||||
|
|
@ -470,6 +486,63 @@ async function handleDeleteVoice(payload) {
|
|||
|
||||
// ── Voice List Handler ──────────────────────────────
|
||||
|
||||
/**
 * Preloads a voice by rendering a short dummy utterance ("ja.") through the
 * shared TTS queue and discarding the audio, then reports the outcome with a
 * "voice_ready" message via sendToRVS.
 *
 * Each handled path sends one "voice_ready" message whose payload echoes
 * `voice` and `requestId` and carries either `loadMs` (success) or `error`
 * (invalid name, missing voice file, or render failure).
 *
 * Note: `loadMs` is measured from the moment the job is enqueued, so it
 * includes any time spent waiting behind earlier queue entries.
 *
 * @param {{voice?: string, source?: string, requestId?: string}} [payload]
 *   - voice: voice name without the ".wav" suffix, looked up in VOICES_DIR.
 *     A missing/empty value skips the file check and renders with an empty
 *     speaker name (logged as "default").
 *   - source: free-form origin tag, only used for logging.
 *   - requestId: echoed back unchanged in the "voice_ready" payload.
 * @returns {Promise<void>}
 */
async function handleVoicePreload(payload) {
  const voice = (payload && payload.voice) || "";
  const source = (payload && payload.source) || "unknown";
  const requestId = (payload && payload.requestId) || "";
  log(`Voice-Preload angefordert: "${voice}" (source=${source})`);

  // Single place that builds the "voice_ready" reply; `result` contributes
  // either { loadMs } or { error }.
  const reply = (result) => sendToRVS({
    type: "voice_ready",
    payload: Object.assign({ voice, requestId }, result),
    timestamp: Date.now(),
  });

  try {
    let speakerName = "";
    if (voice) {
      // `voice` comes from a network message. Only accept a plain file name
      // so it cannot point outside VOICES_DIR (no separators, no "."/"..",
      // no NUL byte) before it is used in a path or passed on as speaker.
      const isPlainName = typeof voice === "string"
        && voice !== "."
        && voice !== ".."
        && !/[\\/\0]/.test(voice);
      if (!isPlainName) {
        reply({ error: "invalid-voice-name" });
        log("Preload abgebrochen: ungueltiger Stimmenname");
        return;
      }

      const voiceFilePath = path.join(VOICES_DIR, `${voice}.wav`);
      if (!fs.existsSync(voiceFilePath)) {
        reply({ error: "voice-file-not-found" });
        log(`Preload abgebrochen: ${voiceFilePath} existiert nicht`);
        return;
      }
      speakerName = voice;
    }

    // Run the dummy render via the queue so it is serialized with the other
    // jobs chained onto ttsQueue.
    const t0 = Date.now();
    const render = ttsQueue.then(
      () => streamXTTSAsPCM("ja.", "de", speakerName, () => {}),
    );
    // Keep the shared queue in a resolved state even if this render fails;
    // the failure itself is observed through `render` below.
    ttsQueue = render.then(() => {}, () => {});
    await render;

    const ms = Date.now() - t0;
    log(`Voice "${voice || "default"}" geladen in ${ms}ms`);
    reply({ loadMs: ms });
  } catch (err) {
    // A rejection value is not guaranteed to be an Error; make sure a
    // "voice_ready" error reply still goes out in that case.
    const message = String((err && err.message) || err);
    log(`Voice-Preload Fehler: ${message}`);
    reply({ error: message.slice(0, 200) });
  }
}
|
||||
|
||||
async function handleListVoices() {
|
||||
try {
|
||||
const files = fs.existsSync(VOICES_DIR)
|
||||
|
|
|
|||
Loading…
Reference in New Issue