feat: Bug-Runde + 5 App/Diagnostic-Features
Bugs: - App Mute-/Auto-Playback: onMessage-Closure hielt stale ttsDeviceEnabled/ ttsMuted → Mute wurde ignoriert + AsyncStorage-Load kam nicht durch. Fix via ttsCanPlayRef (live gespiegelt) statt Closure-Variablen. - App Zombie-Recording: toggleWakeWord hat die laufende Aufnahme nicht gestoppt → audioService.recordingState blieb 'recording' → normaler Aufnahme-Button wirkungslos. Fix: await stopRecording() vor stop(). - Porcupine robuster: BuiltInKeywords-Enum Mapping mit String-Fallback, errorCallback fuer Runtime-Crashes (state zurueck auf off statt App-Crash), mehr Logging damit man beim naechsten Issue debuggen kann. App-Features: - MessageText Komponente: Text ist durchgehend selektierbar, erkennt URLs (http/https), E-Mails, Telefonnummern und macht sie anklickbar (oeffnet Browser / Mail-App / Android-Dialer via Linking). - TTS-Wiedergabegeschwindigkeit pro Geraet einstellbar (Settings -> "Sprechgeschwindigkeit", 0.5-2.0 in 0.1-Schritten, Default 1.0). Wird als speed-Param an die F5-TTS-Bridge durchgereicht. Bridge-Durchreichen: - ChatScreen: speed aus AsyncStorage via ttsSpeedRef, an chat/audio/ tts_request mitgeschickt - aria-bridge: _next_speed_override wie voice_override, an xtts_request weitergereicht - f5tts-bridge: speed-Param an F5TTS.infer() durchgereicht Diagnostic-Feature: - Voice-Preview-Button (Play-Icon) vor dem Delete-X in der Stimmen-Liste - Modal mit Textfeld (Default-Beispieltext wird bei jedem Oeffnen neu gesetzt) und Play-Button - Server sammelt audio_pcm Frames der Preview-Anfrage, baut WAV, schickt base64 zurueck, Browser spielt im <audio>-Tag ab - 60s Timeout-Safety-Net Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
+73
-1
@@ -136,6 +136,25 @@
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Voice-Preview Modal -->
|
||||
<div id="voice-preview-modal" style="display:none;position:fixed;inset:0;z-index:1000;background:rgba(0,0,0,0.7);align-items:center;justify-content:center;">
|
||||
<div style="background:#1A1A2E;border:1px solid #2A2A3E;border-radius:10px;padding:20px;max-width:560px;width:90%;display:flex;flex-direction:column;gap:12px;">
|
||||
<div style="display:flex;align-items:center;justify-content:space-between;">
|
||||
<h3 style="margin:0;color:#fff;">Stimmen-Preview: <span id="voice-preview-name">—</span></h3>
|
||||
<button onclick="closeVoicePreview()" style="background:none;border:none;color:#8888AA;font-size:22px;cursor:pointer;">×</button>
|
||||
</div>
|
||||
<textarea id="voice-preview-text" rows="4"
|
||||
style="background:#0D0D1A;border:1px solid #2A2A3E;border-radius:6px;padding:10px;color:#fff;font-size:13px;resize:vertical;"></textarea>
|
||||
<div style="display:flex;gap:8px;align-items:center;">
|
||||
<button id="voice-preview-play" onclick="playVoicePreview()" class="btn primary" style="padding:8px 16px;">
|
||||
▶ Abspielen
|
||||
</button>
|
||||
<span id="voice-preview-status" style="color:#8888AA;font-size:11px;flex:1;"></span>
|
||||
</div>
|
||||
<audio id="voice-preview-audio" controls style="width:100%;display:none;"></audio>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Disk-Space Warnung (dynamisch gesetzt) -->
|
||||
<div id="disk-banner" style="display:none;position:sticky;top:0;z-index:500;padding:10px 14px;border-radius:0;margin:-16px -16px 12px -16px;font-size:13px;">
|
||||
<div style="display:flex;align-items:center;gap:10px;flex-wrap:wrap;">
|
||||
@@ -930,6 +949,24 @@
|
||||
return;
|
||||
}
|
||||
|
||||
if (msg.type === 'voice_preview_audio') {
|
||||
const statusEl = document.getElementById('voice-preview-status');
|
||||
const audio = document.getElementById('voice-preview-audio');
|
||||
const playBtn = document.getElementById('voice-preview-play');
|
||||
if (playBtn) playBtn.disabled = false;
|
||||
if (msg.error) {
|
||||
if (statusEl) statusEl.textContent = '❌ Fehler: ' + msg.error;
|
||||
return;
|
||||
}
|
||||
if (msg.base64 && audio) {
|
||||
audio.src = 'data:audio/wav;base64,' + msg.base64;
|
||||
audio.style.display = 'block';
|
||||
audio.play().catch(() => {});
|
||||
if (statusEl) statusEl.textContent = '✅ fertig';
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (msg.type === 'voice_ready') {
|
||||
const v = msg.payload?.voice || '';
|
||||
const err = msg.payload?.error;
|
||||
@@ -1579,16 +1616,51 @@
|
||||
html += '<div style="display:flex;flex-direction:column;gap:4px;">';
|
||||
for (const v of voices) {
|
||||
// HTML-escapes the five markup-significant characters (& < > " ') so the value
// can be placed in element content or inside a quoted attribute value.
// Non-string input is coerced with String() first.
const esc = (s) => String(s).replace(/[&<>"']/g, (c) => ({ "&": "&amp;", "<": "&lt;", ">": "&gt;", '"': "&quot;", "'": "&#39;" }[c]));
|
||||
// Voice name prepared for use inside a single-quoted JS string literal that is
// itself embedded in a double-quoted HTML attribute (onclick="fn('…')").
// The JS-level escaping (backslash, apostrophe) has to happen BEFORE the HTML
// escaping: the browser decodes entities in the attribute value before the
// script is parsed, so an entity-encoded apostrophe would turn back into a bare
// ' and terminate the string literal.
const jsName = esc(String(v.name).replace(/\\/g, "\\\\").replace(/'/g, "\\'"));
|
||||
html += `<div style="display:flex;align-items:center;gap:8px;background:#1E1E2E;border-radius:4px;padding:4px 8px;font-size:12px;">`
|
||||
+ `<span style="flex:1;color:#E0E0F0;">${esc(v.name)}</span>`
|
||||
+ `<span style="color:#555570;font-size:10px;">${(v.size/1024).toFixed(0)}KB</span>`
|
||||
+ `<button class="btn secondary" onclick="deleteXttsVoice('${esc(v.name).replace(/'/g, "\\'")}')" style="padding:2px 8px;font-size:10px;color:#FF6B6B;" title="Stimme loeschen">X</button>`
|
||||
+ `<button class="btn secondary" onclick="openVoicePreview('${jsName}')" style="padding:2px 8px;font-size:12px;" title="Stimme anhoeren">▶</button>`
|
||||
+ `<button class="btn secondary" onclick="deleteXttsVoice('${jsName}')" style="padding:2px 8px;font-size:10px;color:#FF6B6B;" title="Stimme loeschen">X</button>`
|
||||
+ `</div>`;
|
||||
}
|
||||
html += '</div>';
|
||||
box.innerHTML = html;
|
||||
}
|
||||
|
||||
// ── Voice Preview Modal ─────────────────────────
|
||||
// Sample sentence that openVoicePreview() writes into the preview textarea
// every time the modal is opened.
const VOICE_PREVIEW_DEFAULT = 'Hallo, ich bin ARIA. Das hier ist ein kleiner Test damit du meine Stimme beurteilen kannst.';
|
||||
// Name of the voice the preview modal was last opened for; assigned by
// openVoicePreview() and sent along by playVoicePreview().
let currentPreviewVoice = '';
|
||||
|
||||
/**
 * Opens the voice-preview modal for one voice and resets it to a clean state.
 *
 * Remembers the voice in `currentPreviewVoice`, shows its name in the header,
 * restores the default sample text, clears the status line, re-enables the
 * play button and hides/unloads the audio player before making the modal visible.
 *
 * @param {string} name - Name of the voice to preview.
 */
function openVoicePreview(name) {
  currentPreviewVoice = name;
  document.getElementById('voice-preview-name').textContent = name;
  // Reset the sample text on every open.
  document.getElementById('voice-preview-text').value = VOICE_PREVIEW_DEFAULT;
  document.getElementById('voice-preview-status').textContent = '';
  // A previous request that never got an answer would otherwise leave the
  // button disabled for every later preview.
  document.getElementById('voice-preview-play').disabled = false;

  const player = document.getElementById('voice-preview-audio');
  player.style.display = 'none';
  // Assigning an empty string to `src` makes the browser resolve it as a
  // relative URL and try to load it; removing the attribute and calling
  // load() is the way to actually unload the previous clip.
  player.removeAttribute('src');
  player.load();

  document.getElementById('voice-preview-modal').style.display = 'flex';
}
|
||||
|
||||
/**
 * Hides the voice-preview modal and pauses any clip that is still playing.
 */
function closeVoicePreview() {
  document.getElementById('voice-preview-modal').style.display = 'none';
  // Explicit null guard instead of an empty catch: a missing player element is
  // the only failure this step has to tolerate.
  document.getElementById('voice-preview-audio')?.pause();
}
|
||||
|
||||
/**
 * Requests a rendering of the text in the preview textarea with the voice the
 * modal was opened for.
 *
 * An empty (whitespace-only) text is rejected with a status message. Otherwise
 * the play button is disabled, the status shows a progress hint and a
 * `preview_voice` action is handed to `send`. If `send` throws, the button is
 * re-enabled and the error is shown instead of leaving the modal stuck in the
 * "rendering" state.
 */
function playVoicePreview() {
  const statusEl = document.getElementById('voice-preview-status');
  const text = (document.getElementById('voice-preview-text').value || '').trim();
  if (!text) {
    statusEl.textContent = 'Text leer';
    return;
  }

  const playBtn = document.getElementById('voice-preview-play');
  statusEl.textContent = '⏳ Rendere...';
  playBtn.disabled = true;
  try {
    send({ action: 'preview_voice', voice: currentPreviewVoice, text });
  } catch (err) {
    // Nothing was sent, so no response will ever re-enable the button.
    playBtn.disabled = false;
    statusEl.textContent = `❌ Fehler: ${err.message}`;
  }
}
|
||||
|
||||
function deleteXttsVoice(name) {
|
||||
if (!confirm(`Stimme "${name}" endgueltig loeschen?`)) return;
|
||||
send({ action: 'xtts_delete_voice', name });
|
||||
|
||||
@@ -653,6 +653,9 @@ function connectRVS(forcePlain) {
|
||||
log("info", "rvs", `service_status ${svc} ${state}${model ? ` (${model})` : ""}`);
|
||||
}
|
||||
broadcast({ type: "service_status", payload: msg.payload });
|
||||
} else if (msg.type === "audio_pcm" && msg.payload && _previewPending.size > 0) {
|
||||
// PCM-Chunks einer laufenden Voice-Preview — sammeln + WAV bauen
|
||||
_handlePreviewChunk(msg.payload);
|
||||
} else {
|
||||
log("debug", "rvs", `Nachricht: ${JSON.stringify(msg).slice(0, 150)}`);
|
||||
}
|
||||
@@ -1465,6 +1468,8 @@ wss.on("connection", (ws) => {
|
||||
handleSaveTriggers(ws, msg.triggers || []);
|
||||
} else if (msg.action === "test_tts") {
|
||||
handleTestTTS(ws, msg.text || "Test");
|
||||
} else if (msg.action === "preview_voice") {
|
||||
handleVoicePreview(ws, msg.voice || "", msg.text || "Hallo.");
|
||||
} else if (msg.action === "check_tts") {
|
||||
handleCheckTTS(ws);
|
||||
} else if (msg.action === "check_desktop") {
|
||||
@@ -1637,6 +1642,95 @@ async function handleSaveTriggers(clientWs, triggers) {
|
||||
}
|
||||
|
||||
// ── TTS Diagnose (XTTS) ───────────────────────────────
|
||||
// ── Voice Preview ────────────────────────────────────────
|
||||
// Sammelt audio_pcm Chunks einer Preview-Anfrage, baut am Ende eine WAV
|
||||
// und schickt sie base64-kodiert an den Browser-Client.
|
||||
//
|
||||
// Map requestId → { clientWs, chunks: [Buffer], sampleRate, channels }
|
||||
// In-flight voice-preview requests, keyed by requestId. Each value has the
// shape { clientWs, chunks, sampleRate, channels } as created in
// handleVoicePreview(); _handlePreviewChunk() appends decoded PCM to `chunks`
// and removes the entry when the final chunk arrives.
const _previewPending = new Map();
|
||||
|
||||
/**
 * Prepends a 44-byte RIFF/WAVE header to raw PCM data.
 *
 * The header always declares format tag 1 (PCM) with 16 bits per sample, so the
 * input is expected to be 16-bit PCM. A trailing partial sample frame (bytes
 * that do not fill a whole `channels * 2`-byte frame) is dropped so that the
 * declared data size is always a multiple of the block size.
 *
 * @param {Buffer} pcmBuf - Raw PCM bytes.
 * @param {number} sampleRate - Samples per second written to the header.
 * @param {number} channels - Channel count written to the header.
 * @returns {Buffer} A new buffer containing header + PCM data.
 */
function _buildWavFromPcm(pcmBuf, sampleRate, channels) {
  const HEADER_SIZE = 44;
  const bitsPerSample = 16;
  const blockAlign = channels * bitsPerSample / 8;
  const byteRate = sampleRate * blockAlign;

  // Only whole frames are written; guard the modulo against a zero block size.
  const dataSize = blockAlign > 0
    ? pcmBuf.length - (pcmBuf.length % blockAlign)
    : pcmBuf.length;
  const data = dataSize === pcmBuf.length ? pcmBuf : pcmBuf.subarray(0, dataSize);

  const header = Buffer.alloc(HEADER_SIZE);
  header.write("RIFF", 0);
  header.writeUInt32LE(HEADER_SIZE - 8 + dataSize, 4); // RIFF chunk size = file size - 8
  header.write("WAVE", 8);
  header.write("fmt ", 12);
  header.writeUInt32LE(16, 16); // size of the fmt sub-chunk
  header.writeUInt16LE(1, 20); // format tag 1 = PCM
  header.writeUInt16LE(channels, 22);
  header.writeUInt32LE(sampleRate, 24);
  header.writeUInt32LE(byteRate, 28);
  header.writeUInt16LE(blockAlign, 32);
  header.writeUInt16LE(bitsPerSample, 34);
  header.write("data", 36);
  header.writeUInt32LE(dataSize, 40);
  return Buffer.concat([header, data]);
}
|
||||
|
||||
/**
 * Consumes one `audio_pcm` payload that belongs to a pending voice preview.
 *
 * Payloads whose `requestId` is not registered in `_previewPending` are ignored.
 * Otherwise the base64 PCM data is decoded and appended to the entry, and the
 * first non-zero `sampleRate` / `channels` seen are remembered. When the payload
 * is marked `final`, the entry is removed, all collected PCM is wrapped into a
 * WAV via `_buildWavFromPcm` (falling back to 24000 Hz / 1 channel when the
 * stream never announced them) and sent base64-encoded to the requesting client
 * as a `voice_preview_audio` message. A final chunk without any collected audio,
 * or a failure while building the WAV, is reported through the `error` field of
 * the same message type.
 *
 * @param {object} payload - Payload of the incoming `audio_pcm` message.
 */
function _handlePreviewChunk(payload) {
  const reqId = payload?.requestId || "";
  const entry = _previewPending.get(reqId);
  if (!entry) return;

  // Only string data can be base64; anything else is skipped explicitly
  // instead of relying on an empty catch around the decoder.
  if (typeof payload.base64 === "string" && payload.base64 !== "") {
    entry.chunks.push(Buffer.from(payload.base64, "base64"));
  }
  if (!entry.sampleRate && payload.sampleRate) entry.sampleRate = payload.sampleRate;
  if (!entry.channels && payload.channels) entry.channels = payload.channels;
  if (!payload.final) return;

  _previewPending.delete(reqId);

  // Sends a voice_preview_audio message if the client socket is still open
  // (readyState 1).
  const reply = (body) => {
    if (entry.clientWs && entry.clientWs.readyState === 1) {
      entry.clientWs.send(JSON.stringify({ type: "voice_preview_audio", ...body }));
    }
  };

  try {
    const pcm = Buffer.concat(entry.chunks);
    if (pcm.length === 0) {
      // An empty WAV would be reported as success by the UI although nothing
      // can be played.
      reply({ error: "Keine Audiodaten empfangen" });
      return;
    }
    const wav = _buildWavFromPcm(pcm, entry.sampleRate || 24000, entry.channels || 1);
    reply({ base64: wav.toString("base64"), size: wav.length });
  } catch (err) {
    reply({ error: err.message });
  }
}
|
||||
|
||||
/**
 * Starts a voice preview for a browser client.
 *
 * Registers a fresh request id in `_previewPending`, arms a 60 s safety timeout
 * that removes the entry and reports a timeout to the client if it is still
 * pending, logs the request and forwards an `xtts_request` (language "de",
 * speed 1.0) via `sendToRVS_raw`.
 *
 * If anything in that sequence throws, the pending entry and the timeout are
 * cleaned up again so the client receives exactly one error message, and the
 * error is only sent when the client socket is still open (readyState 1).
 *
 * @param {object} clientWs - WebSocket of the requesting browser client.
 * @param {string} voice - Name of the voice to render with.
 * @param {string} text - Text to speak; coerced to a string.
 * @returns {Promise<void>}
 */
async function handleVoicePreview(clientWs, voice, text) {
  // Reports an error to the client unless the socket is gone.
  const notifyError = (message) => {
    if (clientWs && clientWs.readyState === 1) {
      clientWs.send(JSON.stringify({ type: "voice_preview_audio", error: message }));
    }
  };

  let requestId;
  let timer;
  try {
    requestId = crypto.randomUUID();
    // Coerce before anything is registered so a non-string text cannot fail
    // half-way through with an entry already pending.
    const safeText = String(text);
    _previewPending.set(requestId, { clientWs, chunks: [], sampleRate: 0, channels: 0 });

    // Timeout safety net
    timer = setTimeout(() => {
      if (_previewPending.delete(requestId)) {
        notifyError("Timeout (60s) — keine Antwort vom f5tts-bridge");
      }
    }, 60000);

    log("info", "server", `Voice-Preview: voice="${voice}" text="${safeText.slice(0, 60)}"`);
    sendToRVS_raw({
      type: "xtts_request",
      payload: { text: safeText, language: "de", requestId, voice, speed: 1.0 },
      timestamp: Date.now(),
    });
  } catch (err) {
    // Undo the registration; otherwise the timeout would send a second,
    // misleading error for the same request.
    clearTimeout(timer);
    _previewPending.delete(requestId);
    notifyError(err.message);
  }
}
|
||||
|
||||
async function handleTestTTS(clientWs, text) {
|
||||
try {
|
||||
log("info", "server", `TTS-Test via XTTS: "${text}"`);
|
||||
|
||||
Reference in New Issue
Block a user