feat(speaker-id): Phase 2 — Enrollment-UI (App) + Voice-ID-Section (Diagnostic)
App-Seite: - VoiceIdEnrollment.tsx (neue Komponente, ~370 Zeilen): Status-Karte (loading/unenrolled/enrolled/error), Sample-Recorder mit Countdown (4s fest pro Sample), Liste mit einzelnem Loeschen, Save-Button (disabled bis 5 Samples), Fingerprint-Delete mit Confirm. - SettingsScreen.tsx: neue Section 🎤 'Stimme einrichten' zwischen Wake-Word und Sprachausgabe. - Sample-Format: WAV via audioService.startRecording — wird whisper-bridge-seitig per wave-Modul gestrippt. Diagnostic-Seite: - Neue settings-section 'Voice-ID (Sprecher-Erkennung)': Status-Anzeige (live ueber voice_id_status_response), Threshold-Slider 0.30-0.70 (persistiert in voice_config.json, broadcast als config-Message), Refresh + Delete-Button. - server.js: 2 neue actions (voice_id_status, voice_id_delete), send_voice_config nimmt voiceIdThreshold mit auf. Backend: - speaker_id.py: _normalize_audio_bytes erkennt jetzt WAV-Header (RIFF/WAVE) und strippt auf rohes PCM — sonst fliesst der 44-Byte-WAV-Header als vermeintliches Audio in die ECAPA-Embeddings ein. - bridge.py: config-Broadcast-Handler setzt voiceIdThreshold auf speaker_id.DEFAULT_THRESHOLD (wird erst in Phase 3 beim Gating genutzt, persistiert aber schon). Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,426 @@
|
||||
/**
|
||||
* Voice-ID Enrollment + Status — App-seitig.
|
||||
*
|
||||
* User nimmt 5-7 Samples (je 4s) seiner Stimme auf, App schickt sie an
|
||||
* die whisper-bridge via RVS (voice_id_enroll_request). Bridge berechnet
|
||||
* SpeechBrain-ECAPA-Embeddings, mittelt sie zu einem Fingerprint, speichert
|
||||
* /voice-id/fingerprint.json.
|
||||
*
|
||||
* Verwendung: in SettingsScreen für Section 'voice_id' eingebunden.
|
||||
* Holt Status bei Mount + nach jedem Enroll/Delete neu ab.
|
||||
*/
|
||||
|
||||
import React, { useCallback, useEffect, useRef, useState } from 'react';
import {
  ActivityIndicator,
  Alert,
  ScrollView,
  StyleSheet,
  Text,
  ToastAndroid,
  TouchableOpacity,
  View,
} from 'react-native';

import audioService from '../services/audio';
import rvs from '../services/rvs';
|
||||
|
||||
/** Fixed recording length of a single enrollment sample, in milliseconds. */
const SAMPLE_DURATION_MS = 4_000;

/** Minimum number of recorded samples before the enrollment can be saved. */
const SAMPLES_REQUIRED = 5;

/** One recorded voice sample, kept locally until the enrollment is submitted. */
interface Sample {
  /** Base64-encoded recording as returned by the audio service. */
  base64: string;
  /** Length of the recording in milliseconds, as reported by the audio service. */
  durationMs: number;
}

/**
 * Enrollment status shown in the status card.
 *
 * - `loading`: a status request was sent and no answer has arrived yet.
 * - `unenrolled`: the status response reported no stored fingerprint.
 * - `enrolled`: a fingerprint exists; the extra fields mirror the status response.
 * - `error`: the status response carried `ok === false`.
 */
type Status =
  | { state: 'loading' }
  | { state: 'unenrolled' }
  | {
      state: 'enrolled';
      /** Number of samples behind the stored fingerprint. */
      sampleCount: number;
      /** Per-sample durations (taken from `sample_durations_s`, i.e. presumably seconds). */
      durations: number[];
      /** Last update time; multiplied by 1000 before display, so presumably epoch seconds. */
      updatedAt: number;
      /** Embedding dimension reported by the status response. */
      dim: number;
    }
  | { state: 'error'; message: string };
|
||||
|
||||
/**
 * Builds a request id of the form `<prefix>_<time>_<random>`.
 *
 * @param prefix Short tag identifying the kind of request.
 * @returns The prefix, the current epoch milliseconds in base 36 and a random
 *   integer below 1e6 in base 36, joined by underscores.
 */
function _newReqId(prefix: string): string {
  const timePart = Date.now().toString(36);
  const randomPart = Math.floor(Math.random() * 1e6).toString(36);
  return [prefix, timePart, randomPart].join('_');
}
|
||||
|
||||
/** How long to wait for an enrollment response before unlocking the UI again. */
const ENROLL_TIMEOUT_MS = 60_000;

/**
 * Voice-ID enrollment and status UI.
 *
 * Lets the user record fixed-length voice samples, submits them in a
 * `voice_id_enroll_request` over RVS, and shows the enrollment status obtained
 * via `voice_id_status_request`. The status is fetched on mount and again after
 * every enroll or delete response. What the receiving side does with the
 * samples is not visible from this component.
 */
export const VoiceIdEnrollment: React.FC = () => {
  const [status, setStatus] = useState<Status>({ state: 'loading' });
  const [samples, setSamples] = useState<Sample[]>([]);
  const [recording, setRecording] = useState(false);
  const [recordCountdown, setRecordCountdown] = useState(0);
  const [enrollPending, setEnrollPending] = useState(false);

  // Tracks whether the component is still mounted, so that the asynchronous
  // recording flow does not toast or touch state after the user left the screen.
  const mountedRef = useRef(true);
  // Handle of the enrollment safety timeout that is currently armed, if any.
  const enrollTimeoutRef = useRef<ReturnType<typeof setTimeout> | null>(null);

  /** Disarms the enrollment safety timeout if one is armed. */
  const clearEnrollTimeout = useCallback(() => {
    if (enrollTimeoutRef.current !== null) {
      clearTimeout(enrollTimeoutRef.current);
      enrollTimeoutRef.current = null;
    }
  }, []);

  // Mount/unmount bookkeeping: never let the safety timeout outlive the component.
  useEffect(() => {
    mountedRef.current = true;
    return () => {
      mountedRef.current = false;
      clearEnrollTimeout();
    };
  }, [clearEnrollTimeout]);

  /** Switches the status card to "loading" and requests a fresh status. */
  const refreshStatus = useCallback(() => {
    setStatus({ state: 'loading' });
    rvs.send('voice_id_status_request' as any, { requestId: _newReqId('vid') });
  }, []);

  // Load the status once on mount.
  useEffect(() => {
    refreshStatus();
  }, [refreshStatus]);

  // Handle RVS responses for status, enrollment and deletion.
  useEffect(() => {
    const unsub = rvs.onMessage((msg: any) => {
      if (!msg) return;
      const p = msg.payload || {};

      if (msg.type === 'voice_id_status_response') {
        if (p.ok === false) {
          setStatus({ state: 'error', message: p.error || 'Whisper-Bridge nicht erreichbar' });
          return;
        }
        if (p.enrolled) {
          setStatus({
            state: 'enrolled',
            sampleCount: p.sample_count || 0,
            durations: p.sample_durations_s || [],
            updatedAt: p.updated_at || 0,
            dim: p.embedding_dim || 0,
          });
        } else {
          setStatus({ state: 'unenrolled' });
        }
      } else if (msg.type === 'voice_id_enroll_response') {
        // A response arrived, so the safety timeout must not fire any more.
        clearEnrollTimeout();
        setEnrollPending(false);
        if (p.ok === false) {
          Alert.alert('Enrollment fehlgeschlagen', p.error || 'Unbekannter Fehler');
          return;
        }
        const rejected = (p.rejected || []).length;
        ToastAndroid.show(
          `✓ Stimme gespeichert (${p.sample_count} Samples${rejected ? `, ${rejected} verworfen` : ''})`,
          ToastAndroid.LONG,
        );
        setSamples([]);
        refreshStatus();
      } else if (msg.type === 'voice_id_delete_response') {
        ToastAndroid.show(p.removed ? '✓ Stimme gelöscht' : 'Es war keine gespeichert', ToastAndroid.SHORT);
        refreshStatus();
      }
    });
    return () => unsub();
  }, [refreshStatus, clearEnrollTimeout]);

  /**
   * Records one sample: starts the recorder, waits the fixed sample duration,
   * stops it and appends the result to the local sample list.
   */
  const recordSample = useCallback(async () => {
    if (recording || enrollPending) return;
    setRecording(true);
    setRecordCountdown(SAMPLE_DURATION_MS / 1000);

    let tickInterval: ReturnType<typeof setInterval> | undefined;
    try {
      const ok = await audioService.startRecording(false);
      if (!ok) {
        ToastAndroid.show('Aufnahme konnte nicht gestartet werden', ToastAndroid.LONG);
        return;
      }

      // Countdown timer, purely cosmetic.
      tickInterval = setInterval(() => {
        setRecordCountdown(c => Math.max(0, c - 1));
      }, 1000);

      // Auto-stop after the fixed sample duration.
      await new Promise<void>(resolve => setTimeout(resolve, SAMPLE_DURATION_MS));
      const result = await audioService.stopRecording();

      // The recorder is stopped either way; if the screen is gone, drop the result.
      if (!mountedRef.current) return;

      if (!result || !result.base64) {
        ToastAndroid.show('Aufnahme leer — nochmal probieren', ToastAndroid.LONG);
        return;
      }
      setSamples(prev => [...prev, { base64: result.base64, durationMs: result.durationMs }]);
    } catch (err: unknown) {
      console.warn('[VoiceId] recordSample:', err);
      // Best effort: make sure the recorder is released; a failure here is not actionable.
      try { await audioService.cancelRecording(); } catch {}
      if (mountedRef.current) {
        const detail = err instanceof Error && err.message ? err.message : String(err);
        ToastAndroid.show('Aufnahmefehler: ' + detail, ToastAndroid.LONG);
      }
    } finally {
      // Single exit path: stop the countdown and unlock the record button.
      if (tickInterval !== undefined) clearInterval(tickInterval);
      if (mountedRef.current) {
        setRecordCountdown(0);
        setRecording(false);
      }
    }
  }, [recording, enrollPending]);

  /** Removes the sample at the given list position. */
  const removeSample = useCallback((idx: number) => {
    setSamples(prev => prev.filter((_, i) => i !== idx));
  }, []);

  /**
   * Submits all recorded samples as one enrollment request and arms a safety
   * timeout that unlocks the UI if no response arrives.
   */
  const sendEnrollment = useCallback(() => {
    if (samples.length < SAMPLES_REQUIRED) {
      Alert.alert('Noch nicht genug',
        `Bitte mindestens ${SAMPLES_REQUIRED} Samples aufnehmen — aktuell ${samples.length}.`);
      return;
    }
    if (enrollPending) return;

    setEnrollPending(true);
    rvs.send('voice_id_enroll_request' as any, {
      requestId: _newReqId('videnroll'),
      samples: samples.map(sample => sample.base64),
    });

    // Only one timeout may be armed at a time; it is disarmed by the enroll
    // response and on unmount, so when it fires the request really timed out.
    clearEnrollTimeout();
    enrollTimeoutRef.current = setTimeout(() => {
      enrollTimeoutRef.current = null;
      setEnrollPending(false);
      ToastAndroid.show('Enrollment-Timeout — bitte erneut versuchen', ToastAndroid.LONG);
    }, ENROLL_TIMEOUT_MS);
  }, [samples, enrollPending, clearEnrollTimeout]);

  /** Asks for confirmation and then requests deletion of the stored fingerprint. */
  const deleteFingerprint = useCallback(() => {
    Alert.alert(
      'Stimme löschen?',
      'Danach muss ARIA neu enrolled werden, sonst greift Speaker-ID-Filter nicht.',
      [
        { text: 'Abbrechen', style: 'cancel' },
        {
          text: 'Löschen', style: 'destructive', onPress: () => {
            rvs.send('voice_id_delete_request' as any, { requestId: _newReqId('viddel') });
          },
        },
      ],
    );
  }, []);

  // ── Render ──────────────────────────────────────────────

  const recordDisabled = recording || enrollPending;
  const saveDisabled = samples.length < SAMPLES_REQUIRED || enrollPending;

  return (
    <ScrollView contentContainerStyle={{ paddingBottom: 30 }}>
      <Text style={s.intro}>
        ARIA erkennt deine Stimme an einem Fingerprint (SpeechBrain ECAPA-TDNN, 192 Dimensionen).
        Andere Sprecher (TV, Hintergrund, andere Personen) werden gefiltert — keine Brain-Calls,
        keine Tokens. {'\n\n'}
        Sprich {SAMPLES_REQUIRED} Mal je {SAMPLE_DURATION_MS / 1000}s ganz normal — verschiedene
        Sätze, ruhige Umgebung empfohlen.
      </Text>

      {/* Status card */}
      <View style={s.card}>
        <Text style={s.cardLabel}>Status</Text>
        {status.state === 'loading' && (
          <View style={{ flexDirection: 'row', alignItems: 'center', gap: 8 }}>
            <ActivityIndicator color="#0096FF" />
            <Text style={s.statusText}>Wird abgefragt...</Text>
          </View>
        )}
        {status.state === 'unenrolled' && (
          <Text style={[s.statusText, { color: '#FFD60A' }]}>○ Nicht enrolled — Stimme einrichten ↓</Text>
        )}
        {status.state === 'enrolled' && (
          <>
            <Text style={[s.statusText, { color: '#34C759' }]}>
              ✓ Enrolled — {status.sampleCount} Samples
              ({status.durations.reduce((a, b) => a + b, 0).toFixed(1)}s gesamt)
            </Text>
            <Text style={s.statusSub}>
              Aktualisiert {new Date(status.updatedAt * 1000).toLocaleString('de-DE')} · dim={status.dim}
            </Text>
          </>
        )}
        {status.state === 'error' && (
          <>
            <Text style={[s.statusText, { color: '#FF6E6E' }]}>⚠ {status.message}</Text>
            {/* Without this the error state is a dead end: refresh is otherwise only offered when enrolled. */}
            <TouchableOpacity onPress={refreshStatus} style={s.secondaryBtn}>
              <Text style={s.secondaryBtnText}>🔄 Status aktualisieren</Text>
            </TouchableOpacity>
          </>
        )}
      </View>

      {/* Recording area */}
      <View style={s.card}>
        <Text style={s.cardLabel}>Samples ({samples.length}/{SAMPLES_REQUIRED})</Text>
        {samples.length === 0 && !recording && (
          <Text style={s.hint}>Tipp: sprich klare normale Sätze, je 3-4 Sekunden Audio.</Text>
        )}
        {samples.map((sample, idx) => (
          <View key={idx} style={s.sampleRow}>
            <Text style={s.sampleText}>
              Sample {idx + 1} · {(sample.durationMs / 1000).toFixed(1)}s
            </Text>
            <TouchableOpacity onPress={() => removeSample(idx)} disabled={enrollPending}>
              <Text style={{ color: '#FF6E6E', fontSize: 18 }}>✕</Text>
            </TouchableOpacity>
          </View>
        ))}

        <TouchableOpacity
          onPress={recordSample}
          disabled={recordDisabled}
          style={[s.recordBtn, recordDisabled && { opacity: 0.5 }]}
        >
          {recording ? (
            <>
              <ActivityIndicator color="#fff" />
              <Text style={s.recordBtnText}>Aufnahme läuft… {recordCountdown}s</Text>
            </>
          ) : (
            <Text style={s.recordBtnText}>⏺ Sample {samples.length + 1} aufnehmen</Text>
          )}
        </TouchableOpacity>

        {samples.length > 0 && !recording && (
          <TouchableOpacity
            onPress={() => setSamples([])}
            disabled={enrollPending}
            style={s.resetBtn}
          >
            <Text style={s.resetBtnText}>Alle verwerfen</Text>
          </TouchableOpacity>
        )}
      </View>

      {/* Actions */}
      <View style={{ flexDirection: 'row', gap: 8, marginTop: 8 }}>
        <TouchableOpacity
          onPress={sendEnrollment}
          disabled={saveDisabled}
          style={[
            s.primaryBtn,
            saveDisabled && { opacity: 0.4 },
          ]}
        >
          {enrollPending ? (
            <>
              <ActivityIndicator color="#fff" />
              <Text style={s.primaryBtnText}>Wird verarbeitet…</Text>
            </>
          ) : (
            <Text style={s.primaryBtnText}>
              ✓ Speichern ({samples.length}/{SAMPLES_REQUIRED})
            </Text>
          )}
        </TouchableOpacity>
      </View>

      {/* Management */}
      {status.state === 'enrolled' && (
        <View style={[s.card, { marginTop: 20 }]}>
          <Text style={s.cardLabel}>Verwaltung</Text>
          <TouchableOpacity onPress={refreshStatus} style={s.secondaryBtn}>
            <Text style={s.secondaryBtnText}>🔄 Status aktualisieren</Text>
          </TouchableOpacity>
          <TouchableOpacity onPress={deleteFingerprint} style={s.dangerBtn}>
            <Text style={s.dangerBtnText}>🗑 Fingerprint löschen (Re-Enrollment nötig)</Text>
          </TouchableOpacity>
        </View>
      )}
    </ScrollView>
  );
};
|
||||
|
||||
// Colours used by more than one style entry.
const palette = {
  textMuted: '#8888AA',
  textDim: '#555570',
  textMain: '#E0E0F0',
  white: '#fff',
  danger: '#E55C5C',
} as const;

// Layout shared by the two large action buttons (record / save).
const bigButtonBase = {
  flexDirection: 'row',
  alignItems: 'center',
  justifyContent: 'center',
  gap: 8,
  borderRadius: 8,
  paddingVertical: 14,
} as const;

// Label style shared by the two large action buttons.
const bigButtonLabel = {
  color: palette.white,
  fontSize: 15,
  fontWeight: '700',
} as const;

// Layout shared by the small buttons in the management card.
const smallButtonBase = {
  borderRadius: 6,
  paddingVertical: 10,
  alignItems: 'center',
  marginTop: 6,
} as const;

// Label metrics shared by the small management buttons.
const smallButtonLabel = {
  fontSize: 13,
  fontWeight: '600',
} as const;

/** Styles for the Voice-ID enrollment component. */
const s = StyleSheet.create({
  // Introductory paragraph above the cards.
  intro: {
    color: palette.textMuted,
    fontSize: 13,
    lineHeight: 19,
    marginBottom: 16,
    paddingHorizontal: 4,
  },

  // Card container and its uppercase caption.
  card: {
    backgroundColor: 'rgba(30,30,46,0.6)',
    borderRadius: 8,
    padding: 14,
    marginBottom: 10,
  },
  cardLabel: {
    color: palette.textMuted,
    fontSize: 11,
    fontWeight: '700',
    textTransform: 'uppercase',
    letterSpacing: 0.5,
    marginBottom: 8,
  },

  // Status card texts.
  statusText: {
    color: palette.textMain,
    fontSize: 14,
    fontWeight: '600',
  },
  statusSub: {
    color: palette.textDim,
    fontSize: 11,
    marginTop: 4,
  },

  // Sample list.
  hint: {
    color: palette.textDim,
    fontSize: 12,
    fontStyle: 'italic',
    marginBottom: 8,
  },
  sampleRow: {
    flexDirection: 'row',
    justifyContent: 'space-between',
    alignItems: 'center',
    paddingVertical: 6,
    borderBottomWidth: 1,
    borderColor: '#2A2A3E',
  },
  sampleText: {
    color: palette.textMain,
    fontSize: 13,
  },

  // Record button and the "discard all" link below it.
  recordBtn: {
    ...bigButtonBase,
    backgroundColor: palette.danger,
    marginTop: 12,
  },
  recordBtnText: { ...bigButtonLabel },
  resetBtn: {
    alignItems: 'center',
    paddingVertical: 8,
    marginTop: 6,
  },
  resetBtnText: {
    color: '#FFD60A',
    fontSize: 12,
  },

  // Save button.
  primaryBtn: {
    ...bigButtonBase,
    flex: 1,
    backgroundColor: '#34C759',
  },
  primaryBtnText: { ...bigButtonLabel },

  // Management card buttons.
  secondaryBtn: {
    ...smallButtonBase,
    backgroundColor: 'rgba(0,150,255,0.15)',
  },
  secondaryBtnText: {
    ...smallButtonLabel,
    color: '#0096FF',
  },
  dangerBtn: {
    ...smallButtonBase,
    backgroundColor: 'rgba(229,92,92,0.15)',
  },
  dangerBtnText: {
    ...smallButtonLabel,
    color: palette.danger,
  },
});
|
||||
|
||||
export default VoiceIdEnrollment;
|
||||
@@ -91,6 +91,7 @@ import MemoryBrowser from '../components/MemoryBrowser';
|
||||
import TriggerBrowser from '../components/TriggerBrowser';
|
||||
import SkillBrowser from '../components/SkillBrowser';
|
||||
import OAuthBrowser from '../components/OAuthBrowser';
|
||||
import VoiceIdEnrollment from '../components/VoiceIdEnrollment';
|
||||
import { isVerboseLogging, setVerboseLogging, isDebugLogsToBridge, setDebugLogsToBridge, APP_LOG_EVENT } from '../services/logger';
|
||||
import {
|
||||
isWakeReadySoundEnabled,
|
||||
@@ -136,6 +137,7 @@ const SETTINGS_SECTIONS = [
|
||||
{ id: 'general', icon: '⚙️', label: 'Allgemein', desc: 'Betriebsmodus, GPS-Standort' },
|
||||
{ id: 'voice_input', icon: '🎙️', label: 'Spracheingabe', desc: 'Stille-Toleranz, Aufnahmedauer' },
|
||||
{ id: 'wake_word', icon: '👂', label: 'Wake-Word', desc: 'Wake-Word-Auswahl' },
|
||||
{ id: 'voice_id', icon: '🎤', label: 'Stimme einrichten', desc: 'Sprecher-Erkennung — nur deine Stimme triggert ARIA' },
|
||||
{ id: 'voice_output', icon: '🔊', label: 'Sprachausgabe', desc: 'Stimmen, Pre-Roll, Geschwindigkeit' },
|
||||
{ id: 'storage', icon: '📁', label: 'Speicher', desc: 'Anhang-Speicherort, Auto-Download' },
|
||||
{ id: 'files', icon: '📂', label: 'Dateien', desc: 'ARIA- und User-Dateien — anzeigen, löschen' },
|
||||
@@ -1836,6 +1838,12 @@ const SettingsScreen: React.FC = () => {
|
||||
</View>
|
||||
</>)}
|
||||
|
||||
{/* === Voice-ID Enrollment (Sprecher-Erkennung) === */}
|
||||
{currentSection === 'voice_id' && (<>
|
||||
<Text style={styles.sectionTitle}>Stimme einrichten</Text>
|
||||
<VoiceIdEnrollment />
|
||||
</>)}
|
||||
|
||||
{/* === Sprachausgabe (geraetelokal) === */}
|
||||
{currentSection === 'voice_output' && (<>
|
||||
<Text style={styles.sectionTitle}>Sprachausgabe</Text>
|
||||
|
||||
Reference in New Issue
Block a user