e3fe27f736
App-Seite: - VoiceIdEnrollment.tsx (neue Komponente, ~370 Zeilen): Status-Karte (loading/unenrolled/enrolled/error), Sample-Recorder mit Countdown (4s fest pro Sample), Liste mit einzelnem Loeschen, Save-Button (disabled bis 5 Samples), Fingerprint-Delete mit Confirm. - SettingsScreen.tsx: neue Section 🎤 'Stimme einrichten' zwischen Wake-Word und Sprachausgabe. - Sample-Format: WAV via audioService.startRecording — wird whisper-bridge-seitig per wave-Modul gestrippt. Diagnostic-Seite: - Neue settings-section 'Voice-ID (Sprecher-Erkennung)': Status-Anzeige (live ueber voice_id_status_response), Threshold-Slider 0.30-0.70 (persistiert in voice_config.json, broadcast als config-Message), Refresh + Delete-Button. - server.js: 2 neue actions (voice_id_status, voice_id_delete), send_voice_config nimmt voiceIdThreshold mit auf. Backend: - speaker_id.py: _normalize_audio_bytes erkennt jetzt WAV-Header (RIFF/WAVE) und strippt auf rohes PCM — sonst werfen die ECAPA- Embeddings auf den 44-Byte-Header rein. - bridge.py: config-Broadcast-Handler setzt voiceIdThreshold auf speaker_id.DEFAULT_THRESHOLD (wird erst in Phase 3 beim Gating genutzt, persistiert aber schon). Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
427 lines
13 KiB
TypeScript
427 lines
13 KiB
TypeScript
/**
 * Voice-ID enrollment + status — app side.
 *
 * The user records 5-7 samples (4 s each) of their voice; the app sends them
 * to the whisper-bridge via RVS (voice_id_enroll_request). The bridge computes
 * SpeechBrain ECAPA embeddings, averages them into a fingerprint and stores it
 * at /voice-id/fingerprint.json.
 *
 * Usage: embedded in SettingsScreen for the 'voice_id' section.
 * Fetches the status on mount and again after every enroll/delete.
 */
|
|
|
|
import React, { useCallback, useEffect, useState } from 'react';
|
|
import {
|
|
ActivityIndicator,
|
|
Alert,
|
|
ScrollView,
|
|
StyleSheet,
|
|
Text,
|
|
ToastAndroid,
|
|
TouchableOpacity,
|
|
View,
|
|
} from 'react-native';
|
|
|
|
import audioService from '../services/audio';
|
|
import rvs from '../services/rvs';
|
|
|
|
/** Fixed recording length of one voice sample, in milliseconds (4 s per sample). */
const SAMPLE_DURATION_MS = 4000;
|
|
/** Minimum number of recorded samples before the enrollment can be saved. */
const SAMPLES_REQUIRED = 5;
|
|
|
|
/** One locally recorded voice sample waiting to be sent for enrollment. */
interface Sample {
  /** Base64-encoded audio as returned by `audioService.stopRecording()`. */
  base64: string;
  /** Length of the recording in milliseconds. */
  durationMs: number;
}
|
|
|
|
/** The status request has been sent and no answer has been processed yet. */
interface LoadingStatus {
  state: 'loading';
}

/** The bridge reported that no fingerprint is stored. */
interface UnenrolledStatus {
  state: 'unenrolled';
}

/** The bridge reported a stored fingerprint together with its metadata. */
interface EnrolledStatus {
  state: 'enrolled';
  /** Number of samples the fingerprint was built from. */
  sampleCount: number;
  /** Per-sample durations; filled from `sample_durations_s` and rendered in seconds. */
  durations: number[];
  /** Last-update timestamp; multiplied by 1000 before being passed to `Date`. */
  updatedAt: number;
  /** Embedding dimension reported by the bridge. */
  dim: number;
}

/** The status request failed; `message` is shown to the user. */
interface ErrorStatus {
  state: 'error';
  message: string;
}

/** Discriminated union (tag: `state`) describing the enrollment status card. */
type Status = LoadingStatus | UnenrolledStatus | EnrolledStatus | ErrorStatus;
|
|
|
|
/**
 * Builds a request id of the form `<prefix>_<time>_<random>`.
 *
 * @param prefix Tag placed in front of the id (e.g. `'vid'`).
 * @returns The prefix, the current `Date.now()` in base 36 and a random
 *          integer below 1e6 in base 36, joined with underscores.
 */
function _newReqId(prefix: string): string {
  const timePart = Date.now().toString(36);
  const randomPart = Math.floor(Math.random() * 1e6).toString(36);
  return [prefix, timePart, randomPart].join('_');
}
|
|
|
|
/**
 * Settings section for enrolling, inspecting and deleting the voice fingerprint.
 *
 * Behaviour visible in this component:
 * - Requests the enrollment status on mount and again after every enroll or
 *   delete response.
 * - Records fixed-length samples (`SAMPLE_DURATION_MS`) via `audioService` and
 *   keeps them in local state until they are sent or discarded.
 * - Sends all collected samples in one `voice_id_enroll_request` once at least
 *   `SAMPLES_REQUIRED` samples exist.
 * - Guards the enrollment with a 60 s safety timeout. The timer is tracked in a
 *   ref and cancelled when a response arrives, when a new enrollment is sent
 *   and on unmount, so a stale timer can never release or "time out" a later
 *   enrollment.
 */
export const VoiceIdEnrollment: React.FC = () => {
  const [status, setStatus] = useState<Status>({ state: 'loading' });
  const [samples, setSamples] = useState<Sample[]>([]);
  const [recording, setRecording] = useState(false);
  const [recordCountdown, setRecordCountdown] = useState(0);
  const [enrollPending, setEnrollPending] = useState(false);
  // NOTE(review): the id of the latest status request is stored but never
  // compared against incoming responses. Whether the bridge echoes `requestId`
  // is not visible from this file — confirm before adding correlation.
  const [pendingReqId, setPendingReqId] = useState<string | null>(null);

  // Handle of the running enrollment safety timeout (null when none is armed).
  const enrollTimeoutRef = React.useRef<ReturnType<typeof setTimeout> | null>(null);

  /** Cancels the enrollment safety timeout if one is currently armed. */
  const clearEnrollTimeout = useCallback(() => {
    if (enrollTimeoutRef.current !== null) {
      clearTimeout(enrollTimeoutRef.current);
      enrollTimeoutRef.current = null;
    }
  }, []);

  // Never let the safety timeout fire after the component is gone.
  useEffect(() => {
    return () => clearEnrollTimeout();
  }, [clearEnrollTimeout]);

  /** Switches the status card to "loading" and asks the bridge for the current status. */
  const refreshStatus = useCallback(() => {
    setStatus({ state: 'loading' });
    const reqId = _newReqId('vid');
    setPendingReqId(reqId);
    rvs.send('voice_id_status_request' as any, { requestId: reqId });
  }, []);

  // Initial status fetch on mount.
  useEffect(() => {
    refreshStatus();
  }, [refreshStatus]);

  // Handle RVS responses (status / enroll / delete).
  useEffect(() => {
    const unsub = rvs.onMessage((msg: any) => {
      if (!msg) return;
      const p = msg.payload || {};
      if (msg.type === 'voice_id_status_response') {
        if (p.ok === false) {
          setStatus({ state: 'error', message: p.error || 'Whisper-Bridge nicht erreichbar' });
          return;
        }
        if (p.enrolled) {
          setStatus({
            state: 'enrolled',
            sampleCount: p.sample_count || 0,
            durations: p.sample_durations_s || [],
            updatedAt: p.updated_at || 0,
            dim: p.embedding_dim || 0,
          });
        } else {
          setStatus({ state: 'unenrolled' });
        }
      } else if (msg.type === 'voice_id_enroll_response') {
        // A response arrived: the safety timeout of this attempt is obsolete.
        clearEnrollTimeout();
        setEnrollPending(false);
        if (p.ok === false) {
          Alert.alert('Enrollment fehlgeschlagen', p.error || 'Unbekannter Fehler');
          return;
        }
        const rejected = (p.rejected || []).length;
        ToastAndroid.show(
          `✓ Stimme gespeichert (${p.sample_count} Samples${rejected ? `, ${rejected} verworfen` : ''})`,
          ToastAndroid.LONG,
        );
        setSamples([]);
        refreshStatus();
      } else if (msg.type === 'voice_id_delete_response') {
        // NOTE(review): unlike the other two responses, `p.ok === false` is not
        // checked here; the delete payload schema is not visible — confirm.
        ToastAndroid.show(p.removed ? '✓ Stimme gelöscht' : 'Es war keine gespeichert', ToastAndroid.SHORT);
        refreshStatus();
      }
    });
    return () => unsub();
  }, [refreshStatus, clearEnrollTimeout]);

  /** Records one sample: fixed duration, then automatic stop. */
  const recordSample = useCallback(async () => {
    if (recording || enrollPending) return;
    setRecording(true);
    setRecordCountdown(SAMPLE_DURATION_MS / 1000);
    try {
      // NOTE(review): the meaning of the `false` argument is defined by
      // audioService and not visible here.
      const ok = await audioService.startRecording(false);
      if (!ok) {
        ToastAndroid.show('Aufnahme konnte nicht gestartet werden', ToastAndroid.LONG);
        setRecording(false);
        setRecordCountdown(0);
        return;
      }
      // Countdown timer (UI only).
      const tickInterval = setInterval(() => {
        setRecordCountdown(c => Math.max(0, c - 1));
      }, 1000);
      // Auto-stop after the fixed sample duration.
      await new Promise(r => setTimeout(r, SAMPLE_DURATION_MS));
      clearInterval(tickInterval);
      const result = await audioService.stopRecording();
      setRecordCountdown(0);
      setRecording(false);
      if (!result || !result.base64) {
        ToastAndroid.show('Aufnahme leer — nochmal probieren', ToastAndroid.LONG);
        return;
      }
      setSamples(prev => [...prev, { base64: result.base64, durationMs: result.durationMs }]);
    } catch (err: any) {
      console.warn('[VoiceId] recordSample:', err);
      // Best effort: a failing cancel must not hide the original error.
      try { await audioService.cancelRecording(); } catch {}
      setRecording(false);
      setRecordCountdown(0);
      ToastAndroid.show('Aufnahmefehler: ' + (err?.message || err), ToastAndroid.LONG);
    }
  }, [recording, enrollPending]);

  /** Removes the sample at position `idx` from the local list. */
  const removeSample = useCallback((idx: number) => {
    setSamples(prev => prev.filter((_, i) => i !== idx));
  }, []);

  /** Sends all collected samples to the bridge and arms the safety timeout. */
  const sendEnrollment = useCallback(() => {
    if (samples.length < SAMPLES_REQUIRED) {
      Alert.alert('Noch nicht genug',
        `Bitte mindestens ${SAMPLES_REQUIRED} Samples aufnehmen — aktuell ${samples.length}.`);
      return;
    }
    if (enrollPending) return;
    setEnrollPending(true);
    const reqId = _newReqId('videnroll');
    rvs.send('voice_id_enroll_request' as any, {
      requestId: reqId,
      samples: samples.map(s => s.base64),
    });
    // Safety timeout: release the UI if nothing arrives within 60 s. The timer
    // is cancelled as soon as a response arrives, so when it fires the request
    // is still unanswered and the toast is always justified.
    clearEnrollTimeout();
    enrollTimeoutRef.current = setTimeout(() => {
      enrollTimeoutRef.current = null;
      setEnrollPending(false);
      ToastAndroid.show('Enrollment-Timeout — bitte erneut versuchen', ToastAndroid.LONG);
    }, 60_000);
  }, [samples, enrollPending, clearEnrollTimeout]);

  /** Asks for confirmation and then requests deletion of the stored fingerprint. */
  const deleteFingerprint = useCallback(() => {
    Alert.alert(
      'Stimme löschen?',
      'Danach muss ARIA neu enrolled werden, sonst greift Speaker-ID-Filter nicht.',
      [
        { text: 'Abbrechen', style: 'cancel' },
        {
          text: 'Löschen', style: 'destructive', onPress: () => {
            const reqId = _newReqId('viddel');
            rvs.send('voice_id_delete_request' as any, { requestId: reqId });
          },
        },
      ],
    );
  }, []);

  // ── Render ──────────────────────────────────────────────

  return (
    <ScrollView contentContainerStyle={{ paddingBottom: 30 }}>
      <Text style={s.intro}>
        ARIA erkennt deine Stimme an einem Fingerprint (SpeechBrain ECAPA-TDNN, 192 Dimensionen).
        Andere Sprecher (TV, Hintergrund, andere Personen) werden gefiltert — keine Brain-Calls,
        keine Tokens. {'\n\n'}
        Sprich {SAMPLES_REQUIRED} Mal je {SAMPLE_DURATION_MS / 1000}s ganz normal — verschiedene
        Sätze, ruhige Umgebung empfohlen.
      </Text>

      {/* Status card */}
      <View style={s.card}>
        <Text style={s.cardLabel}>Status</Text>
        {status.state === 'loading' && (
          <View style={{ flexDirection: 'row', alignItems: 'center', gap: 8 }}>
            <ActivityIndicator color="#0096FF" />
            <Text style={s.statusText}>Wird abgefragt...</Text>
          </View>
        )}
        {status.state === 'unenrolled' && (
          <Text style={[s.statusText, { color: '#FFD60A' }]}>○ Nicht enrolled — Stimme einrichten ↓</Text>
        )}
        {status.state === 'enrolled' && (
          <>
            <Text style={[s.statusText, { color: '#34C759' }]}>
              ✓ Enrolled — {status.sampleCount} Samples
              ({status.durations.reduce((a, b) => a + b, 0).toFixed(1)}s gesamt)
            </Text>
            <Text style={s.statusSub}>
              Aktualisiert {new Date(status.updatedAt * 1000).toLocaleString('de-DE')} · dim={status.dim}
            </Text>
          </>
        )}
        {status.state === 'error' && (
          <Text style={[s.statusText, { color: '#FF6E6E' }]}>⚠ {status.message}</Text>
        )}
      </View>

      {/* Recording area */}
      <View style={s.card}>
        <Text style={s.cardLabel}>Samples ({samples.length}/{SAMPLES_REQUIRED})</Text>
        {samples.length === 0 && !recording && (
          <Text style={s.hint}>Tipp: sprich klare normale Sätze, je 3-4 Sekunden Audio.</Text>
        )}
        {samples.map((sample, idx) => (
          <View key={idx} style={s.sampleRow}>
            <Text style={s.sampleText}>
              Sample {idx + 1} · {(sample.durationMs / 1000).toFixed(1)}s
            </Text>
            <TouchableOpacity onPress={() => removeSample(idx)} disabled={enrollPending}>
              <Text style={{ color: '#FF6E6E', fontSize: 18 }}>✕</Text>
            </TouchableOpacity>
          </View>
        ))}

        <TouchableOpacity
          onPress={recordSample}
          disabled={recording || enrollPending}
          style={[s.recordBtn, (recording || enrollPending) && { opacity: 0.5 }]}
        >
          {recording ? (
            <>
              <ActivityIndicator color="#fff" />
              <Text style={s.recordBtnText}>Aufnahme läuft… {recordCountdown}s</Text>
            </>
          ) : (
            <Text style={s.recordBtnText}>⏺ Sample {samples.length + 1} aufnehmen</Text>
          )}
        </TouchableOpacity>

        {samples.length > 0 && !recording && (
          <TouchableOpacity
            onPress={() => setSamples([])}
            disabled={enrollPending}
            style={s.resetBtn}
          >
            <Text style={s.resetBtnText}>Alle verwerfen</Text>
          </TouchableOpacity>
        )}
      </View>

      {/* Actions */}
      <View style={{ flexDirection: 'row', gap: 8, marginTop: 8 }}>
        <TouchableOpacity
          onPress={sendEnrollment}
          disabled={samples.length < SAMPLES_REQUIRED || enrollPending}
          style={[
            s.primaryBtn,
            (samples.length < SAMPLES_REQUIRED || enrollPending) && { opacity: 0.4 },
          ]}
        >
          {enrollPending ? (
            <>
              <ActivityIndicator color="#fff" />
              <Text style={s.primaryBtnText}>Wird verarbeitet…</Text>
            </>
          ) : (
            <Text style={s.primaryBtnText}>
              ✓ Speichern ({samples.length}/{SAMPLES_REQUIRED})
            </Text>
          )}
        </TouchableOpacity>
      </View>

      {/* Management (only while a fingerprint is stored) */}
      {status.state === 'enrolled' && (
        <View style={[s.card, { marginTop: 20 }]}>
          <Text style={s.cardLabel}>Verwaltung</Text>
          <TouchableOpacity onPress={refreshStatus} style={s.secondaryBtn}>
            <Text style={s.secondaryBtnText}>🔄 Status aktualisieren</Text>
          </TouchableOpacity>
          <TouchableOpacity onPress={deleteFingerprint} style={s.dangerBtn}>
            <Text style={s.dangerBtnText}>🗑 Fingerprint löschen (Re-Enrollment nötig)</Text>
          </TouchableOpacity>
        </View>
      )}
    </ScrollView>
  );
};
|
|
|
|
/** Colours that occur in more than one style entry. */
const palette = {
  muted: '#8888AA',
  faint: '#555570',
  text: '#E0E0F0',
  white: '#fff',
  danger: '#E55C5C',
} as const;

/** Layout shared by the two large filled buttons (record + save). */
const filledButtonLayout = {
  flexDirection: 'row',
  alignItems: 'center',
  justifyContent: 'center',
  gap: 8,
  borderRadius: 8,
  paddingVertical: 14,
} as const;

/** Label typography shared by the two large filled buttons. */
const filledButtonLabel = {
  color: palette.white,
  fontSize: 15,
  fontWeight: '700',
} as const;

/** Layout shared by the tinted management buttons (refresh + delete). */
const tintedButtonLayout = {
  borderRadius: 6,
  paddingVertical: 10,
  alignItems: 'center',
  marginTop: 6,
} as const;

/** Label typography shared by the tinted management buttons. */
const tintedButtonLabel = {
  fontSize: 13,
  fontWeight: '600',
} as const;

/** Styles of the Voice-ID enrollment section. */
const s = StyleSheet.create({
  // Intro paragraph and cards
  intro: {
    color: palette.muted,
    fontSize: 13,
    lineHeight: 19,
    marginBottom: 16,
    paddingHorizontal: 4,
  },
  card: {
    backgroundColor: 'rgba(30,30,46,0.6)',
    borderRadius: 8,
    padding: 14,
    marginBottom: 10,
  },
  cardLabel: {
    color: palette.muted,
    fontSize: 11,
    fontWeight: '700',
    textTransform: 'uppercase',
    letterSpacing: 0.5,
    marginBottom: 8,
  },

  // Status card texts
  statusText: { color: palette.text, fontSize: 14, fontWeight: '600' },
  statusSub: { color: palette.faint, fontSize: 11, marginTop: 4 },
  hint: {
    color: palette.faint,
    fontSize: 12,
    fontStyle: 'italic',
    marginBottom: 8,
  },

  // Sample list
  sampleRow: {
    flexDirection: 'row',
    justifyContent: 'space-between',
    alignItems: 'center',
    paddingVertical: 6,
    borderBottomWidth: 1,
    borderColor: '#2A2A3E',
  },
  sampleText: { color: palette.text, fontSize: 13 },

  // Record / reset
  recordBtn: {
    ...filledButtonLayout,
    backgroundColor: palette.danger,
    marginTop: 12,
  },
  recordBtnText: { ...filledButtonLabel },
  resetBtn: { alignItems: 'center', paddingVertical: 8, marginTop: 6 },
  resetBtnText: { color: '#FFD60A', fontSize: 12 },

  // Save
  primaryBtn: {
    flex: 1,
    ...filledButtonLayout,
    backgroundColor: '#34C759',
  },
  primaryBtnText: { ...filledButtonLabel },

  // Management
  secondaryBtn: {
    ...tintedButtonLayout,
    backgroundColor: 'rgba(0,150,255,0.15)',
  },
  secondaryBtnText: { ...tintedButtonLabel, color: '#0096FF' },
  dangerBtn: {
    ...tintedButtonLayout,
    backgroundColor: 'rgba(229,92,92,0.15)',
  },
  dangerBtnText: { ...tintedButtonLabel, color: palette.danger },
});
|
|
|
|
// The component is also available as a named export; this default export points to the same component.
export default VoiceIdEnrollment;
|