/**
 * VoiceCloneModal — record your own voice and upload it to XTTS.
 *
 * Flow:
 * - The modal shows a text to read aloud (>30s reading time) plus a record button
 * - While recording: max 30s, progress bar, countdown
 * - On stop: ask for a name, then send the take as voice_upload via RVS
 * - The XTTS bridge stores it as /voices/.wav and replies with xtts_voice_saved
 *   (NOTE(review): the file name placeholder in that path looks lost — confirm)
 */ import React, { useCallback, useEffect, useRef, useState } from 'react'; import { Modal, View, Text, TouchableOpacity, StyleSheet, Alert, ScrollView, ActivityIndicator, TextInput, } from 'react-native'; import audioService from '../services/audio'; import rvs from '../services/rvs'; /** Props of the modal: visible drives the state-reset effect, onClose is invoked after an upload has been sent. */ interface Props { visible: boolean; onClose: () => void; } /** Text rendered inside the modal for the user to read aloud while recording. */ const SAMPLE_TEXT = `Das ist meine eigene Stimme fuer ARIA. Ich lese jetzt einen laengeren Absatz laut vor, damit das Voice-Cloning eine gute Grundlage hat. Guten Tag, ich heisse Stefan und baue gerade mit grosser Begeisterung an meinem persoenlichen KI-Assistenten. Wir automatisieren Infrastruktur, managen Sessions und spielen mit Sprachsynthese. Die letzten Jahre habe ich viel gelernt, vor allem dass Geduld genauso wichtig ist wie Neugier. Hoert sich das jetzt an wie ich selbst? 
Wenn alles klappt, spricht ARIA bald mit dieser Stimme.`; /** Elapsed time (ms) at which the timer tick requests a stop; also the denominator of the progress value. */ const MAX_DURATION_MS = 30000; /** Elapsed time (ms) from which a take counts as long enough; switches the hint shown while recording. */ const TARGET_DURATION_MS = 15000; /**
 * Modal component that records a voice sample and submits it for cloning.
 *
 * State kept here:
 * - recording: a capture is in progress
 * - durationMs: elapsed time, updated by a 100 ms interval while recording
 * - voiceName: name for the voice; must match /^[a-zA-Z0-9_-]+$/ before upload
 * - processing: set while the upload handler runs
 * - recordingPath: despite the name this holds the base64 audio taken from
 *   result.base64, not a file path; an empty string means there is no take yet
 *
 * NOTE(review): startRecording is memoized with an empty dependency list, so the
 * stopRecording its interval calls at MAX_DURATION_MS is the first-render
 * instance whose captured recording flag is still false. That instance clears
 * the timer and returns before audioService.stopRecording() runs, so the
 * automatic stop does not appear to finalize the take. Confirm; tracking the
 * flag in a ref would avoid the stale closure.
 * NOTE(review): the effect labelled as unmount cleanup depends on [recording],
 * so its cleanup also runs on every change of that flag. It may clear the
 * interval that startRecording has just created, and it calls
 * audioService.stopRecording() a second time after a manual stop — verify.
 * NOTE(review): the reset effect clears the interval without setting
 * timerRef.current back to null and does not stop the audio service itself.
 * NOTE(review): the success alert is shown right after rvs.send(...); nothing
 * here waits for a confirmation from the server.
 * NOTE(review): in this copy the generic type arguments (React.FC, useRef) and
 * the element tags of the returned markup are not visible; check the file
 * against version control before building.
 */ const VoiceCloneModal: React.FC = ({ visible, onClose }) => { const [recording, setRecording] = useState(false); const [durationMs, setDurationMs] = useState(0); const [voiceName, setVoiceName] = useState(''); const [processing, setProcessing] = useState(false); const [recordingPath, setRecordingPath] = useState(''); const timerRef = useRef | null>(null); const startTimeRef = useRef(0); // Reset state when the modal closes useEffect(() => { if (!visible) { setRecording(false); setDurationMs(0); setVoiceName(''); setProcessing(false); setRecordingPath(''); if (timerRef.current) clearInterval(timerRef.current); } }, [visible]); // Cleanup on unmount (also re-runs whenever recording changes) useEffect(() => { return () => { if (timerRef.current) clearInterval(timerRef.current); if (recording) audioService.stopRecording().catch(() => {}); }; }, [recording]); const startRecording = useCallback(async () => { // Fresh recording setDurationMs(0); setRecordingPath(''); const ok = await audioService.startRecording(false); if (!ok) { Alert.alert('Fehler', 'Aufnahme konnte nicht gestartet werden (Mikrofon-Berechtigung?)'); return; } setRecording(true); startTimeRef.current = Date.now(); timerRef.current = setInterval(async () => { const elapsed = Date.now() - startTimeRef.current; setDurationMs(elapsed); if (elapsed >= MAX_DURATION_MS) { await stopRecording(); } }, 100); }, []); const stopRecording = useCallback(async () => { if (timerRef.current) { clearInterval(timerRef.current); timerRef.current = null; } if (!recording) return; const result = await audioService.stopRecording(); setRecording(false); if (!result) { Alert.alert('Keine Sprache erkannt', 'Versuch es bitte nochmal — sprich bis der Timer mindestens 10 Sekunden anzeigt.'); setDurationMs(0); return; } // The temp file has already been deleted (stopRecording cleaned up). 
// But we need the base64 from result directly for the upload. // result.base64 is already available. setRecordingPath(result.base64); }, [recording]); const uploadVoice = useCallback(async () => { const name = voiceName.trim(); if (!name) { Alert.alert('Name fehlt', 'Bitte gib der Stimme einen Namen (nur Buchstaben, Zahlen, _ und -).'); return; } if (!/^[a-zA-Z0-9_-]+$/.test(name)) { Alert.alert('Ungueltiger Name', 'Nur Buchstaben, Zahlen, _ und - erlaubt.'); return; } if (!recordingPath) { Alert.alert('Keine Aufnahme', 'Bitte zuerst aufnehmen.'); return; } setProcessing(true); try { // voice_upload expects samples as an array with base64 (copied from the diagnostic format) rvs.send('voice_upload' as any, { name, samples: [{ base64: recordingPath }], }); Alert.alert('Hochgeladen', `Stimme "${name}" wird vom XTTS-Server verarbeitet. Nach ein paar Sekunden in der Liste verfuegbar.`); onClose(); } catch (err: any) { Alert.alert('Fehler', err.message); } finally { setProcessing(false); } }, [voiceName, recordingPath, onClose]); const progress = Math.min(durationMs / MAX_DURATION_MS, 1); const sec = Math.floor(durationMs / 1000); const enoughRecorded = durationMs >= TARGET_DURATION_MS; return ( Eigene Stimme aufnehmen {'\u2715'} Lies den Text laut und deutlich vor. Maximal 30 Sekunden. Je mehr du sprichst (ziel: bis zum Ende des Textes, ca. 20-30s), desto besser wird die geklonte Stimme. {SAMPLE_TEXT} {/* Timer + progress */} {sec.toString().padStart(2, '0')} / 30 s {/* Record button */} {!recordingPath && ( {recording ? '\u25A0' : '\u25CF'} {recording ? 'Stop' : 'Aufnahme starten'} )} {/* After recording: name + upload */} {recordingPath && ( Aufnahme ({sec}s) fertig. Vergib einen Namen und lade hoch. { setRecordingPath(''); setDurationMs(0); }} > Nochmal aufnehmen {processing ? : Hochladen } )} {recording && !enoughRecorded && ( Bitte weiter lesen — mindestens 15 Sekunden )} {recording && enoughRecorded && ( Genug Audio fuer eine gute Clonung. Du kannst stoppen. 
)} ); }; /** Style definitions for the modal, created with StyleSheet.create. */ const styles = StyleSheet.create({ container: { flex: 1, backgroundColor: '#0D0D1A', }, header: { flexDirection: 'row', alignItems: 'center', justifyContent: 'space-between', paddingHorizontal: 16, paddingTop: 48, paddingBottom: 16, borderBottomWidth: 1, borderBottomColor: '#1E1E2E', }, title: { color: '#FFFFFF', fontSize: 18, fontWeight: '700', }, closeX: { color: '#8888AA', fontSize: 24, paddingHorizontal: 8, }, content: { flex: 1, }, hint: { color: '#8888AA', fontSize: 13, lineHeight: 20, }, sampleTextBox: { marginTop: 12, padding: 14, backgroundColor: '#12122A', borderRadius: 10, borderWidth: 1, borderColor: '#1E1E2E', }, sampleText: { color: '#E0E0F0', fontSize: 15, lineHeight: 24, }, timer: { color: '#666680', fontSize: 42, fontWeight: '700', fontVariant: ['tabular-nums'], }, timerActive: { color: '#FF3B30', }, progressBar: { marginTop: 8, width: '100%', height: 8, backgroundColor: '#1E1E2E', borderRadius: 4, overflow: 'hidden', }, progressFill: { height: '100%', }, recordBtn: { marginTop: 24, flexDirection: 'row', alignItems: 'center', justifyContent: 'center', gap: 12, backgroundColor: '#1E1E2E', borderRadius: 12, padding: 18, borderWidth: 2, borderColor: '#34C759', }, recordBtnActive: { borderColor: '#FF3B30', backgroundColor: 'rgba(255,59,48,0.15)', }, recordIcon: { color: '#FF3B30', fontSize: 24, fontWeight: '700', }, recordLabel: { color: '#FFFFFF', fontSize: 17, fontWeight: '600', }, nameInput: { marginTop: 10, backgroundColor: '#1E1E2E', borderRadius: 8, paddingHorizontal: 14, paddingVertical: 12, color: '#FFFFFF', fontSize: 15, borderWidth: 1, borderColor: '#2A2A3E', }, primaryBtn: { backgroundColor: '#0096FF', borderRadius: 10, padding: 14, alignItems: 'center', }, primaryBtnText: { color: '#FFFFFF', fontSize: 15, fontWeight: '700', }, secondaryBtn: { backgroundColor: '#1E1E2E', borderRadius: 10, padding: 14, alignItems: 'center', borderWidth: 1, borderColor: '#2A2A3E', }, secondaryBtnText: { color: '#8888AA', fontSize: 14, fontWeight: 
'600', }, }); export default VoiceCloneModal;