feat: App XTTS-Voice-Auswahl + Aufnahme + Loeschen (geraetelokal)

App Settings: Voice-Sektion (nur wenn TTS an)
- Liste aller XTTS-Server-Stimmen mit Auswahl-Radio + X zum Loeschen
- 'Standard' fuer Diagnostic-Default-Voice (keine lokale Ueberschreibung)
- 'Aktualisieren' Button laedt Liste neu (xtts_list_voices via RVS)
- 'Eigene Stimme aufnehmen' oeffnet VoiceCloneModal

VoiceCloneModal: 30s Aufnahme + Upload
- Vorlese-Text (>30s Lesedauer, thematisch passend)
- Rot-pulsierender Stop-Button, live Timer + Progressbar
- Auto-Stop bei 30s, Hinweise ab 15s ('genug fuer gute Clonung')
- Nach Stop: Namenseingabe (a-Z, 0-9, _, -), Upload via voice_upload
- Nach Upload: Modal schliesst, Settings bekommt xtts_voice_saved
  und setzt automatisch die neue Stimme als gewaehlt

Voice-Flow App → Bridge → XTTS (geraetelokal):
- Jeder chat/audio/tts_request schickt aria_xtts_voice (AsyncStorage)
  mit der Message mit
- Bridge speichert _next_voice_override bei chat/audio Empfang,
  nutzt es fuer die naechste ARIA-Antwort und resettet dann
- Fallback: globale xtts_voice aus voice_config.json (Diagnostic)

Ergebnis:
- Geraet A hat 'stefan' geclont → ARIA antwortet Geraet A mit stefan
- Geraet B hat nichts gewaehlt → ARIA antwortet Geraet B mit Default
- Diagnostic-Einstellung wirkt als fallback-default fuer neue Geraete

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-19 22:48:24 +02:00
parent fc2438be2d
commit 99cb83202e
4 changed files with 543 additions and 3 deletions
+9 -2
View File
@@ -110,6 +110,8 @@ const ChatScreen: React.FC = () => {
// Gerätelokale TTS-Config: globaler Toggle (aus Settings) + temporäres Muten (Mund-Button)
const [ttsDeviceEnabled, setTtsDeviceEnabled] = useState(true);
const [ttsMuted, setTtsMuted] = useState(false);
// Gerätelokale XTTS-Voice-Wahl (bevorzugt gegenueber dem globalen Default)
const localXttsVoiceRef = useRef<string>('');
const flatListRef = useRef<FlatList>(null);
const messageIdCounter = useRef(0);
@@ -127,6 +129,8 @@ const ChatScreen: React.FC = () => {
setTtsDeviceEnabled(enabled !== 'false'); // default true
const muted = await AsyncStorage.getItem('aria_tts_muted');
setTtsMuted(muted === 'true'); // default false
const voice = await AsyncStorage.getItem('aria_xtts_voice');
localXttsVoiceRef.current = voice || '';
};
loadTtsSettings();
// Poll alle 2s um Settings-Aenderung mitzubekommen (einfache Loesung ohne Context)
@@ -386,6 +390,7 @@ const ChatScreen: React.FC = () => {
base64: result.base64,
durationMs: result.durationMs,
mimeType: result.mimeType,
voice: localXttsVoiceRef.current,
...(location && { location }),
});
}
@@ -488,9 +493,10 @@ const ChatScreen: React.FC = () => {
};
setMessages(prev => capMessages([...prev, userMsg]));
// An RVS senden
// An RVS senden — mit geraetelokaler Voice (Bridge nutzt sie fuer die Antwort)
rvs.send('chat', {
text,
voice: localXttsVoiceRef.current,
...(location && { location }),
});
}, [inputText, getCurrentLocation, pendingAttachments, sendPendingAttachments]);
@@ -599,6 +605,7 @@ const ChatScreen: React.FC = () => {
if (messageText) {
rvs.send('chat', {
text: messageText,
voice: localXttsVoiceRef.current,
...(location && { location }),
});
}
@@ -689,7 +696,7 @@ const ChatScreen: React.FC = () => {
// wieder mit der Nachricht verknuepft (fuer den naechsten Replay aus Cache)
rvs.send('tts_request' as any, {
text: item.text,
voice: '',
voice: localXttsVoiceRef.current,
messageId: item.messageId || '',
});
}
+167 -1
View File
@@ -22,6 +22,7 @@ import DocumentPicker from 'react-native-document-picker';
import rvs, { ConnectionState, RVSMessage, ConnectionConfig, ConnectionLogEntry } from '../services/rvs';
import ModeSelector from '../components/ModeSelector';
import QRScanner from '../components/QRScanner';
import VoiceCloneModal from '../components/VoiceCloneModal';
const STORAGE_PATH_KEY = 'aria_attachment_storage_path';
const DEFAULT_STORAGE_PATH = `${RNFS.DocumentDirectoryPath}/chat_attachments`;
@@ -73,6 +74,9 @@ const SettingsScreen: React.FC = () => {
const [storageSize, setStorageSize] = useState('...');
const [ttsEnabled, setTtsEnabled] = useState(true);
const [editingPath, setEditingPath] = useState(false);
const [xttsVoice, setXttsVoice] = useState('');
const [availableVoices, setAvailableVoices] = useState<Array<{name: string, size: number}>>([]);
const [voiceCloneVisible, setVoiceCloneVisible] = useState(false);
const [tempPath, setTempPath] = useState('');
let logIdCounter = 0;
@@ -95,6 +99,11 @@ const SettingsScreen: React.FC = () => {
AsyncStorage.getItem('aria_tts_enabled').then(saved => {
if (saved !== null) setTtsEnabled(saved === 'true');
});
AsyncStorage.getItem('aria_xtts_voice').then(saved => {
if (saved) setXttsVoice(saved);
});
// Voice-Liste vom XTTS-Server holen (via RVS)
rvs.send('xtts_list_voices' as any, {});
}, []);
// Speichergroesse berechnen
@@ -225,6 +234,22 @@ const SettingsScreen: React.FC = () => {
const mode = message.payload.mode as string;
if (mode) setCurrentMode(mode);
}
// XTTS voice list received: replace the locally displayed list.
// A payload without `voices` is treated as an empty list.
if (message.type === ('xtts_voices_list' as any)) {
const voices = ((message.payload as any).voices || []) as Array<{name: string, size: number}>;
setAvailableVoices(voices);
}
// A voice was saved: if the payload carries a name, select that voice and
// persist the choice under 'aria_xtts_voice'; in every case request a fresh list.
// NOTE(review): whether this message reaches only the uploading device is not
// visible here — confirm, since every receiver would switch its selection.
if (message.type === ('xtts_voice_saved' as any)) {
const name = (message.payload as any).name as string;
if (name) {
setXttsVoice(name);
AsyncStorage.setItem('aria_xtts_voice', name);
}
rvs.send('xtts_list_voices' as any, {});
}
});
return () => {
@@ -288,6 +313,36 @@ const SettingsScreen: React.FC = () => {
// In Produktion: Wert in AsyncStorage persistieren
}, []);
// --- XTTS Voice ---
/**
 * Selects the XTTS voice for this device.
 *
 * Updates the `xttsVoice` state and persists the choice in AsyncStorage under
 * `aria_xtts_voice`. An empty string stands for "no local choice" (the
 * "Standard" entry of the voice list).
 *
 * @param voiceName Name of the voice to select, or `''` for the default entry.
 */
const selectVoice = useCallback((voiceName: string) => {
  setXttsVoice(voiceName);
  // setItem returns a promise; report a failed write instead of leaving the
  // rejection unhandled. The in-memory selection stays as chosen.
  AsyncStorage.setItem('aria_xtts_voice', voiceName).catch((err: unknown) => {
    console.warn('Failed to persist aria_xtts_voice', err);
  });
}, []);
/**
 * Asks the user for confirmation and, if confirmed, requests deletion of a
 * voice via `xtts_delete_voice`.
 *
 * If the deleted voice is the one currently selected on this device, the local
 * selection is reset to `''` (state and AsyncStorage). Afterwards the voice
 * list is requested again.
 *
 * @param name Name of the voice to delete.
 */
const deleteVoice = useCallback((name: string) => {
  const confirmDelete = () => {
    rvs.send('xtts_delete_voice' as any, { name });

    if (xttsVoice === name) {
      setXttsVoice('');
      // setItem returns a promise; report a failed write instead of leaving
      // the rejection unhandled.
      AsyncStorage.setItem('aria_xtts_voice', '').catch((err: unknown) => {
        console.warn('Failed to persist aria_xtts_voice', err);
      });
    }

    // Reload the list after a short delay (the XTTS bridge sends a new list anyway).
    setTimeout(() => rvs.send('xtts_list_voices' as any, {}), 500);
  };

  Alert.alert(
    'Stimme loeschen',
    `Stimme "${name}" vom Server endgueltig loeschen?\nAlle Apps verlieren sie.`,
    [
      { text: 'Abbrechen', style: 'cancel' },
      { text: 'Loeschen', style: 'destructive', onPress: confirmDelete },
    ],
  );
}, [xttsVoice]);
// --- Modus aendern ---
const handleModeChange = useCallback((modeId: string) => {
@@ -321,6 +376,10 @@ const SettingsScreen: React.FC = () => {
onScan={handleQRScan}
onClose={() => setScannerVisible(false)}
/>
<VoiceCloneModal
visible={voiceCloneVisible}
onClose={() => setVoiceCloneVisible(false)}
/>
<ScrollView style={styles.container} contentContainerStyle={styles.content}>
{/* === Verbindung === */}
@@ -455,7 +514,6 @@ const SettingsScreen: React.FC = () => {
<Text style={styles.toggleHint}>
Nur lokal andere Geraete sind unabhaengig.
Wenn aus, erscheint im Chat auch kein Mund-Button.
Stimme und Voice-Cloning werden zentral in der Diagnose eingestellt.
</Text>
</View>
<Switch
@@ -468,6 +526,65 @@ const SettingsScreen: React.FC = () => {
thumbColor={ttsEnabled ? '#FFFFFF' : '#666680'}
/>
</View>
{ttsEnabled && (
<View style={{marginTop: 20}}>
<Text style={styles.toggleLabel}>Stimme (geraetelokal)</Text>
<Text style={styles.toggleHint}>
Eigene Wahl fuer dieses Geraet. Ohne Auswahl gilt der Diagnostic-Default.
</Text>
{/* Default-Option */}
<TouchableOpacity
style={[styles.voiceRow, xttsVoice === '' && styles.voiceRowActive]}
onPress={() => selectVoice('')}
>
<Text style={[styles.voiceRowName, xttsVoice === '' && styles.voiceRowNameActive]}>
Standard (Diagnostic-Default)
</Text>
{xttsVoice === '' && <Text style={styles.voiceRowCheck}>{'\u2713'}</Text>}
</TouchableOpacity>
{availableVoices.length === 0 ? (
<Text style={[styles.toggleHint, {marginTop: 8, textAlign: 'center'}]}>
Keine eigenen Stimmen auf dem XTTS-Server.
</Text>
) : (
availableVoices.map(v => (
<View key={v.name} style={[styles.voiceRow, xttsVoice === v.name && styles.voiceRowActive]}>
<TouchableOpacity
style={{flex: 1}}
onPress={() => selectVoice(v.name)}
>
<Text style={[styles.voiceRowName, xttsVoice === v.name && styles.voiceRowNameActive]}>
{v.name}
</Text>
<Text style={styles.voiceRowMeta}>{(v.size / 1024).toFixed(0)} KB</Text>
</TouchableOpacity>
{xttsVoice === v.name && <Text style={styles.voiceRowCheck}>{'\u2713'}</Text>}
<TouchableOpacity onPress={() => deleteVoice(v.name)} style={styles.voiceRowDelete}>
<Text style={styles.voiceRowDeleteIcon}>X</Text>
</TouchableOpacity>
</View>
))
)}
<View style={{flexDirection: 'row', gap: 8, marginTop: 12}}>
<TouchableOpacity
style={[styles.connectButton, {flex: 1}]}
onPress={() => setVoiceCloneVisible(true)}
>
<Text style={styles.connectButtonText}>{'\uD83C\uDFA4'} Eigene Stimme aufnehmen</Text>
</TouchableOpacity>
<TouchableOpacity
style={[styles.clearButton, {flex: 0.4, marginTop: 0}]}
onPress={() => rvs.send('xtts_list_voices' as any, {})}
>
<Text style={styles.clearButtonText}>Aktualisieren</Text>
</TouchableOpacity>
</View>
</View>
)}
</View>
{/* === Speicher === */}
@@ -782,6 +899,55 @@ const styles = StyleSheet.create({
marginTop: 2,
},
// XTTS Voice List
// Row container for one entry of the voice list (default entry and server voices).
// The border is transparent here; voiceRowActive supplies the visible color.
voiceRow: {
flexDirection: 'row',
alignItems: 'center',
backgroundColor: '#1E1E2E',
borderRadius: 8,
padding: 10,
marginTop: 6,
borderWidth: 1,
borderColor: 'transparent',
},
// Applied in addition to voiceRow when the entry is the selected voice.
voiceRowActive: {
borderColor: '#0096FF',
backgroundColor: '#0D1A2E',
},
// Voice name text inside a row.
voiceRowName: {
color: '#CCCCDD',
fontSize: 14,
fontWeight: '500',
},
// Applied in addition to voiceRowName when the entry is selected.
voiceRowNameActive: {
color: '#FFFFFF',
},
// Secondary line below the name (shows the voice size in KB).
voiceRowMeta: {
color: '#666680',
fontSize: 11,
marginTop: 2,
},
// Check mark shown on the selected entry.
voiceRowCheck: {
color: '#34C759',
fontSize: 16,
fontWeight: '700',
marginHorizontal: 6,
},
// Round touch target for the delete action of a row.
voiceRowDelete: {
width: 28,
height: 28,
borderRadius: 14,
backgroundColor: 'rgba(255,59,48,0.2)',
alignItems: 'center',
justifyContent: 'center',
marginLeft: 4,
},
// Text style of the "X" inside the delete button.
voiceRowDeleteIcon: {
color: '#FF3B30',
fontSize: 12,
fontWeight: '700',
},
// Stimmen
voiceBtn: {
flex: 1,