Compare commits
5 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 64f18e97a0 | |||
| 9cbea27455 | |||
| c8881f9e4d | |||
| 028e3b2240 | |||
| c042f27106 |
@@ -79,8 +79,8 @@ android {
|
||||
applicationId "com.ariacockpit"
|
||||
minSdkVersion rootProject.ext.minSdkVersion
|
||||
targetSdkVersion rootProject.ext.targetSdkVersion
|
||||
versionCode 408
|
||||
versionName "0.0.4.8"
|
||||
versionCode 500
|
||||
versionName "0.0.5.0"
|
||||
// Fallback fuer Libraries mit Product Flavors
|
||||
missingDimensionStrategy 'react-native-camera', 'general'
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "aria-cockpit",
|
||||
"version": "0.0.4.8",
|
||||
"version": "0.0.5.0",
|
||||
"private": true,
|
||||
"scripts": {
|
||||
"android": "react-native run-android",
|
||||
|
||||
@@ -18,6 +18,7 @@ import {
|
||||
Image,
|
||||
ScrollView,
|
||||
Modal,
|
||||
ToastAndroid,
|
||||
} from 'react-native';
|
||||
import AsyncStorage from '@react-native-async-storage/async-storage';
|
||||
import RNFS from 'react-native-fs';
|
||||
@@ -325,6 +326,26 @@ const ChatScreen: React.FC = () => {
|
||||
const tool = (message.payload.tool as string) || '';
|
||||
setAgentActivity({ activity, tool });
|
||||
}
|
||||
|
||||
// Voice-Config aus Diagnostic — setzt die lokale App-Stimme auf den
|
||||
// gerade in Diagnostic gewaehlten Wert zurueck. User-Wahl in der App
|
||||
// wird dadurch ueberschrieben.
|
||||
if (message.type === ('config' as any)) {
|
||||
const newVoice = ((message.payload as any).xttsVoice as string) ?? '';
|
||||
localXttsVoiceRef.current = newVoice;
|
||||
AsyncStorage.setItem('aria_xtts_voice', newVoice);
|
||||
}
|
||||
|
||||
// XTTS-Bridge meldet Stimme fertig geladen (kurzer Status-Toast)
|
||||
if (message.type === ('voice_ready' as any)) {
|
||||
const v = ((message.payload as any).voice as string) ?? '';
|
||||
const err = (message.payload as any).error as string | undefined;
|
||||
if (err) {
|
||||
ToastAndroid.show(`Stimme "${v}" Fehler: ${err}`, ToastAndroid.LONG);
|
||||
} else {
|
||||
ToastAndroid.show(`Stimme "${v || 'Standard'}" bereit`, ToastAndroid.SHORT);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
const unsubState = rvs.onStateChange((state) => {
|
||||
|
||||
@@ -15,6 +15,8 @@ import {
|
||||
StyleSheet,
|
||||
Alert,
|
||||
Platform,
|
||||
ToastAndroid,
|
||||
ActivityIndicator,
|
||||
} from 'react-native';
|
||||
import AsyncStorage from '@react-native-async-storage/async-storage';
|
||||
import RNFS from 'react-native-fs';
|
||||
@@ -82,6 +84,7 @@ const SettingsScreen: React.FC = () => {
|
||||
const [ttsPrerollSec, setTtsPrerollSec] = useState<number>(TTS_PREROLL_DEFAULT_SEC);
|
||||
const [editingPath, setEditingPath] = useState(false);
|
||||
const [xttsVoice, setXttsVoice] = useState('');
|
||||
const [loadingVoice, setLoadingVoice] = useState<string | null>(null);
|
||||
const [availableVoices, setAvailableVoices] = useState<Array<{name: string, size: number}>>([]);
|
||||
const [voiceCloneVisible, setVoiceCloneVisible] = useState(false);
|
||||
const [tempPath, setTempPath] = useState('');
|
||||
@@ -265,6 +268,31 @@ const SettingsScreen: React.FC = () => {
|
||||
}
|
||||
rvs.send('xtts_list_voices' as any, {});
|
||||
}
|
||||
|
||||
// Diagnostic-Voice-Wechsel → lokale App-Stimme auf den neuen Default zuruecksetzen.
|
||||
// Zusaetzlich Preload triggern, damit der User weiss wann's geladen ist.
|
||||
if (message.type === ('config' as any)) {
|
||||
const newVoice = ((message.payload as any).xttsVoice as string) ?? '';
|
||||
setXttsVoice(newVoice);
|
||||
AsyncStorage.setItem('aria_xtts_voice', newVoice);
|
||||
if (newVoice) {
|
||||
setLoadingVoice(newVoice);
|
||||
}
|
||||
}
|
||||
|
||||
// XTTS-Bridge meldet: Stimme fertig geladen
|
||||
if (message.type === ('voice_ready' as any)) {
|
||||
const v = ((message.payload as any).voice as string) ?? '';
|
||||
const err = (message.payload as any).error as string | undefined;
|
||||
const ms = (message.payload as any).loadMs as number | undefined;
|
||||
setLoadingVoice(null);
|
||||
if (err) {
|
||||
ToastAndroid.show(`Stimme "${v}" konnte nicht geladen werden: ${err}`, ToastAndroid.LONG);
|
||||
} else {
|
||||
const suffix = ms ? ` (${(ms / 1000).toFixed(1)}s)` : '';
|
||||
ToastAndroid.show(`Stimme "${v || 'Standard'}" bereit${suffix}`, ToastAndroid.SHORT);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
return () => {
|
||||
@@ -333,6 +361,13 @@ const SettingsScreen: React.FC = () => {
|
||||
// Stores the voice picked in the settings list and, for custom voices only,
// asks the bridge to preload it.
const selectVoice = useCallback((voiceName: string) => {
  setXttsVoice(voiceName);
  AsyncStorage.setItem('aria_xtts_voice', voiceName);

  // An empty name means the built-in default voice, which needs no loading
  // step, so just make sure no loading marker is left behind.
  if (!voiceName) {
    setLoadingVoice(null);
    return;
  }

  // Mark the voice as loading first, then send the preload request. This
  // callback itself never clears the marker for a custom voice.
  setLoadingVoice(voiceName);
  rvs.send('voice_preload' as any, { voice: voiceName, source: 'app' });
}, []);
|
||||
|
||||
const deleteVoice = useCallback((name: string) => {
|
||||
@@ -612,7 +647,10 @@ const SettingsScreen: React.FC = () => {
|
||||
</Text>
|
||||
<Text style={styles.voiceRowMeta}>{(v.size / 1024).toFixed(0)} KB</Text>
|
||||
</TouchableOpacity>
|
||||
{xttsVoice === v.name && <Text style={styles.voiceRowCheck}>{'\u2713'}</Text>}
|
||||
{loadingVoice === v.name && (
|
||||
<ActivityIndicator size="small" color="#0096FF" style={{marginRight: 8}} />
|
||||
)}
|
||||
{xttsVoice === v.name && loadingVoice !== v.name && <Text style={styles.voiceRowCheck}>{'\u2713'}</Text>}
|
||||
<TouchableOpacity onPress={() => deleteVoice(v.name)} style={styles.voiceRowDelete}>
|
||||
<Text style={styles.voiceRowDeleteIcon}>X</Text>
|
||||
</TouchableOpacity>
|
||||
|
||||
@@ -257,6 +257,12 @@ def clean_text_for_tts(text: str) -> str:
|
||||
for pat, repl in _UNIT_WORDS:
|
||||
t = _re_tts.sub(pat, repl, t)
|
||||
|
||||
# Generisches Buchstabieren: alle verbleibenden 2-5-Zeichen-Grossbuchstaben-Woerter
|
||||
# (XTTS, USB, DNS, JSON, HTML, ...) → "X T T S". Laeuft NACH der expliziten Liste,
|
||||
# damit TTS/GPU/... schon aufgeloest sind. "WLAN"-artige, die als Wort gesprochen
|
||||
# werden, koennen bei Bedarf explizit in _UNIT_WORDS uebersteuert werden.
|
||||
t = _re_tts.sub(r'\b([A-Z]{2,5})\b', lambda m: " ".join(m.group(1)), t)
|
||||
|
||||
# Anfuehrungszeichen
|
||||
t = _re_tts.sub(r'["""„`]', '', t)
|
||||
|
||||
|
||||
+26
-1
@@ -438,13 +438,14 @@
|
||||
</div>
|
||||
|
||||
<!-- XTTS Stimme -->
|
||||
<div style="display:flex;align-items:center;gap:12px;margin-bottom:12px;">
|
||||
<div style="display:flex;align-items:center;gap:12px;margin-bottom:6px;">
|
||||
<label style="color:#8888AA;font-size:12px;">XTTS Stimme:</label>
|
||||
<select id="diag-xtts-voice" onchange="sendVoiceConfig()" style="background:#1E1E2E;color:#fff;border:1px solid #2A2A3E;border-radius:6px;padding:6px 10px;font-size:13px;">
|
||||
<option value="">Standard (XTTS Default)</option>
|
||||
</select>
|
||||
<button class="btn secondary" onclick="loadXTTSVoices()" style="padding:4px 10px;font-size:11px;">Laden</button>
|
||||
</div>
|
||||
<div id="voice-status" style="font-size:11px;min-height:14px;margin-bottom:12px;color:#8888AA;"></div>
|
||||
|
||||
<!-- Gecloned Stimmen — Liste mit Loeschen -->
|
||||
<div id="xtts-voice-list" style="margin-bottom:12px;"></div>
|
||||
@@ -851,6 +852,25 @@
|
||||
return;
|
||||
}
|
||||
|
||||
if (msg.type === 'voice_ready') {
|
||||
const v = msg.payload?.voice || '';
|
||||
const err = msg.payload?.error;
|
||||
const ms = msg.payload?.loadMs;
|
||||
const statusEl = document.getElementById('voice-status');
|
||||
if (statusEl) {
|
||||
if (err) {
|
||||
statusEl.textContent = `⚠️ Stimme "${v}" Fehler: ${err}`;
|
||||
statusEl.style.color = '#FF3B30';
|
||||
} else {
|
||||
statusEl.textContent = `✅ Stimme "${v || 'Standard'}" bereit${ms ? ` (${(ms/1000).toFixed(1)}s)` : ''}`;
|
||||
statusEl.style.color = '#34C759';
|
||||
}
|
||||
setTimeout(() => { if (statusEl) statusEl.textContent = ''; }, 5000);
|
||||
}
|
||||
addLog('info', 'xtts', err ? `Voice "${v}": ${err}` : `Voice "${v || 'Standard'}" bereit`);
|
||||
return;
|
||||
}
|
||||
|
||||
if (msg.type === 'watchdog') {
|
||||
const colors = { warning: '#FFD60A', fixing: '#FF9500', fixed: '#34C759', error: '#FF3B30' };
|
||||
const color = colors[msg.status] || '#FFD60A';
|
||||
@@ -1551,6 +1571,11 @@
|
||||
const xttsVoice = document.getElementById('diag-xtts-voice').value;
|
||||
const whisperModel = document.getElementById('diag-whisper-model').value;
|
||||
send({ action: 'send_voice_config', ttsEnabled, xttsVoice, whisperModel });
|
||||
const statusEl = document.getElementById('voice-status');
|
||||
if (statusEl && xttsVoice) {
|
||||
statusEl.textContent = `⏳ Stimme "${xttsVoice}" wird geladen...`;
|
||||
statusEl.style.color = '#FFD60A';
|
||||
}
|
||||
}
|
||||
|
||||
// ── Passwort-Feld Anzeigen/Verbergen ─────────────────────
|
||||
|
||||
@@ -626,6 +626,17 @@ function connectRVS(forcePlain) {
|
||||
// Mode-Broadcast von der Bridge → an Browser-Clients weiterreichen
|
||||
log("info", "rvs", `Mode-Broadcast: ${msg.payload?.mode} (${msg.payload?.name})`);
|
||||
broadcast({ type: "mode", payload: msg.payload });
|
||||
} else if (msg.type === "voice_ready") {
|
||||
// XTTS-Bridge meldet Stimme fertig geladen → an Browser durchreichen
|
||||
const v = msg.payload?.voice || "";
|
||||
const err = msg.payload?.error;
|
||||
const ms = msg.payload?.loadMs;
|
||||
if (err) {
|
||||
log("warn", "rvs", `Voice-Ready Fehler fuer "${v}": ${err}`);
|
||||
} else {
|
||||
log("info", "rvs", `Voice "${v || "default"}" geladen${ms ? ` in ${(ms/1000).toFixed(1)}s` : ""}`);
|
||||
}
|
||||
broadcast({ type: "voice_ready", payload: msg.payload });
|
||||
} else {
|
||||
log("debug", "rvs", `Nachricht: ${JSON.stringify(msg).slice(0, 150)}`);
|
||||
}
|
||||
|
||||
@@ -19,6 +19,7 @@ const ALLOWED_TYPES = new Set([
|
||||
"agent_activity", "cancel_request",
|
||||
"audio_pcm",
|
||||
"xtts_delete_voice",
|
||||
"voice_preload", "voice_ready",
|
||||
]);
|
||||
|
||||
// Token-Raum: token -> { clients: Set<ws> }
|
||||
|
||||
+87
-4
@@ -69,6 +69,18 @@ function connectRVS(forcePlain) {
|
||||
await handleListVoices();
|
||||
} else if (msg.type === "xtts_delete_voice") {
|
||||
await handleDeleteVoice(msg.payload);
|
||||
} else if (msg.type === "voice_preload") {
|
||||
await handleVoicePreload(msg.payload);
|
||||
} else if (msg.type === "config") {
|
||||
// Diagnostic hat globale Voice gewechselt → Preload damit der naechste
|
||||
// Render ohne Ladewartezeit startet + alle Clients "voice_ready" sehen
|
||||
const v = msg.payload && msg.payload.xttsVoice;
|
||||
if (v && v !== lastDiagnosticVoice) {
|
||||
lastDiagnosticVoice = v;
|
||||
await handleVoicePreload({ voice: v, source: "diagnostic" });
|
||||
} else if (!v) {
|
||||
lastDiagnosticVoice = "";
|
||||
}
|
||||
}
|
||||
} catch (err) {
|
||||
log(`Fehler: ${err.message}`);
|
||||
@@ -120,6 +132,10 @@ function applyFadeIn(base64Pcm, sampleRate, channels, fadeMs) {
|
||||
// interleaved PCM-Chunks aus zwei Rendern → klingt wie Chaos.
|
||||
let ttsQueue = Promise.resolve();
|
||||
|
||||
// Merkt sich die letzte in Diagnostic gewaehlte Voice, damit wir nicht bei jedem
|
||||
// config-Broadcast (auch ohne Aenderung) einen Preload triggern.
|
||||
let lastDiagnosticVoice = "";
|
||||
|
||||
function handleTTSRequest(payload) {
|
||||
ttsQueue = ttsQueue.then(() => _runTTSRequest(payload)).catch(err => {
|
||||
log(`TTS-Queue Fehler: ${err.message}`);
|
||||
@@ -151,8 +167,18 @@ async function _runTTSRequest(payload) {
|
||||
log(`TTS-Request (streaming): "${cleanText.slice(0, 80)}..." (${cleanText.length} chars, voice: ${voice || "default"})`);
|
||||
|
||||
try {
|
||||
const voiceSample = voice ? path.join(VOICES_DIR, `${voice}.wav`) : null;
|
||||
const hasCustomVoice = voiceSample && fs.existsSync(voiceSample);
|
||||
// Im local-Mode erwartet daswer123 XTTS speaker_wav als Basename (ohne .wav,
|
||||
// ohne Pfad) — der Server prefixt EXAMPLE_FOLDER selbst. Wir checken hier
|
||||
// nur das physische File ab um Warnungen zu loggen; runter ans API geht
|
||||
// nur der Name.
|
||||
const voiceFilePath = voice ? path.join(VOICES_DIR, `${voice}.wav`) : null;
|
||||
const hasCustomVoice = voiceFilePath && fs.existsSync(voiceFilePath);
|
||||
const speakerName = hasCustomVoice ? voice : "";
|
||||
if (voice && !hasCustomVoice) {
|
||||
log(`WARNUNG: Voice "${voice}" angefordert, aber ${voiceFilePath} existiert nicht — nehme Default`);
|
||||
} else if (hasCustomVoice) {
|
||||
log(`Voice "${voice}" verwendet (speaker_wav="${speakerName}")`);
|
||||
}
|
||||
|
||||
let chunkIndex = 0;
|
||||
let pcmMeta = null;
|
||||
@@ -190,7 +216,7 @@ async function _runTTSRequest(payload) {
|
||||
await streamXTTSAsPCM(
|
||||
cleanText,
|
||||
language || "de",
|
||||
hasCustomVoice ? voiceSample : null,
|
||||
speakerName,
|
||||
onChunk,
|
||||
);
|
||||
} catch (streamErr) {
|
||||
@@ -198,7 +224,7 @@ async function _runTTSRequest(payload) {
|
||||
await streamXTTSBatch(
|
||||
cleanText,
|
||||
language || "de",
|
||||
hasCustomVoice ? voiceSample : null,
|
||||
speakerName,
|
||||
onChunk,
|
||||
);
|
||||
}
|
||||
@@ -460,6 +486,63 @@ async function handleDeleteVoice(payload) {
|
||||
|
||||
// ── Voice Preload & List Handlers ──────────────────────────────
|
||||
|
||||
/**
|
||||
* Preload einer Stimme — rendert stumm ein kurzes Dummy-Audio, damit XTTS
|
||||
* die Speaker-Latents laedt und der naechste echte Request ohne Wartezeit
|
||||
* loslegen kann. Broadcastet "voice_ready" wenn fertig (oder mit error).
|
||||
*/
|
||||
async function handleVoicePreload(payload) {
|
||||
const voice = (payload && payload.voice) || "";
|
||||
const source = (payload && payload.source) || "unknown";
|
||||
const requestId = (payload && payload.requestId) || "";
|
||||
log(`Voice-Preload angefordert: "${voice}" (source=${source})`);
|
||||
|
||||
try {
|
||||
let speakerName = "";
|
||||
if (voice) {
|
||||
const voiceFilePath = path.join(VOICES_DIR, `${voice}.wav`);
|
||||
if (!fs.existsSync(voiceFilePath)) {
|
||||
sendToRVS({
|
||||
type: "voice_ready",
|
||||
payload: { voice, requestId, error: "voice-file-not-found" },
|
||||
timestamp: Date.now(),
|
||||
});
|
||||
log(`Preload abgebrochen: ${voiceFilePath} existiert nicht`);
|
||||
return;
|
||||
}
|
||||
speakerName = voice;
|
||||
}
|
||||
|
||||
// Dummy-Request via Queue — damit sich Preload nicht mit echtem TTS ueberholt.
|
||||
const t0 = Date.now();
|
||||
await new Promise((resolve, reject) => {
|
||||
ttsQueue = ttsQueue.then(async () => {
|
||||
try {
|
||||
await streamXTTSAsPCM("ja.", "de", speakerName, () => {});
|
||||
resolve();
|
||||
} catch (err) {
|
||||
reject(err);
|
||||
}
|
||||
}).catch(reject);
|
||||
});
|
||||
const ms = Date.now() - t0;
|
||||
log(`Voice "${voice || "default"}" geladen in ${ms}ms`);
|
||||
|
||||
sendToRVS({
|
||||
type: "voice_ready",
|
||||
payload: { voice, requestId, loadMs: ms },
|
||||
timestamp: Date.now(),
|
||||
});
|
||||
} catch (err) {
|
||||
log(`Voice-Preload Fehler: ${err.message}`);
|
||||
sendToRVS({
|
||||
type: "voice_ready",
|
||||
payload: { voice, requestId, error: err.message.slice(0, 200) },
|
||||
timestamp: Date.now(),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
async function handleListVoices() {
|
||||
try {
|
||||
const files = fs.existsSync(VOICES_DIR)
|
||||
|
||||
Reference in New Issue
Block a user