Compare commits

...

6 Commits

Author SHA1 Message Date
duffyduck 97a1a3089a release: bump version to 0.0.5.1 2026-04-23 22:02:17 +02:00
duffyduck 64f18e97a0 release: bump version to 0.0.5.0 2026-04-23 15:31:18 +02:00
duffyduck 9cbea27455 feat: voice_preload/voice_ready — Feedback wenn neue Stimme geladen ist
XTTS-Bridge:
  - empfaengt neuen voice_preload Type, rendert stumm "ja." fuer die Stimme
    via TTS-Queue (damit kein Konflikt mit echtem TTS)
  - horcht zusaetzlich auf config-Broadcasts: wenn Diagnostic global die
    Stimme wechselt, wird auto-preloaded
  - broadcastet voice_ready mit Dauer (loadMs) oder error

RVS: voice_preload + voice_ready zur ALLOWED_TYPES-Liste.

App (SettingsScreen): beim Wechsel senden wir voice_preload, zeigen einen
Spinner in der Voice-Row und einen Toast mit "Stimme X bereit (Ns)".
App (ChatScreen): Toast auch hier — falls User gerade nicht in Settings ist.

Diagnostic (server+UI): voice_ready wird an Browser durchgereicht, ein
Status-Text unter dem Voice-Dropdown zeigt "wird geladen" → "bereit".

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-23 10:24:08 +02:00
duffyduck c8881f9e4d release: bump version to 0.0.4.9 2026-04-22 23:02:28 +02:00
duffyduck 028e3b2240 fix: Voice-Auswahl funktioniert endlich + Diagnostic setzt alle Apps zurueck
XTTS-Bridge: im daswer123 local-Mode erwartet der Server speaker_wav als
Basename (z.B. "Maia"), nicht als Pfad. Wir haben bisher "/voices/Maia.wav"
geschickt, was der Server stumm verwirft und Default nimmt. Jetzt: speaker
name pur senden + Warnlog wenn File fehlt.

App: ChatScreen + SettingsScreen horchen auf type "config" vom RVS —
wenn in Diagnostic die globale XTTS-Voice gewechselt wird, werden alle
Apps auf den neuen Wert zurueckgesetzt (wie vom User gewuenscht).
Lokale App-Wahl bleibt sonst intakt und gewinnt pro Request.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-22 19:32:40 +02:00
duffyduck c042f27106 feat: generisches Buchstabieren fuer unbekannte Akronyme
Nach der expliziten _UNIT_WORDS-Liste greift eine Fallback-Regel:
alle verbleibenden 2-5-Zeichen-Grossbuchstaben-Woerter werden
buchstabiert. XTTS → X T T S, USB → U S B, DNS → D N S, JSON → J S O N.

Spezielle Faelle (WLAN, NATO — als Wort gesprochen) koennen bei
Bedarf in _UNIT_WORDS explizit ueberschrieben werden.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-22 19:17:04 +02:00
9 changed files with 194 additions and 9 deletions
+2 -2
View File
@@ -79,8 +79,8 @@ android {
applicationId "com.ariacockpit"
minSdkVersion rootProject.ext.minSdkVersion
targetSdkVersion rootProject.ext.targetSdkVersion
versionCode 408
versionName "0.0.4.8"
versionCode 501
versionName "0.0.5.1"
// Fallback fuer Libraries mit Product Flavors
missingDimensionStrategy 'react-native-camera', 'general'
}
+1 -1
View File
@@ -1,6 +1,6 @@
{
"name": "aria-cockpit",
"version": "0.0.4.8",
"version": "0.0.5.1",
"private": true,
"scripts": {
"android": "react-native run-android",
+21
View File
@@ -18,6 +18,7 @@ import {
Image,
ScrollView,
Modal,
ToastAndroid,
} from 'react-native';
import AsyncStorage from '@react-native-async-storage/async-storage';
import RNFS from 'react-native-fs';
@@ -325,6 +326,26 @@ const ChatScreen: React.FC = () => {
const tool = (message.payload.tool as string) || '';
setAgentActivity({ activity, tool });
}
// Voice-Config aus Diagnostic — setzt die lokale App-Stimme auf den
// gerade in Diagnostic gewaehlten Wert zurueck. User-Wahl in der App
// wird dadurch ueberschrieben.
if (message.type === ('config' as any)) {
const newVoice = ((message.payload as any).xttsVoice as string) ?? '';
localXttsVoiceRef.current = newVoice;
AsyncStorage.setItem('aria_xtts_voice', newVoice);
}
// XTTS-Bridge meldet Stimme fertig geladen (kurzer Status-Toast)
if (message.type === ('voice_ready' as any)) {
const v = ((message.payload as any).voice as string) ?? '';
const err = (message.payload as any).error as string | undefined;
if (err) {
ToastAndroid.show(`Stimme "${v}" Fehler: ${err}`, ToastAndroid.LONG);
} else {
ToastAndroid.show(`Stimme "${v || 'Standard'}" bereit`, ToastAndroid.SHORT);
}
}
});
const unsubState = rvs.onStateChange((state) => {
+39 -1
View File
@@ -15,6 +15,8 @@ import {
StyleSheet,
Alert,
Platform,
ToastAndroid,
ActivityIndicator,
} from 'react-native';
import AsyncStorage from '@react-native-async-storage/async-storage';
import RNFS from 'react-native-fs';
@@ -82,6 +84,7 @@ const SettingsScreen: React.FC = () => {
const [ttsPrerollSec, setTtsPrerollSec] = useState<number>(TTS_PREROLL_DEFAULT_SEC);
const [editingPath, setEditingPath] = useState(false);
const [xttsVoice, setXttsVoice] = useState('');
const [loadingVoice, setLoadingVoice] = useState<string | null>(null);
const [availableVoices, setAvailableVoices] = useState<Array<{name: string, size: number}>>([]);
const [voiceCloneVisible, setVoiceCloneVisible] = useState(false);
const [tempPath, setTempPath] = useState('');
@@ -265,6 +268,31 @@ const SettingsScreen: React.FC = () => {
}
rvs.send('xtts_list_voices' as any, {});
}
// Diagnostic-Voice-Wechsel → lokale App-Stimme auf den neuen Default zuruecksetzen.
// Zusaetzlich Preload triggern, damit der User weiss wann's geladen ist.
if (message.type === ('config' as any)) {
const newVoice = ((message.payload as any).xttsVoice as string) ?? '';
setXttsVoice(newVoice);
AsyncStorage.setItem('aria_xtts_voice', newVoice);
if (newVoice) {
setLoadingVoice(newVoice);
}
}
// XTTS-Bridge meldet: Stimme fertig geladen
if (message.type === ('voice_ready' as any)) {
const v = ((message.payload as any).voice as string) ?? '';
const err = (message.payload as any).error as string | undefined;
const ms = (message.payload as any).loadMs as number | undefined;
setLoadingVoice(null);
if (err) {
ToastAndroid.show(`Stimme "${v}" konnte nicht geladen werden: ${err}`, ToastAndroid.LONG);
} else {
const suffix = ms ? ` (${(ms / 1000).toFixed(1)}s)` : '';
ToastAndroid.show(`Stimme "${v || 'Standard'}" bereit${suffix}`, ToastAndroid.SHORT);
}
}
});
return () => {
@@ -333,6 +361,13 @@ const SettingsScreen: React.FC = () => {
// Selects a voice locally: stores the choice in component state and in
// AsyncStorage, then asks for a preload when a custom voice was picked.
const selectVoice = useCallback((voiceName: string) => {
  setXttsVoice(voiceName);
  AsyncStorage.setItem('aria_xtts_voice', voiceName);

  // An empty name means "Standard" — nothing to load, so the spinner is cleared.
  setLoadingVoice(voiceName || null);
  if (!voiceName) {
    return;
  }

  // Custom voice: request a preload; the spinner stays until "voice_ready" arrives.
  rvs.send('voice_preload' as any, { voice: voiceName, source: 'app' });
}, []);
const deleteVoice = useCallback((name: string) => {
@@ -612,7 +647,10 @@ const SettingsScreen: React.FC = () => {
</Text>
<Text style={styles.voiceRowMeta}>{(v.size / 1024).toFixed(0)} KB</Text>
</TouchableOpacity>
{xttsVoice === v.name && <Text style={styles.voiceRowCheck}>{'\u2713'}</Text>}
{loadingVoice === v.name && (
<ActivityIndicator size="small" color="#0096FF" style={{marginRight: 8}} />
)}
{xttsVoice === v.name && loadingVoice !== v.name && <Text style={styles.voiceRowCheck}>{'\u2713'}</Text>}
<TouchableOpacity onPress={() => deleteVoice(v.name)} style={styles.voiceRowDelete}>
<Text style={styles.voiceRowDeleteIcon}>X</Text>
</TouchableOpacity>
+6
View File
@@ -257,6 +257,12 @@ def clean_text_for_tts(text: str) -> str:
for pat, repl in _UNIT_WORDS:
t = _re_tts.sub(pat, repl, t)
# Generisches Buchstabieren: alle verbleibenden 2-5-Zeichen-Grossbuchstaben-Woerter
# (XTTS, USB, DNS, JSON, HTML, ...) → "X T T S". Laeuft NACH der expliziten Liste,
# damit TTS/GPU/... schon aufgeloest sind. "WLAN"-artige, die als Wort gesprochen
# werden, koennen bei Bedarf explizit in _UNIT_WORDS uebersteuert werden.
t = _re_tts.sub(r'\b([A-Z]{2,5})\b', lambda m: " ".join(m.group(1)), t)
# Anfuehrungszeichen
t = _re_tts.sub(r'["""„`]', '', t)
+26 -1
View File
@@ -438,13 +438,14 @@
</div>
<!-- XTTS Stimme -->
<div style="display:flex;align-items:center;gap:12px;margin-bottom:12px;">
<div style="display:flex;align-items:center;gap:12px;margin-bottom:6px;">
<label style="color:#8888AA;font-size:12px;">XTTS Stimme:</label>
<select id="diag-xtts-voice" onchange="sendVoiceConfig()" style="background:#1E1E2E;color:#fff;border:1px solid #2A2A3E;border-radius:6px;padding:6px 10px;font-size:13px;">
<option value="">Standard (XTTS Default)</option>
</select>
<button class="btn secondary" onclick="loadXTTSVoices()" style="padding:4px 10px;font-size:11px;">Laden</button>
</div>
<div id="voice-status" style="font-size:11px;min-height:14px;margin-bottom:12px;color:#8888AA;"></div>
<!-- Geklonte Stimmen — Liste mit Loeschen -->
<div id="xtts-voice-list" style="margin-bottom:12px;"></div>
@@ -851,6 +852,25 @@
return;
}
if (msg.type === 'voice_ready') {
const v = msg.payload?.voice || '';
const err = msg.payload?.error;
const ms = msg.payload?.loadMs;
const statusEl = document.getElementById('voice-status');
if (statusEl) {
if (err) {
statusEl.textContent = `⚠️ Stimme "${v}" Fehler: ${err}`;
statusEl.style.color = '#FF3B30';
} else {
statusEl.textContent = `✅ Stimme "${v || 'Standard'}" bereit${ms ? ` (${(ms/1000).toFixed(1)}s)` : ''}`;
statusEl.style.color = '#34C759';
}
setTimeout(() => { if (statusEl) statusEl.textContent = ''; }, 5000);
}
addLog('info', 'xtts', err ? `Voice "${v}": ${err}` : `Voice "${v || 'Standard'}" bereit`);
return;
}
if (msg.type === 'watchdog') {
const colors = { warning: '#FFD60A', fixing: '#FF9500', fixed: '#34C759', error: '#FF3B30' };
const color = colors[msg.status] || '#FFD60A';
@@ -1551,6 +1571,11 @@
const xttsVoice = document.getElementById('diag-xtts-voice').value;
const whisperModel = document.getElementById('diag-whisper-model').value;
send({ action: 'send_voice_config', ttsEnabled, xttsVoice, whisperModel });
const statusEl = document.getElementById('voice-status');
if (statusEl && xttsVoice) {
statusEl.textContent = `⏳ Stimme "${xttsVoice}" wird geladen...`;
statusEl.style.color = '#FFD60A';
}
}
// ── Passwort-Feld Anzeigen/Verbergen ─────────────────────
+11
View File
@@ -626,6 +626,17 @@ function connectRVS(forcePlain) {
// Mode-Broadcast von der Bridge → an Browser-Clients weiterreichen
log("info", "rvs", `Mode-Broadcast: ${msg.payload?.mode} (${msg.payload?.name})`);
broadcast({ type: "mode", payload: msg.payload });
} else if (msg.type === "voice_ready") {
// XTTS-Bridge meldet Stimme fertig geladen → an Browser durchreichen
const v = msg.payload?.voice || "";
const err = msg.payload?.error;
const ms = msg.payload?.loadMs;
if (err) {
log("warn", "rvs", `Voice-Ready Fehler fuer "${v}": ${err}`);
} else {
log("info", "rvs", `Voice "${v || "default"}" geladen${ms ? ` in ${(ms/1000).toFixed(1)}s` : ""}`);
}
broadcast({ type: "voice_ready", payload: msg.payload });
} else {
log("debug", "rvs", `Nachricht: ${JSON.stringify(msg).slice(0, 150)}`);
}
+1
View File
@@ -19,6 +19,7 @@ const ALLOWED_TYPES = new Set([
"agent_activity", "cancel_request",
"audio_pcm",
"xtts_delete_voice",
"voice_preload", "voice_ready",
]);
// Token-Raum: token -> { clients: Set<ws> }
+87 -4
View File
@@ -69,6 +69,18 @@ function connectRVS(forcePlain) {
await handleListVoices();
} else if (msg.type === "xtts_delete_voice") {
await handleDeleteVoice(msg.payload);
} else if (msg.type === "voice_preload") {
await handleVoicePreload(msg.payload);
} else if (msg.type === "config") {
// Diagnostic hat globale Voice gewechselt → Preload damit der naechste
// Render ohne Ladewartezeit startet + alle Clients "voice_ready" sehen
const v = msg.payload && msg.payload.xttsVoice;
if (v && v !== lastDiagnosticVoice) {
lastDiagnosticVoice = v;
await handleVoicePreload({ voice: v, source: "diagnostic" });
} else if (!v) {
lastDiagnosticVoice = "";
}
}
} catch (err) {
log(`Fehler: ${err.message}`);
@@ -120,6 +132,10 @@ function applyFadeIn(base64Pcm, sampleRate, channels, fadeMs) {
// interleaved PCM-Chunks aus zwei Rendern → klingt wie Chaos.
let ttsQueue = Promise.resolve();
// Merkt sich die letzte in Diagnostic gewaehlte Voice, damit wir nicht bei jedem
// config-Broadcast (auch ohne Aenderung) einen Preload triggern.
let lastDiagnosticVoice = "";
function handleTTSRequest(payload) {
ttsQueue = ttsQueue.then(() => _runTTSRequest(payload)).catch(err => {
log(`TTS-Queue Fehler: ${err.message}`);
@@ -151,8 +167,18 @@ async function _runTTSRequest(payload) {
log(`TTS-Request (streaming): "${cleanText.slice(0, 80)}..." (${cleanText.length} chars, voice: ${voice || "default"})`);
try {
const voiceSample = voice ? path.join(VOICES_DIR, `${voice}.wav`) : null;
const hasCustomVoice = voiceSample && fs.existsSync(voiceSample);
// Im local-Mode erwartet daswer123 XTTS speaker_wav als Basename (ohne .wav,
// ohne Pfad) — der Server prefixt EXAMPLE_FOLDER selbst. Wir checken hier
// nur das physische File ab um Warnungen zu loggen; runter ans API geht
// nur der Name.
const voiceFilePath = voice ? path.join(VOICES_DIR, `${voice}.wav`) : null;
const hasCustomVoice = voiceFilePath && fs.existsSync(voiceFilePath);
const speakerName = hasCustomVoice ? voice : "";
if (voice && !hasCustomVoice) {
log(`WARNUNG: Voice "${voice}" angefordert, aber ${voiceFilePath} existiert nicht — nehme Default`);
} else if (hasCustomVoice) {
log(`Voice "${voice}" verwendet (speaker_wav="${speakerName}")`);
}
let chunkIndex = 0;
let pcmMeta = null;
@@ -190,7 +216,7 @@ async function _runTTSRequest(payload) {
await streamXTTSAsPCM(
cleanText,
language || "de",
hasCustomVoice ? voiceSample : null,
speakerName,
onChunk,
);
} catch (streamErr) {
@@ -198,7 +224,7 @@ async function _runTTSRequest(payload) {
await streamXTTSBatch(
cleanText,
language || "de",
hasCustomVoice ? voiceSample : null,
speakerName,
onChunk,
);
}
@@ -460,6 +486,63 @@ async function handleDeleteVoice(payload) {
// ── Voice List Handler ──────────────────────────────
/**
 * Preloads a voice by rendering a short dummy utterance ("ja.") and discarding
 * the audio, with the intent that the next real request for this voice starts
 * without the initial load delay. Always answers with a "voice_ready" message:
 * either with `loadMs` on success or with `error` on failure.
 *
 * @param {{voice?: string, source?: string, requestId?: string}} payload
 *   `voice` is the voice name without extension (empty = default voice),
 *   `source` is only used for logging, `requestId` is echoed back unchanged.
 * @returns {Promise<void>} Resolves after the "voice_ready" message was sent;
 *   render failures are reported via "voice_ready" and not rethrown.
 */
async function handleVoicePreload(payload) {
  const voice = (payload && payload.voice) || "";
  const source = (payload && payload.source) || "unknown";
  const requestId = (payload && payload.requestId) || "";

  log(`Voice-Preload angefordert: "${voice}" (source=${source})`);

  try {
    let speakerName = "";
    if (voice) {
      // Only the existence of the sample file is checked here; the render call
      // below receives the bare voice name, not the path.
      const voiceFilePath = path.join(VOICES_DIR, `${voice}.wav`);
      if (!fs.existsSync(voiceFilePath)) {
        sendToRVS({
          type: "voice_ready",
          payload: { voice, requestId, error: "voice-file-not-found" },
          timestamp: Date.now(),
        });
        log(`Preload abgebrochen: ${voiceFilePath} existiert nicht`);
        return;
      }
      speakerName = voice;
    }

    // Run the dummy render through the shared TTS queue so it cannot overlap
    // with a real TTS request. The timer starts inside the queued job, so
    // loadMs reflects the render itself and not the time spent waiting behind
    // earlier queue entries.
    let ms = 0;
    const preloadJob = ttsQueue.then(async () => {
      const startedAt = Date.now();
      await streamXTTSAsPCM("ja.", "de", speakerName, () => {});
      ms = Date.now() - startedAt;
    });
    // The queue itself must never stay rejected, otherwise later requests
    // chained onto it would be skipped; the failure is surfaced via `await`.
    ttsQueue = preloadJob.catch(() => {});
    await preloadJob;

    log(`Voice "${voice || "default"}" geladen in ${ms}ms`);
    sendToRVS({
      type: "voice_ready",
      payload: { voice, requestId, loadMs: ms },
      timestamp: Date.now(),
    });
  } catch (err) {
    // A rejection is not guaranteed to be an Error instance; derive the text
    // defensively so this handler cannot throw while reporting the failure.
    const reason = err && err.message ? String(err.message) : String(err);
    log(`Voice-Preload Fehler: ${reason}`);
    sendToRVS({
      type: "voice_ready",
      payload: { voice, requestId, error: reason.slice(0, 200) },
      timestamp: Date.now(),
    });
  }
}
async function handleListVoices() {
try {
const files = fs.existsSync(VOICES_DIR)