-
-
-
+
+
+
Stimme klonen
+
+ Lade ein oder mehrere Audio-Samples hoch (WAV/MP3, min. 6-10 Sekunden).
+ Mehrere Dateien werden automatisch zusammengefuegt.
-
-
-
-
Stimme klonen
-
- Lade ein oder mehrere Audio-Samples hoch (WAV/MP3, min. 6-10 Sekunden).
- Mehrere Dateien werden automatisch zusammengefuegt.
-
-
-
-
-
-
-
-
-
-
-
+
+
-
-
-
- XTTS-Server: Nicht verbunden (starte xtts/ auf dem Gaming-PC)
+
+
+
+
+
+
+
+
+
+
+ XTTS-Server: Nicht verbunden (starte xtts/ auf dem Gaming-PC)
@@ -798,11 +738,8 @@
return;
}
if (msg.type === 'tts_status') {
- document.getElementById('tts-default-voice').textContent = msg.defaultVoice || '?';
- document.getElementById('tts-highlight-voice').textContent = msg.highlightVoice || '?';
document.getElementById('tts-status').textContent = msg.ok ? 'OK' : 'Fehler';
document.getElementById('tts-status').style.color = msg.ok ? '#34C759' : '#FF3B30';
- if (msg.voices) ttsLog(`Stimmen: ${msg.voices.join(', ')}`);
if (msg.error) { document.getElementById('tts-last-error').textContent = msg.error; ttsLog(`Fehler: ${msg.error}`); }
else { document.getElementById('tts-last-error').textContent = '-'; ttsLog('TTS OK'); }
return;
@@ -835,16 +772,7 @@
}
if (msg.type === 'voice_config') {
- document.getElementById('diag-default-voice').value = msg.defaultVoice || 'ramona';
- document.getElementById('diag-highlight-voice').value = msg.highlightVoice || 'thorsten';
document.getElementById('diag-tts-enabled').checked = msg.ttsEnabled !== false;
- const sr = msg.speedRamona || 1.0;
- const st = msg.speedThorsten || 1.0;
- document.getElementById('diag-speed-ramona').value = sr;
- document.getElementById('speed-ramona-label').textContent = sr + 'x';
- document.getElementById('diag-speed-thorsten').value = st;
- document.getElementById('speed-thorsten-label').textContent = st + 'x';
- document.getElementById('diag-tts-engine').value = msg.ttsEngine || 'piper';
// XTTS-Voice setzen — Option hinzufuegen falls nicht vorhanden
const xttsSelect = document.getElementById('diag-xtts-voice');
const xttsVoice = msg.xttsVoice || '';
@@ -855,7 +783,6 @@
xttsSelect.appendChild(opt);
}
xttsSelect.value = xttsVoice;
- toggleXTTSPanel();
// Whisper-Modell wiederherstellen (falls gesetzt)
if (msg.whisperModel) {
const wSel = document.getElementById('diag-whisper-model');
@@ -1429,10 +1356,9 @@
}
// ── XTTS Panel ─────────────────────────────
+ // Legacy shim: XTTS is the only engine now, so there is no panel left to toggle.
+ // Kept for existing callers; it just refreshes the XTTS voice list.
function toggleXTTSPanel() {
- const engine = document.getElementById('diag-tts-engine').value;
- document.getElementById('piper-panel').style.display = engine === 'piper' ? 'block' : 'none';
- document.getElementById('xtts-panel').style.display = engine === 'xtts' ? 'block' : 'none';
- if (engine === 'xtts') loadXTTSVoices();
+ loadXTTSVoices();
}
@@ -1540,15 +1466,10 @@
// ── Stimmen-Config ──────────────────────────
function sendVoiceConfig() {
- const defaultVoice = document.getElementById('diag-default-voice').value;
- const highlightVoice = document.getElementById('diag-highlight-voice').value;
const ttsEnabled = document.getElementById('diag-tts-enabled').checked;
- const speedRamona = parseFloat(document.getElementById('diag-speed-ramona').value);
- const speedThorsten = parseFloat(document.getElementById('diag-speed-thorsten').value);
- const ttsEngine = document.getElementById('diag-tts-engine').value;
const xttsVoice = document.getElementById('diag-xtts-voice').value;
const whisperModel = document.getElementById('diag-whisper-model').value;
- send({ action: 'send_voice_config', defaultVoice, highlightVoice, ttsEnabled, speedRamona, speedThorsten, ttsEngine, xttsVoice, whisperModel });
+ send({ action: 'send_voice_config', ttsEnabled, xttsVoice, whisperModel });
}
// ── Passwort-Feld Anzeigen/Verbergen ─────────────────────
diff --git a/diagnostic/server.js b/diagnostic/server.js
index 1ce5ee6..ae24b4e 100644
--- a/diagnostic/server.js
+++ b/diagnostic/server.js
@@ -1343,18 +1343,12 @@ wss.on("connection", (ws) => {
handleGetVoiceConfig(ws);
} else if (msg.action === "send_voice_config") {
// Stimmen-Config persistent speichern + an Bridge via RVS senden
- // Bestehende Config lesen um Felder zu mergen die dieser Call nicht setzt
let existing = {};
try { existing = JSON.parse(fs.readFileSync("/shared/config/voice_config.json", "utf-8")); } catch {}
const voiceConfig = {
...existing,
- defaultVoice: msg.defaultVoice || "ramona",
- highlightVoice: msg.highlightVoice || "thorsten",
ttsEnabled: msg.ttsEnabled !== false,
- ttsEngine: msg.ttsEngine || "piper",
xttsVoice: msg.xttsVoice || "",
- speedRamona: msg.speedRamona || 1.0,
- speedThorsten: msg.speedThorsten || 1.0,
};
if (msg.whisperModel !== undefined) voiceConfig.whisperModel = msg.whisperModel;
try {
@@ -1362,13 +1356,13 @@ wss.on("connection", (ws) => {
fs.writeFileSync("/shared/config/voice_config.json", JSON.stringify(voiceConfig, null, 2));
} catch {}
sendToRVS_raw({ type: "config", payload: voiceConfig, timestamp: Date.now() });
- log("info", "server", `Voice-Config gespeichert+gesendet: default=${voiceConfig.defaultVoice}, whisper=${voiceConfig.whisperModel || "-"}`);
+ log("info", "server", `Voice-Config gespeichert: xttsVoice=${voiceConfig.xttsVoice || "default"}, whisper=${voiceConfig.whisperModel || "-"}`);
} else if (msg.action === "get_triggers") {
handleGetTriggers(ws);
} else if (msg.action === "save_triggers") {
handleSaveTriggers(ws, msg.triggers || []);
} else if (msg.action === "test_tts") {
- handleTestTTS(ws, msg.voice || "ramona", msg.text || "Test");
+ handleTestTTS(ws, msg.text || "Test");
} else if (msg.action === "check_tts") {
handleCheckTTS(ws);
} else if (msg.action === "check_desktop") {
@@ -1508,32 +1502,21 @@ function handleGetVoiceConfig(clientWs) {
const config = JSON.parse(fs.readFileSync(configPath, "utf-8"));
clientWs.send(JSON.stringify({ type: "voice_config", ...config }));
} else {
- clientWs.send(JSON.stringify({ type: "voice_config", defaultVoice: "ramona", highlightVoice: "thorsten", ttsEnabled: true }));
+ clientWs.send(JSON.stringify({ type: "voice_config", ttsEnabled: true, xttsVoice: "" }));
}
} catch (err) {
- clientWs.send(JSON.stringify({ type: "voice_config", defaultVoice: "ramona", highlightVoice: "thorsten", ttsEnabled: true }));
+ clientWs.send(JSON.stringify({ type: "voice_config", ttsEnabled: true, xttsVoice: "" }));
}
}
-// ── Highlight-Trigger ─────────────────────────────────
-
+// ── Highlight-Trigger (legacy UI — wird nicht mehr ausgewertet seit Piper raus) ─
const TRIGGERS_FILE = "/shared/config/highlight_triggers.json";
async function handleGetTriggers(clientWs) {
try {
- // Zuerst aus Shared Volume lesen, dann Fallback auf Bridge-Defaults
- let triggers;
- if (fs.existsSync(TRIGGERS_FILE)) {
- triggers = JSON.parse(fs.readFileSync(TRIGGERS_FILE, "utf-8"));
- } else {
- // Defaults aus der Bridge lesen
- const result = await dockerExec("aria-bridge", `python3 -c "
-import sys; sys.path.insert(0,'/app')
-from aria_bridge import EPIC_TRIGGERS
-print('\\n'.join(EPIC_TRIGGERS))
-"`);
- triggers = result.trim().split("\n").filter(t => t);
- }
+ const triggers = fs.existsSync(TRIGGERS_FILE)
+ ? JSON.parse(fs.readFileSync(TRIGGERS_FILE, "utf-8"))
+ : [];
clientWs.send(JSON.stringify({ type: "trigger_list", triggers }));
} catch (err) {
clientWs.send(JSON.stringify({ type: "trigger_list", triggers: [], error: err.message }));
@@ -1542,74 +1525,40 @@ print('\\n'.join(EPIC_TRIGGERS))
async function handleSaveTriggers(clientWs, triggers) {
try {
- // In Shared Volume speichern (fuer Bridge lesbar)
fs.mkdirSync("/shared/config", { recursive: true });
fs.writeFileSync(TRIGGERS_FILE, JSON.stringify(triggers, null, 2));
log("info", "server", `${triggers.length} Highlight-Trigger gespeichert`);
- // Bridge informieren (wird beim naechsten Start geladen)
clientWs.send(JSON.stringify({ type: "trigger_list", triggers }));
} catch (err) {
log("error", "server", `Trigger speichern fehlgeschlagen: ${err.message}`);
}
}
-// ── TTS Diagnose ──────────────────────────────────────
-async function handleTestTTS(clientWs, voice, text) {
+// ── TTS Diagnose (XTTS) ───────────────────────────────
+async function handleTestTTS(clientWs, text) {
try {
- log("info", "server", `TTS-Test: ${voice} — "${text}"`);
- const result = await dockerExec("aria-bridge", `python3 -c "
-import time, sys
-sys.path.insert(0, '/app')
-from piper import PiperVoice
-import wave, tempfile, os
-voices = {'ramona': '/voices/de_DE-ramona-low.onnx', 'thorsten': '/voices/de_DE-thorsten-high.onnx'}
-path = voices.get('${voice}')
-if not path or not os.path.exists(path):
- print('FEHLER: Stimme nicht gefunden')
- sys.exit(1)
-v = PiperVoice.load(path)
-start = time.time()
-tmp = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
-with wave.open(tmp.name, 'wb') as wf:
- wf.setnchannels(1)
- wf.setsampwidth(2)
- wf.setframerate(v.config.sample_rate)
- v.synthesize('${text.replace(/'/g, "\\'")}', wf)
-size = os.path.getsize(tmp.name)
-dur = int((time.time() - start) * 1000)
-os.unlink(tmp.name)
-print(f'OK:{dur}:{size}')
-"`);
- const parts = result.trim().split(":");
- if (parts[0] === "OK") {
- clientWs.send(JSON.stringify({ type: "tts_result", ok: true, voice, duration: parts[1], size: parts[2] }));
- } else {
- clientWs.send(JSON.stringify({ type: "tts_result", ok: false, voice, error: result.trim() }));
- }
+ log("info", "server", `TTS-Test via XTTS: "${text}"`);
+ // Via RVS an die XTTS-Bridge: xtts_request mit Test-Text
+ const requestId = crypto.randomUUID();
+ sendToRVS_raw({
+ type: "xtts_request",
+ payload: { text, language: "de", requestId, voice: "" },
+ timestamp: Date.now(),
+ });
+ clientWs.send(JSON.stringify({ type: "tts_result", ok: true, duration: "pending", size: "?" }));
} catch (err) {
- clientWs.send(JSON.stringify({ type: "tts_result", ok: false, voice, error: err.message }));
+ clientWs.send(JSON.stringify({ type: "tts_result", ok: false, error: err.message }));
}
}
async function handleCheckTTS(clientWs) {
try {
- const result = await dockerExec("aria-bridge", `python3 -c "
-import os, json
-voices = {}
-for name, path in [('ramona', '/voices/de_DE-ramona-low.onnx'), ('thorsten', '/voices/de_DE-thorsten-high.onnx')]:
- voices[name] = os.path.exists(path)
-print(json.dumps(voices))
-"`);
- const voices = JSON.parse(result.trim());
- const available = Object.entries(voices).filter(([,v]) => v).map(([k]) => k);
- const missing = Object.entries(voices).filter(([,v]) => !v).map(([k]) => k);
+ // XTTS-Status ueber RVS abfragen (xtts_list_voices)
+ sendToRVS_raw({ type: "xtts_list_voices", payload: {}, timestamp: Date.now() });
clientWs.send(JSON.stringify({
type: "tts_status",
- ok: missing.length === 0,
- voices: available,
- defaultVoice: "ramona",
- highlightVoice: "thorsten",
- error: missing.length > 0 ? `Fehlend: ${missing.join(", ")}` : null,
+ ok: true,
+ error: null,
}));
} catch (err) {
clientWs.send(JSON.stringify({ type: "tts_status", ok: false, error: err.message }));
diff --git a/docker-compose.yml b/docker-compose.yml
index d7057d6..bdc37dc 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -72,7 +72,6 @@ services:
- aria
network_mode: "service:aria" # Teilt Netzwerk mit aria-core → localhost:18789
volumes:
- - ./aria-data/voices:/voices:ro # TTS Stimmen
- ./aria-data/config/aria.env:/config/aria.env
- aria-shared:/shared # Shared Volume fuer Datei-Austausch (Bridge <> Core)
# Audio-Zugriff
diff --git a/get-voices.sh b/get-voices.sh
deleted file mode 100755
index 6e9930b..0000000
--- a/get-voices.sh
+++ /dev/null
@@ -1,32 +0,0 @@
-#!/bin/bash
-# ════════════════════════════════════════════════
-# ARIA — Piper Stimmen herunterladen
-# Ramona (Alltag) + Thorsten (epische Momente)
-# ════════════════════════════════════════════════
-
-set -e
-
-VOICES_DIR="aria-data/voices"
-BASE_URL="https://huggingface.co/rhasspy/piper-voices/resolve/main/de/de_DE"
-
-mkdir -p "$VOICES_DIR"
-cd "$VOICES_DIR"
-
-echo "Lade ARIA Stimmen..."
-echo ""
-
-echo "[1/4] Ramona (Modell)..."
-wget -q --show-progress "$BASE_URL/ramona/low/de_DE-ramona-low.onnx"
-
-echo "[2/4] Ramona (Config)..."
-wget -q --show-progress "$BASE_URL/ramona/low/de_DE-ramona-low.onnx.json"
-
-echo "[3/4] Thorsten (Modell)..."
-wget -q --show-progress "$BASE_URL/thorsten/high/de_DE-thorsten-high.onnx"
-
-echo "[4/4] Thorsten (Config)..."
-wget -q --show-progress "$BASE_URL/thorsten/high/de_DE-thorsten-high.onnx.json"
-
-echo ""
-echo "Stimmen geladen!"
-ls -lh *.onnx
diff --git a/issue.md b/issue.md
index 314fe92..563df6b 100644
--- a/issue.md
+++ b/issue.md
@@ -37,6 +37,8 @@
- [x] App: "ARIA denkt..." Indicator + Abbrechen-Button (Bridge spiegelt agent_activity via RVS)
- [x] Whisper STT: Model-Auswahl in Diagnostic (tiny/base/small/medium/large-v3), Hot-Reload in Bridge, Default auf medium
- [x] App: Audio-Aufnahme explizit 16kHz mono (spart Resample, optimal fuer Whisper)
+- [x] Streaming TTS (Weg A): XTTS → PCM-Stream → aria-bridge → App AudioTrack MODE_STREAM, keine WAV-Gaps mehr
+- [x] Piper komplett entfernt: nur noch XTTS v2 als TTS-Engine (remote, GPU auf Gaming-PC). Wenn XTTS offline ist, ist ARIA stumm — bewusst akzeptiert.
- [x] Gespraechsmodus: Speech-Gate strenger (-28dB / 500ms) — keine Umgebungsgeraeusche mehr
- [x] Gespraechsmodus: Max-Dauer 30s pro Aufnahme, Cache-Cleanup alter Files, Messages-Array gekappt (500)
- [x] Diagnostic: Archivierte Session-Versionen (.reset.*) werden angezeigt + exportierbar — OpenClaw resettet Sessions bei erster Nutzung nach Container-Restart, Inhalt ist aber in .reset.*-Dateien gesichert
@@ -65,11 +67,7 @@
- [ ] QR-Code Onboarding: Diagnostic generiert QR mit RVS-Credentials, App scannt — keine manuelle Eingabe mehr
### TTS / Audio
-- [ ] XTTS Audio-Streaming (PCM-Stream statt WAV-Dateien, eliminiert Stottern komplett)
- [ ] Audio-Normalisierung (Lautstaerke zwischen Chunks angleichen)
-- [ ] Piper Voices Download ueber Diagnostic (neue Sprachen/Stimmen)
-- [ ] TTS-Text-Aufbereitung: Code-Bloecke rausfiltern, Einheiten ausschreiben ("22GB" → "zweiundzwanzig Gigabyte"). Zwei Varianten denkbar: (a) server-side Cleanup in Bridge, (b) ARIA schreibt `` Block der in UI hidden bleibt aber fuer TTS genutzt wird.
-- [ ] Piper evtl. komplett entfernen (klingt schlecht vs. XTTS) — oder nur als Fallback wenn XTTS offline ist
### Architektur
- [ ] Bilder: Claude Vision direkt nutzen (aktuell nur Dateipfad an ARIA)