Compare commits
15 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| a7eb3cf433 | |||
| e4e0e793a8 | |||
| b3d3b8b6bc | |||
| 06bc456221 | |||
| 3461f45207 | |||
| a17d4acc13 | |||
| 62fd9193a1 | |||
| 2329645df4 | |||
| 8a435ddf6c | |||
| 25b754ba31 | |||
| b734593bf2 | |||
| 16847ce6f7 | |||
| 6300829317 | |||
| a1e1ee31bd | |||
| 7ed70b876d |
@@ -530,38 +530,68 @@ cp ARIA-v0.0.3.0.apk ~/ARIA-AGENT/rvs/updates/
|
||||
## XTTS v2 — GPU TTS Server (optional)
|
||||
|
||||
Laeuft auf einem separaten Rechner mit NVIDIA GPU (z.B. Gaming-PC mit RTX 3060).
|
||||
Verbindet sich ueber RVS mit der ARIA-Infrastruktur — kein VPN noetig.
|
||||
Verbindet sich ueber RVS mit der ARIA-Infrastruktur — kein VPN noetig, funktioniert
|
||||
ueber verschiedene Netze hinweg.
|
||||
|
||||
### Architektur
|
||||
|
||||
```
|
||||
Gaming-PC (Windows, RTX 3060, Docker Desktop + WSL2)
|
||||
├── aria-xtts XTTS v2 GPU Server (Port 8020 intern)
|
||||
└── aria-xtts-bridge RVS-Relay (empfaengt Requests, sendet Audio)
|
||||
└── Beide teilen ./voices/ Volume fuer Voice Cloning
|
||||
|
||||
↕ RVS (Rechenzentrum, WebSocket Relay)
|
||||
|
||||
ARIA-VM
|
||||
└── aria-bridge: tts_engine="xtts" → xtts_request via RVS → wartet auf xtts_response
|
||||
```
|
||||
|
||||
### Voraussetzungen
|
||||
|
||||
- Docker Desktop mit WSL2 (Windows) oder Docker mit NVIDIA Runtime (Linux)
|
||||
- NVIDIA Container Toolkit
|
||||
- GPU mit mindestens 4GB VRAM (6GB+ empfohlen)
|
||||
- **Gleicher RVS_TOKEN wie auf der ARIA-VM!**
|
||||
|
||||
### Setup
|
||||
|
||||
```bash
|
||||
cd xtts
|
||||
cp .env.example .env
|
||||
# .env mit RVS-Verbindungsdaten fuellen (gleiche wie auf der ARIA-VM)
|
||||
# .env mit RVS-Verbindungsdaten fuellen (gleicher Token wie ARIA-VM!)
|
||||
docker compose up -d
|
||||
# Erster Start laedt ~2GB Model herunter
|
||||
# Erster Start laedt ~2GB Model herunter (danach gecacht)
|
||||
```
|
||||
|
||||
**Wichtig:** Der XTTS-Server laeuft im Container auf Port **8020** (nicht 8000); auf dem Host ist er ueber Port 8000 erreichbar (Port-Mapping `8000:8020`).
|
||||
Das Model wird im Volume `xtts-models` gecacht und muss nur einmal geladen werden.
|
||||
|
||||
### Features
|
||||
|
||||
- **Natuerliche Stimmen**: Deutlich bessere Qualitaet als Piper
|
||||
- **Voice Cloning**: Eigene Stimme mit 6-10s Audio-Sample
|
||||
- **Voice Cloning**: Eigene Stimme mit 6-10s Audio-Sample (~2s Latenz auf RTX 3060)
|
||||
- **16 Sprachen**: Deutsch, Englisch, Franzoesisch, etc.
|
||||
- **RVS-Integration**: Bridge waehlt automatisch XTTS wenn verfuegbar
|
||||
- **Fallback**: Wenn XTTS nicht erreichbar, nutzt die Bridge automatisch Piper
|
||||
|
||||
### TTS-Engine umschalten
|
||||
|
||||
In der Diagnostic unter Einstellungen → Sprachausgabe:
|
||||
- **TTS aktiv**: Global An/Aus
|
||||
- **TTS Engine**: Piper (lokal, CPU, schnell) oder XTTS v2 (remote, GPU, natuerlich)
|
||||
- **Piper**: Standard-Stimme, Highlight-Stimme, Speed pro Stimme
|
||||
- **XTTS**: Stimmen-Auswahl, Voice Cloning
|
||||
|
||||
### Stimme klonen
|
||||
|
||||
In der Diagnostic unter Einstellungen → Sprachausgabe → XTTS:
|
||||
1. TTS Engine auf "XTTS v2" stellen
|
||||
2. "Stimme klonen" → Audio-Dateien hochladen (WAV/MP3, min. 6-10s)
|
||||
2. "Stimme klonen" → Audio-Dateien hochladen (WAV/MP3, 1-10 Dateien, min. 6-10s gesamt)
|
||||
3. Name vergeben → "Stimme erstellen"
|
||||
4. Neue Stimme in der Auswahl verfuegbar
|
||||
4. "Laden" klicken → neue Stimme in der Auswahl
|
||||
5. Stimme auswaehlen → Config wird automatisch gespeichert
|
||||
|
||||
> **Tipp:** Fuer beste Ergebnisse: saubere Aufnahme, eine Stimme, kein Hintergrund,
|
||||
> 10-30 Sekunden Gesamtlaenge. Mehrere kurze Dateien werden zusammengefuegt.
|
||||
|
||||
---
|
||||
|
||||
@@ -633,6 +663,8 @@ docker exec aria-core ssh aria-wohnung hostname
|
||||
- **Wake Word nur auf VM**: Die Bridge hoert auf "ARIA" ueber das lokale Mikrofon der VM.
|
||||
In der App gibt es Energy-basierte Erkennung (Phase 1). On-device "ARIA"-Keyword (Porcupine) ist Phase 2.
|
||||
- **Audio-Format**: App nimmt AAC/MP4 auf, Bridge konvertiert via FFmpeg zu 16kHz PCM.
|
||||
- **RVS Zombie-Connections**: WebSocket-Verbindungen sterben gelegentlich ohne Fehlermeldung.
|
||||
Bridge hat Ping-Check (5s), Diagnostic nutzt frische Verbindungen pro Request.
|
||||
- **Bildanalyse eingeschraenkt**: Bilder werden in `/shared/uploads/` gespeichert. ARIA kann
|
||||
sie per Bash/Read-Tool oeffnen, aber Claude Vision (direkte Bildanalyse) ist ueber den
|
||||
Proxy-Pfad (`claude --print`) noch nicht moeglich. ARIA sieht den Dateipfad, nicht das Bild.
|
||||
|
||||
@@ -79,8 +79,8 @@ android {
|
||||
applicationId "com.ariacockpit"
|
||||
minSdkVersion rootProject.ext.minSdkVersion
|
||||
targetSdkVersion rootProject.ext.targetSdkVersion
|
||||
versionCode 205
|
||||
versionName "0.0.2.5"
|
||||
versionCode 206
|
||||
versionName "0.0.2.6"
|
||||
// Fallback fuer Libraries mit Product Flavors
|
||||
missingDimensionStrategy 'react-native-camera', 'general'
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "aria-cockpit",
|
||||
"version": "0.0.2.5",
|
||||
"version": "0.0.2.6",
|
||||
"private": true,
|
||||
"scripts": {
|
||||
"android": "react-native run-android",
|
||||
|
||||
@@ -748,7 +748,7 @@ const SettingsScreen: React.FC = () => {
|
||||
<Text style={styles.sectionTitle}>{'\u00DC'}ber</Text>
|
||||
<View style={styles.card}>
|
||||
<Text style={styles.aboutTitle}>ARIA Cockpit</Text>
|
||||
<Text style={styles.aboutVersion}>Version 0.0.2.5 </Text>
|
||||
<Text style={styles.aboutVersion}>Version 0.0.2.6 </Text>
|
||||
<Text style={styles.aboutInfo}>
|
||||
Stefans Kommandozentrale f{'\u00FC'}r ARIA.{'\n'}
|
||||
Gebaut mit React Native + TypeScript.
|
||||
|
||||
@@ -55,6 +55,10 @@ class AudioService {
|
||||
private recorder: AudioRecorderPlayer;
|
||||
private recordingPath: string = '';
|
||||
|
||||
// Audio-Queue fuer sequentielle TTS-Wiedergabe
|
||||
private audioQueue: string[] = [];
|
||||
private isPlaying: boolean = false;
|
||||
|
||||
// VAD State
|
||||
private vadEnabled: boolean = false;
|
||||
private lastSpeechTime: number = 0;
|
||||
@@ -198,15 +202,27 @@ class AudioService {
|
||||
|
||||
// --- Wiedergabe ---
|
||||
|
||||
/** Base64-kodiertes Audio abspielen (z.B. TTS-Antwort von ARIA) */
|
||||
/** Base64-kodiertes Audio in die Queue stellen und abspielen */
|
||||
async playAudio(base64Data: string): Promise<void> {
|
||||
if (!base64Data) return;
|
||||
|
||||
// Laufende Wiedergabe stoppen
|
||||
this.stopPlayback();
|
||||
this.audioQueue.push(base64Data);
|
||||
if (!this.isPlaying) {
|
||||
this._playNext();
|
||||
}
|
||||
}
|
||||
|
||||
/** Naechstes Audio aus der Queue abspielen */
|
||||
private async _playNext(): Promise<void> {
|
||||
if (this.audioQueue.length === 0) {
|
||||
this.isPlaying = false;
|
||||
return;
|
||||
}
|
||||
|
||||
this.isPlaying = true;
|
||||
const base64Data = this.audioQueue.shift()!;
|
||||
|
||||
try {
|
||||
// Base64 -> temporaere WAV-Datei -> Sound abspielen
|
||||
const tmpPath = `${RNFS.CachesDirectoryPath}/aria_tts_${Date.now()}.wav`;
|
||||
await RNFS.writeFile(tmpPath, base64Data, 'base64');
|
||||
|
||||
@@ -214,6 +230,7 @@ class AudioService {
|
||||
if (error) {
|
||||
console.error('[Audio] Fehler beim Laden:', error);
|
||||
RNFS.unlink(tmpPath).catch(() => {});
|
||||
this._playNext();
|
||||
return;
|
||||
}
|
||||
this.currentSound?.play((success) => {
|
||||
@@ -225,15 +242,20 @@ class AudioService {
|
||||
this.currentSound?.release();
|
||||
this.currentSound = null;
|
||||
RNFS.unlink(tmpPath).catch(() => {});
|
||||
// Naechstes Audio abspielen
|
||||
this._playNext();
|
||||
});
|
||||
});
|
||||
} catch (err) {
|
||||
console.error('[Audio] Wiedergabefehler:', err);
|
||||
this._playNext();
|
||||
}
|
||||
}
|
||||
|
||||
/** Laufende Wiedergabe stoppen */
|
||||
/** Laufende Wiedergabe stoppen + Queue leeren */
|
||||
stopPlayback(): void {
|
||||
this.audioQueue = [];
|
||||
this.isPlaying = false;
|
||||
if (this.currentSound) {
|
||||
this.currentSound.stop();
|
||||
this.currentSound.release();
|
||||
|
||||
+16
-2
@@ -851,7 +851,7 @@ class ARIABridge:
|
||||
tts_engine = getattr(self, 'tts_engine_type', 'piper')
|
||||
|
||||
if tts_engine == "xtts":
|
||||
# XTTS: Request ueber RVS an Gaming-PC senden
|
||||
# XTTS: Ganzen Text senden, XTTS-Bridge teilt satzweise auf
|
||||
xtts_voice = getattr(self, 'xtts_voice', '')
|
||||
try:
|
||||
await self._send_to_rvs({
|
||||
@@ -1354,10 +1354,24 @@ class ARIABridge:
|
||||
pass
|
||||
|
||||
async def _send_to_rvs(self, message: dict) -> None:
|
||||
"""Sendet eine Nachricht an die App (via RVS)."""
|
||||
"""Sendet eine Nachricht an die App (via RVS) mit Verbindungs-Check."""
|
||||
if self.ws_rvs is None:
|
||||
return
|
||||
|
||||
# Ping-Check: Verbindung wirklich aktiv?
|
||||
try:
|
||||
pong = await self.ws_rvs.ping()
|
||||
await asyncio.wait_for(pong, timeout=5)
|
||||
except Exception:
|
||||
logger.warning("[rvs] Ping fehlgeschlagen — Verbindung tot, erzwinge Reconnect")
|
||||
try:
|
||||
await self.ws_rvs.close()
|
||||
except Exception:
|
||||
pass
|
||||
self.ws_rvs = None
|
||||
# Reconnect wird vom connect_to_rvs Loop uebernommen
|
||||
return
|
||||
|
||||
try:
|
||||
await self.ws_rvs.send(json.dumps(message))
|
||||
except Exception:
|
||||
|
||||
+48
-22
@@ -401,6 +401,12 @@
|
||||
<div class="settings-section">
|
||||
<h2>Sprachausgabe</h2>
|
||||
<div class="card" style="max-width:500px;">
|
||||
<!-- TTS aktiv (global fuer alle Engines) -->
|
||||
<div style="display:flex;align-items:center;gap:12px;margin-bottom:12px;">
|
||||
<label style="color:#8888AA;font-size:12px;">TTS aktiv:</label>
|
||||
<label class="toggle"><input type="checkbox" id="diag-tts-enabled" checked onchange="sendVoiceConfig()"><span class="slider"></span></label>
|
||||
</div>
|
||||
|
||||
<!-- TTS Engine Auswahl -->
|
||||
<div style="display:flex;align-items:center;gap:12px;margin-bottom:12px;">
|
||||
<label style="color:#8888AA;font-size:12px;">TTS Engine:</label>
|
||||
@@ -426,10 +432,6 @@
|
||||
<option value="ramona">Ramona (weiblich)</option>
|
||||
</select>
|
||||
</div>
|
||||
<div style="display:flex;align-items:center;gap:12px;margin-bottom:12px;">
|
||||
<label style="color:#8888AA;font-size:12px;">TTS aktiv:</label>
|
||||
<label class="toggle"><input type="checkbox" id="diag-tts-enabled" checked onchange="sendVoiceConfig()"><span class="slider"></span></label>
|
||||
</div>
|
||||
<div style="margin-bottom:4px;">
|
||||
<label style="color:#8888AA;font-size:12px;">Ramona Speed: <span id="speed-ramona-label">1.0x</span></label>
|
||||
</div>
|
||||
@@ -744,7 +746,16 @@
|
||||
document.getElementById('diag-speed-thorsten').value = st;
|
||||
document.getElementById('speed-thorsten-label').textContent = st + 'x';
|
||||
document.getElementById('diag-tts-engine').value = msg.ttsEngine || 'piper';
|
||||
document.getElementById('diag-xtts-voice').value = msg.xttsVoice || '';
|
||||
// XTTS-Voice setzen — Option hinzufuegen falls nicht vorhanden
|
||||
const xttsSelect = document.getElementById('diag-xtts-voice');
|
||||
const xttsVoice = msg.xttsVoice || '';
|
||||
if (xttsVoice && !Array.from(xttsSelect.options).some(o => o.value === xttsVoice)) {
|
||||
const opt = document.createElement('option');
|
||||
opt.value = xttsVoice;
|
||||
opt.textContent = xttsVoice;
|
||||
xttsSelect.appendChild(opt);
|
||||
}
|
||||
xttsSelect.value = xttsVoice;
|
||||
toggleXTTSPanel();
|
||||
return;
|
||||
}
|
||||
@@ -1247,7 +1258,16 @@
|
||||
}
|
||||
|
||||
function loadXTTSVoices() {
|
||||
sendToRVS_raw({ type: 'xtts_list_voices', payload: {}, timestamp: Date.now() });
|
||||
send({ action: 'xtts_list_voices' });
|
||||
}
|
||||
|
||||
/**
 * Encode the bytes of an ArrayBuffer as a Base64 string.
 *
 * The bytes are converted to a binary string in slices of 8192 bytes so that
 * no single String.fromCharCode call receives the whole file as its argument
 * list (spreading a large buffer in one call can exceed the engine's
 * argument limit).
 *
 * @param buffer ArrayBuffer holding the raw bytes to encode.
 * @returns The Base64 representation of the bytes ('' for an empty buffer).
 */
function arrayBufferToBase64(buffer) {
  const SLICE_SIZE = 8192;
  const view = new Uint8Array(buffer);
  const pieces = [];
  let offset = 0;
  while (offset < view.length) {
    const slice = view.subarray(offset, offset + SLICE_SIZE);
    pieces.push(String.fromCharCode.apply(null, slice));
    offset += SLICE_SIZE;
  }
  return btoa(pieces.join(''));
}
|
||||
|
||||
async function uploadVoiceSamples() {
|
||||
@@ -1255,25 +1275,31 @@
|
||||
const files = document.getElementById('xtts-clone-files').files;
|
||||
if (!name) { alert('Bitte einen Namen eingeben'); return; }
|
||||
if (!files || files.length === 0) { alert('Bitte Audio-Dateien auswaehlen'); return; }
|
||||
if (files.length > 10) { alert('Maximal 10 Dateien'); return; }
|
||||
|
||||
document.getElementById('xtts-clone-status').textContent = `Lade ${files.length} Datei(en) hoch...`;
|
||||
const status = document.getElementById('xtts-clone-status');
|
||||
status.textContent = `Lade ${files.length} Datei(en)...`;
|
||||
status.style.color = '#FFD60A';
|
||||
|
||||
const samples = [];
|
||||
for (const file of files) {
|
||||
const buffer = await file.arrayBuffer();
|
||||
const base64 = btoa(String.fromCharCode(...new Uint8Array(buffer)));
|
||||
samples.push({ base64, name: file.name, size: file.size });
|
||||
try {
|
||||
const samples = [];
|
||||
for (let i = 0; i < files.length; i++) {
|
||||
status.textContent = `Lese Datei ${i + 1}/${files.length}: ${files[i].name}...`;
|
||||
const buffer = await files[i].arrayBuffer();
|
||||
const base64 = arrayBufferToBase64(buffer);
|
||||
samples.push({ base64, name: files[i].name, size: files[i].size });
|
||||
}
|
||||
|
||||
const totalSize = samples.reduce((s, f) => s + f.size, 0);
|
||||
status.textContent = `Sende ${samples.length} Sample(s) (${(totalSize / 1024).toFixed(0)}KB)...`;
|
||||
|
||||
send({ action: 'voice_upload', name, samples });
|
||||
|
||||
status.textContent = `Gesendet — warte auf Bestaetigung vom XTTS-Server...`;
|
||||
} catch (err) {
|
||||
status.textContent = `Fehler: ${err.message}`;
|
||||
status.style.color = '#FF3B30';
|
||||
}
|
||||
|
||||
const totalSize = samples.reduce((s, f) => s + f.size, 0);
|
||||
document.getElementById('xtts-clone-status').textContent =
|
||||
`Sende ${samples.length} Sample(s) (${(totalSize / 1024).toFixed(0)}KB) an XTTS-Server...`;
|
||||
|
||||
sendToRVS_raw({
|
||||
type: 'voice_upload',
|
||||
payload: { name, samples },
|
||||
timestamp: Date.now(),
|
||||
});
|
||||
}
|
||||
|
||||
// ── Abbrechen ──────────────────────────────
|
||||
|
||||
@@ -560,6 +560,31 @@ function connectRVS(forcePlain) {
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Send a single request to the RVS relay over a dedicated, short-lived
 * WebSocket and forward the first reply of the expected type to the
 * requesting client socket.
 *
 * Exactly one message is delivered to `clientWs` per call: either the relay's
 * reply, or an error message of type `expectType` whose payload carries
 * `{ voices: [], error }` (same shape as the timeout reply).
 *
 * @param sendType    Message type to send to the relay.
 * @param sendPayload Payload object sent along with `sendType`.
 * @param expectType  Message type of the reply to wait for; also used as the
 *                    type of any error message sent back to the client.
 * @param clientWs    Client WebSocket that receives the reply or the error.
 */
function sendToRVS_withResponse(sendType, sendPayload, expectType, clientWs) {
  // Best-effort delivery to the client: it may have disconnected meanwhile,
  // and a throwing send() inside a timer/event callback must not take the
  // whole server process down.
  const replyToClient = (msgObj) => {
    try { clientWs.send(JSON.stringify(msgObj)); } catch (_) {}
  };
  const errorMessage = (reason) => ({
    type: expectType,
    payload: { voices: [], error: reason },
    timestamp: Date.now(),
  });

  if (!RVS_HOST || !RVS_TOKEN) {
    // Without relay configuration no reply can ever arrive; tell the client
    // right away instead of leaving it waiting indefinitely.
    replyToClient(errorMessage("RVS nicht konfiguriert"));
    return;
  }

  const proto = RVS_TLS === "true" ? "wss" : "ws";
  const url = `${proto}://${RVS_HOST}:${RVS_PORT}?token=${RVS_TOKEN}`;
  const freshWs = new WebSocket(url);

  // Guards against answering the client more than once (late duplicate
  // replies, or an error event emitted while closing after a timeout).
  let settled = false;
  let timeout = null;

  const closeFresh = () => { try { freshWs.close(); } catch (_) {} };

  const fail = (reason) => {
    if (settled) return;
    settled = true;
    clearTimeout(timeout);
    closeFresh();
    replyToClient(errorMessage(reason));
  };

  timeout = setTimeout(() => fail("Timeout"), 15000);

  freshWs.on("open", () => {
    freshWs.send(JSON.stringify({ type: sendType, payload: sendPayload, timestamp: Date.now() }));
  });

  freshWs.on("message", (raw) => {
    if (settled) return;
    let resp;
    try {
      resp = JSON.parse(raw.toString());
    } catch {
      // Frames that are not valid JSON are ignored, as before.
      return;
    }
    if (resp.type !== expectType) return;

    settled = true;
    clearTimeout(timeout);
    replyToClient(resp);
    // Keep the original 1 s grace period before closing the relay socket.
    setTimeout(closeFresh, 1000);
  });

  // Report connection failures immediately instead of swallowing them and
  // letting the client run into the generic 15 s timeout.
  freshWs.on("error", (err) => {
    const detail = err && err.message ? err.message : "unbekannt";
    fail(`RVS-Verbindung fehlgeschlagen: ${detail}`);
  });
}
|
||||
|
||||
function sendToRVS_raw(msgObj) {
|
||||
if (!RVS_HOST || !RVS_TOKEN) return;
|
||||
const proto = RVS_TLS === "true" ? "wss" : "ws";
|
||||
@@ -1165,6 +1190,13 @@ wss.on("connection", (ws) => {
|
||||
if (pipelineActive) pipelineEnd(false, "Vom Benutzer abgebrochen");
|
||||
broadcast({ type: "agent_activity", activity: "idle" });
|
||||
dockerExec("aria-core", "openclaw doctor --fix 2>/dev/null || true").catch(() => {});
|
||||
} else if (msg.action === "voice_upload") {
|
||||
// Voice-Samples an XTTS-Bridge via RVS weiterleiten, auf Bestätigung warten
|
||||
log("info", "server", `Voice-Upload '${msg.name}' (${(msg.samples || []).length} Samples) sende an RVS...`);
|
||||
sendToRVS_withResponse("voice_upload", { name: msg.name, samples: msg.samples }, "xtts_voice_saved", ws);
|
||||
} else if (msg.action === "xtts_list_voices") {
|
||||
// Frische Verbindung die auf Antwort wartet
|
||||
sendToRVS_withResponse("xtts_list_voices", {}, "xtts_voices_list", ws);
|
||||
} else if (msg.action === "get_voice_config") {
|
||||
handleGetVoiceConfig(ws);
|
||||
} else if (msg.action === "send_voice_config") {
|
||||
|
||||
+36
-28
@@ -97,39 +97,47 @@ async function handleTTSRequest(payload) {
|
||||
const { text, voice, requestId, language } = payload;
|
||||
if (!text) return;
|
||||
|
||||
log(`TTS-Request: "${text.slice(0, 60)}..." (voice: ${voice || "default"}, lang: ${language || "de"})`);
|
||||
// Markdown entfernen
|
||||
const cleanText = text.replace(/\*\*([^*]+)\*\*/g, "$1").trim();
|
||||
|
||||
// Text in Saetze aufteilen (sequentiell rendern fuer korrekte Reihenfolge)
|
||||
const sentences = cleanText.split(/(?<=[.!?])\s+/).map(s => s.trim()).filter(s => s.length > 0);
|
||||
if (sentences.length === 0) return;
|
||||
|
||||
log(`TTS-Request: "${cleanText.slice(0, 60)}..." (${sentences.length} Saetze, voice: ${voice || "default"}, lang: ${language || "de"})`);
|
||||
|
||||
try {
|
||||
// Voice-Sample Pfad bestimmen
|
||||
const voiceSample = voice ? path.join(VOICES_DIR, `${voice}.wav`) : null;
|
||||
const hasCustomVoice = voiceSample && fs.existsSync(voiceSample);
|
||||
|
||||
// XTTS API aufrufen
|
||||
const audioBuffer = await callXTTSAPI(text, language || "de", hasCustomVoice ? voiceSample : null);
|
||||
// Jeden Satz sequentiell rendern und sofort senden
|
||||
for (let i = 0; i < sentences.length; i++) {
|
||||
const sentence = sentences[i];
|
||||
try {
|
||||
const audioBuffer = await callXTTSAPI(sentence, language || "de", hasCustomVoice ? voiceSample : null);
|
||||
|
||||
if (audioBuffer && audioBuffer.length > 100) {
|
||||
const base64 = audioBuffer.toString("base64");
|
||||
log(`TTS fertig: ${audioBuffer.length} bytes (${(audioBuffer.length / 1024).toFixed(0)}KB)`);
|
||||
if (audioBuffer && audioBuffer.length > 100) {
|
||||
const base64 = audioBuffer.toString("base64");
|
||||
log(`TTS [${i + 1}/${sentences.length}]: ${audioBuffer.length} bytes (${(audioBuffer.length / 1024).toFixed(0)}KB) — "${sentence.slice(0, 40)}..."`);
|
||||
|
||||
sendToRVS({
|
||||
type: "xtts_response",
|
||||
payload: {
|
||||
requestId: requestId || "",
|
||||
base64,
|
||||
mimeType: "audio/wav",
|
||||
voice: voice || "default",
|
||||
engine: "xtts",
|
||||
},
|
||||
timestamp: Date.now(),
|
||||
});
|
||||
} else {
|
||||
log("TTS: Leeres Audio erhalten");
|
||||
sendToRVS({
|
||||
type: "xtts_response",
|
||||
payload: { requestId, error: "Leeres Audio" },
|
||||
timestamp: Date.now(),
|
||||
});
|
||||
sendToRVS({
|
||||
type: "xtts_response",
|
||||
payload: {
|
||||
requestId: `${requestId || ""}_${i}`,
|
||||
base64,
|
||||
mimeType: "audio/wav",
|
||||
voice: voice || "default",
|
||||
engine: "xtts",
|
||||
},
|
||||
timestamp: Date.now(),
|
||||
});
|
||||
}
|
||||
} catch (sentenceErr) {
|
||||
log(`TTS [${i + 1}/${sentences.length}] Fehler: ${sentenceErr.message} — ueberspringe`);
|
||||
}
|
||||
}
|
||||
|
||||
log(`TTS komplett: ${sentences.length} Saetze gerendert`);
|
||||
} catch (err) {
|
||||
log(`TTS Fehler: ${err.message}`);
|
||||
sendToRVS({
|
||||
@@ -257,12 +265,12 @@ log(`RVS: ${RVS_HOST}:${RVS_PORT}`);
|
||||
function waitForXTTS(callback, attempts) {
|
||||
if (attempts <= 0) { log("XTTS API nicht erreichbar — starte trotzdem"); callback(); return; }
|
||||
http.get(`${XTTS_API_URL}/docs`, (res) => {
|
||||
log("XTTS API erreichbar");
|
||||
log(`XTTS API erreichbar (HTTP ${res.statusCode})`);
|
||||
callback();
|
||||
}).on("error", () => {
|
||||
log(`XTTS API noch nicht bereit — warte (${attempts} Versuche uebrig)...`);
|
||||
setTimeout(() => waitForXTTS(callback, attempts - 1), 5000);
|
||||
setTimeout(() => waitForXTTS(callback, attempts - 1), 10000); // 10s statt 5s (Model laden dauert)
|
||||
});
|
||||
}
|
||||
|
||||
waitForXTTS(() => connectRVS(), 24); // Max 2min warten
|
||||
waitForXTTS(() => connectRVS(), 30); // Max 5min warten
|
||||
|
||||
@@ -17,7 +17,7 @@ services:
|
||||
|
||||
# ─── XTTS v2 API Server (GPU) ─────────────────
|
||||
xtts:
|
||||
image: ghcr.io/daswer123/xtts-api-server:latest
|
||||
image: daswer123/xtts-api-server:latest
|
||||
container_name: aria-xtts
|
||||
deploy:
|
||||
resources:
|
||||
@@ -27,9 +27,9 @@ services:
|
||||
count: 1
|
||||
capabilities: [gpu]
|
||||
ports:
|
||||
- "8000:8000"
|
||||
- "8000:8020"
|
||||
volumes:
|
||||
- xtts-models:/root/.local/share/tts # Model-Cache (~2GB)
|
||||
- xtts-models:/app/xtts_models # Model-Cache (~2GB)
|
||||
- ./voices:/voices # Custom Voice Samples
|
||||
environment:
|
||||
- COQUI_TOS_AGREED=1
|
||||
@@ -41,8 +41,10 @@ services:
|
||||
container_name: aria-xtts-bridge
|
||||
depends_on:
|
||||
- xtts
|
||||
volumes:
|
||||
- ./voices:/voices # Shared mit XTTS-Server
|
||||
environment:
|
||||
- XTTS_API_URL=http://xtts:8000
|
||||
- XTTS_API_URL=http://xtts:8020
|
||||
- RVS_HOST=${RVS_HOST}
|
||||
- RVS_PORT=${RVS_PORT:-443}
|
||||
- RVS_TLS=${RVS_TLS:-true}
|
||||
|
||||
Reference in New Issue
Block a user