feat: XTTS v2 integration, auto-update system, TTS engine abstraction

- XTTS v2: Docker setup for Gaming-PC (GPU), bridge via RVS relay
- XTTS: Voice cloning UI in Diagnostic (multi-file upload)
- XTTS: Engine selectable (Piper local vs XTTS remote) with fallback
- Auto-Update: RVS serves APK over WebSocket (no HTTP needed)
- Auto-Update: App checks version on start, prompts install
- Auto-Update: release.sh copies APK to RVS via scp
- Bridge: TTS engine abstraction (piper/xtts), engine and voice selection persisted in the voice config
- Bridge: xtts_response handler, tts_request on-demand
- Diagnostic: TTS engine dropdown, XTTS voice panel, voice cloning
- App: Play button on ARIA messages, chat search, update service
- Wake word: Disabled LiveAudioStream (crash fix), Phase 1 placeholder
- Watchdog: Restart container after 8 min in a stuck state
- Chat backup: written on the fly to /shared/config/chat_backup.jsonl

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-05 19:42:10 +02:00
parent 81ca3cc7a7
commit a242693751
16 changed files with 826 additions and 13 deletions
+73 -11
View File
@@ -503,6 +503,8 @@ class ARIABridge:
"thorsten": vc.get("speedThorsten", 1.0),
}
self.tts_enabled = vc.get("ttsEnabled", True)
self.tts_engine_type = vc.get("ttsEngine", "piper")
self.xtts_voice = vc.get("xttsVoice", "")
logger.info("Voice-Config geladen: %s", vc)
except Exception as e:
logger.warning("Voice-Config laden fehlgeschlagen: %s", e)
@@ -846,17 +848,47 @@ class ARIABridge:
# TTS-Audio rendern und an die App senden (wenn Modus es erlaubt)
if getattr(self, 'tts_enabled', True) and should_speak(self.current_mode, is_critical):
audio_data = self.voice_engine.synthesize(text, voice_name)
if audio_data:
audio_b64 = base64.b64encode(audio_data).decode("ascii")
await self._send_to_rvs({
"type": "audio",
"payload": {
"base64": audio_b64,
"mimeType": "audio/wav",
"voice": voice_name,
},
"timestamp": int(asyncio.get_event_loop().time() * 1000),
tts_engine = getattr(self, 'tts_engine_type', 'piper')
if tts_engine == "xtts":
# XTTS: Request ueber RVS an Gaming-PC senden
xtts_voice = getattr(self, 'xtts_voice', '')
try:
await self._send_to_rvs({
"type": "xtts_request",
"payload": {
"text": text,
"voice": xtts_voice,
"language": "de",
"requestId": str(uuid.uuid4()),
},
"timestamp": int(asyncio.get_event_loop().time() * 1000),
})
logger.info("[core] XTTS-Request gesendet (%s): '%s'", xtts_voice or "default", text[:60])
except Exception as e:
logger.warning("[core] XTTS-Request fehlgeschlagen: %s — Fallback auf Piper", e)
# Fallback auf Piper
audio_data = self.voice_engine.synthesize(text, voice_name)
if audio_data:
audio_b64 = base64.b64encode(audio_data).decode("ascii")
await self._send_to_rvs({
"type": "audio",
"payload": {"base64": audio_b64, "mimeType": "audio/wav", "voice": voice_name},
"timestamp": int(asyncio.get_event_loop().time() * 1000),
})
else:
# Piper: Lokal rendern
audio_data = self.voice_engine.synthesize(text, voice_name)
if audio_data:
audio_b64 = base64.b64encode(audio_data).decode("ascii")
await self._send_to_rvs({
"type": "audio",
"payload": {
"base64": audio_b64,
"mimeType": "audio/wav",
"voice": voice_name,
},
"timestamp": int(asyncio.get_event_loop().time() * 1000),
})
logger.info("[core] TTS-Audio gesendet: %d bytes (%s)", len(audio_data), voice_name)
@@ -1014,6 +1046,26 @@ class ARIABridge:
if sender in ("aria", "stt"):
return
elif msg_type == "xtts_response":
# XTTS-Audio vom Gaming-PC empfangen → an App weiterleiten
audio_b64 = payload.get("base64", "")
error = payload.get("error", "")
if error:
logger.warning("[rvs] XTTS Fehler: %s", error)
return
if audio_b64:
logger.info("[rvs] XTTS-Audio empfangen: %dKB", len(audio_b64) // 1365)
await self._send_to_rvs({
"type": "audio",
"payload": {
"base64": audio_b64,
"mimeType": payload.get("mimeType", "audio/wav"),
"voice": payload.get("voice", "xtts"),
},
"timestamp": int(asyncio.get_event_loop().time() * 1000),
})
return
elif msg_type == "tts_request":
# App fordert TTS-Audio fuer einen Text an (Play-Button)
text = payload.get("text", "")
@@ -1057,6 +1109,14 @@ class ARIABridge:
self.tts_enabled = bool(payload["ttsEnabled"])
logger.info("[rvs] TTS %s", "aktiviert" if self.tts_enabled else "deaktiviert")
changed = True
if "ttsEngine" in payload:
self.tts_engine_type = payload["ttsEngine"]
logger.info("[rvs] TTS-Engine: %s", self.tts_engine_type)
changed = True
if "xttsVoice" in payload:
self.xtts_voice = payload["xttsVoice"]
logger.info("[rvs] XTTS-Stimme: %s", self.xtts_voice)
changed = True
if "speedRamona" in payload:
self.voice_engine.speech_speed["ramona"] = max(0.3, min(2.0, float(payload["speedRamona"])))
logger.info("[rvs] Speed Ramona: %.1f", self.voice_engine.speech_speed["ramona"])
@@ -1073,6 +1133,8 @@ class ARIABridge:
"defaultVoice": self.voice_engine.default_voice,
"highlightVoice": self.voice_engine.highlight_voice,
"ttsEnabled": getattr(self, "tts_enabled", True),
"ttsEngine": getattr(self, "tts_engine_type", "piper"),
"xttsVoice": getattr(self, "xtts_voice", ""),
"speedRamona": self.voice_engine.speech_speed.get("ramona", 1.0),
"speedThorsten": self.voice_engine.speech_speed.get("thorsten", 1.0),
}