feat: XTTS v2 integration, auto-update system, TTS engine abstraction
- XTTS v2: Docker setup for Gaming-PC (GPU), bridge via RVS relay
- XTTS: Voice cloning UI in Diagnostic (multi-file upload)
- XTTS: Engine selectable (Piper local vs XTTS remote) with fallback
- Auto-Update: RVS serves APK over WebSocket (no HTTP needed)
- Auto-Update: App checks version on start, prompts install
- Auto-Update: release.sh copies APK to RVS via scp
- Bridge: TTS engine abstraction (piper/xtts), config persistent
- Bridge: xtts_response handler, tts_request on-demand
- Diagnostic: TTS engine dropdown, XTTS voice panel, voice cloning
- App: Play button on ARIA messages, chat search, update service
- Wake word: Disabled LiveAudioStream (crash fix), Phase 1 placeholder
- Watchdog: Container restart after 8min stuck
- Chat backup: on-the-fly to /shared/config/chat_backup.jsonl

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
+73
-11
@@ -503,6 +503,8 @@ class ARIABridge:
|
||||
"thorsten": vc.get("speedThorsten", 1.0),
|
||||
}
|
||||
self.tts_enabled = vc.get("ttsEnabled", True)
|
||||
self.tts_engine_type = vc.get("ttsEngine", "piper")
|
||||
self.xtts_voice = vc.get("xttsVoice", "")
|
||||
logger.info("Voice-Config geladen: %s", vc)
|
||||
except Exception as e:
|
||||
logger.warning("Voice-Config laden fehlgeschlagen: %s", e)
|
||||
@@ -846,17 +848,47 @@ class ARIABridge:
|
||||
|
||||
# TTS-Audio rendern und an die App senden (wenn Modus es erlaubt)
|
||||
if getattr(self, 'tts_enabled', True) and should_speak(self.current_mode, is_critical):
|
||||
audio_data = self.voice_engine.synthesize(text, voice_name)
|
||||
if audio_data:
|
||||
audio_b64 = base64.b64encode(audio_data).decode("ascii")
|
||||
await self._send_to_rvs({
|
||||
"type": "audio",
|
||||
"payload": {
|
||||
"base64": audio_b64,
|
||||
"mimeType": "audio/wav",
|
||||
"voice": voice_name,
|
||||
},
|
||||
"timestamp": int(asyncio.get_event_loop().time() * 1000),
|
||||
tts_engine = getattr(self, 'tts_engine_type', 'piper')
|
||||
|
||||
if tts_engine == "xtts":
|
||||
# XTTS: Request ueber RVS an Gaming-PC senden
|
||||
xtts_voice = getattr(self, 'xtts_voice', '')
|
||||
try:
|
||||
await self._send_to_rvs({
|
||||
"type": "xtts_request",
|
||||
"payload": {
|
||||
"text": text,
|
||||
"voice": xtts_voice,
|
||||
"language": "de",
|
||||
"requestId": str(uuid.uuid4()),
|
||||
},
|
||||
"timestamp": int(asyncio.get_event_loop().time() * 1000),
|
||||
})
|
||||
logger.info("[core] XTTS-Request gesendet (%s): '%s'", xtts_voice or "default", text[:60])
|
||||
except Exception as e:
|
||||
logger.warning("[core] XTTS-Request fehlgeschlagen: %s — Fallback auf Piper", e)
|
||||
# Fallback auf Piper
|
||||
audio_data = self.voice_engine.synthesize(text, voice_name)
|
||||
if audio_data:
|
||||
audio_b64 = base64.b64encode(audio_data).decode("ascii")
|
||||
await self._send_to_rvs({
|
||||
"type": "audio",
|
||||
"payload": {"base64": audio_b64, "mimeType": "audio/wav", "voice": voice_name},
|
||||
"timestamp": int(asyncio.get_event_loop().time() * 1000),
|
||||
})
|
||||
else:
|
||||
# Piper: Lokal rendern
|
||||
audio_data = self.voice_engine.synthesize(text, voice_name)
|
||||
if audio_data:
|
||||
audio_b64 = base64.b64encode(audio_data).decode("ascii")
|
||||
await self._send_to_rvs({
|
||||
"type": "audio",
|
||||
"payload": {
|
||||
"base64": audio_b64,
|
||||
"mimeType": "audio/wav",
|
||||
"voice": voice_name,
|
||||
},
|
||||
"timestamp": int(asyncio.get_event_loop().time() * 1000),
|
||||
})
|
||||
logger.info("[core] TTS-Audio gesendet: %d bytes (%s)", len(audio_data), voice_name)
|
||||
|
||||
@@ -1014,6 +1046,26 @@ class ARIABridge:
|
||||
if sender in ("aria", "stt"):
|
||||
return
|
||||
|
||||
elif msg_type == "xtts_response":
|
||||
# XTTS-Audio vom Gaming-PC empfangen → an App weiterleiten
|
||||
audio_b64 = payload.get("base64", "")
|
||||
error = payload.get("error", "")
|
||||
if error:
|
||||
logger.warning("[rvs] XTTS Fehler: %s", error)
|
||||
return
|
||||
if audio_b64:
|
||||
logger.info("[rvs] XTTS-Audio empfangen: %dKB", len(audio_b64) // 1365)
|
||||
await self._send_to_rvs({
|
||||
"type": "audio",
|
||||
"payload": {
|
||||
"base64": audio_b64,
|
||||
"mimeType": payload.get("mimeType", "audio/wav"),
|
||||
"voice": payload.get("voice", "xtts"),
|
||||
},
|
||||
"timestamp": int(asyncio.get_event_loop().time() * 1000),
|
||||
})
|
||||
return
|
||||
|
||||
elif msg_type == "tts_request":
|
||||
# App fordert TTS-Audio fuer einen Text an (Play-Button)
|
||||
text = payload.get("text", "")
|
||||
@@ -1057,6 +1109,14 @@ class ARIABridge:
|
||||
self.tts_enabled = bool(payload["ttsEnabled"])
|
||||
logger.info("[rvs] TTS %s", "aktiviert" if self.tts_enabled else "deaktiviert")
|
||||
changed = True
|
||||
if "ttsEngine" in payload:
|
||||
self.tts_engine_type = payload["ttsEngine"]
|
||||
logger.info("[rvs] TTS-Engine: %s", self.tts_engine_type)
|
||||
changed = True
|
||||
if "xttsVoice" in payload:
|
||||
self.xtts_voice = payload["xttsVoice"]
|
||||
logger.info("[rvs] XTTS-Stimme: %s", self.xtts_voice)
|
||||
changed = True
|
||||
if "speedRamona" in payload:
|
||||
self.voice_engine.speech_speed["ramona"] = max(0.3, min(2.0, float(payload["speedRamona"])))
|
||||
logger.info("[rvs] Speed Ramona: %.1f", self.voice_engine.speech_speed["ramona"])
|
||||
@@ -1073,6 +1133,8 @@ class ARIABridge:
|
||||
"defaultVoice": self.voice_engine.default_voice,
|
||||
"highlightVoice": self.voice_engine.highlight_voice,
|
||||
"ttsEnabled": getattr(self, "tts_enabled", True),
|
||||
"ttsEngine": getattr(self, "tts_engine_type", "piper"),
|
||||
"xttsVoice": getattr(self, "xtts_voice", ""),
|
||||
"speedRamona": self.voice_engine.speech_speed.get("ramona", 1.0),
|
||||
"speedThorsten": self.voice_engine.speech_speed.get("thorsten", 1.0),
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user