From abc5b971f452e20cc3d95bfbf65ed3e3f43ba703 Mon Sep 17 00:00:00 2001 From: duffyduck Date: Sun, 26 Apr 2026 20:04:19 +0200 Subject: [PATCH] =?UTF-8?q?fix(voice):=20Stimmen-Wechsel=20greift=20wieder?= =?UTF-8?q?=20=E2=80=94=20Override=20bleibt=20bis=20naechster=20Chat-Event?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bug: Voice-Override wurde bereits beim ersten TTS-Call einer ARIA-Antwort konsumiert. Eine ARIA-Antwort triggert aber oft mehrere TTS-Calls (Tool-Use → Zwischenmeldung → finale Antwort). Der erste nutzte die neue Stimme, alle folgenden fielen auf self.xtts_voice (= alte Voice aus voice_config.json) zurueck. Die App schickt nie ein config-Update, daher blieb voice_config.json fuer immer auf der alten Stimme. Neue Semantik: - chat-/audio-Event mit voice="X" → Override="X", gilt fuer alle folgenden TTS-Calls bis zum naechsten chat-/audio-Event - chat-/audio-Event mit voice="" → Override geloescht, Fallback auf Default-Voice (voice_config.json / Diagnostic) - chat-/audio-Event ohne voice-Field → Override unveraendert Audio-Send in ChatScreen.tsx (Push-to-Talk-Pfad) gab voice/speed gar nicht mit; jetzt konsistent mit dem Tap-to-Talk-Pfad. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- android/src/screens/ChatScreen.tsx | 2 + bridge/aria_bridge.py | 64 +++++++++++++++--------------- 2 files changed, 34 insertions(+), 32 deletions(-) diff --git a/android/src/screens/ChatScreen.tsx b/android/src/screens/ChatScreen.tsx index faaa4be..0f64349 100644 --- a/android/src/screens/ChatScreen.tsx +++ b/android/src/screens/ChatScreen.tsx @@ -619,6 +619,8 @@ const ChatScreen: React.FC = () => { base64: result.base64, durationMs: result.durationMs, mimeType: result.mimeType, + voice: localXttsVoiceRef.current, + speed: ttsSpeedRef.current, ...(location && { location }), }); }, [getCurrentLocation]); diff --git a/bridge/aria_bridge.py b/bridge/aria_bridge.py index 8a7ae57..c5a6c01 100644 --- a/bridge/aria_bridge.py +++ b/bridge/aria_bridge.py @@ -907,18 +907,13 @@ class ARIABridge: logger.info("[core] TTS unterdrueckt (Modus: %s)", self.current_mode.config.name) return - # Voice bestimmen: App-Override fuer diesen Request > globale Default-Voice + # Voice bestimmen: App-Override (gesetzt durch letzten chat-Event) > globale + # Default-Voice. Der Override wird NICHT pro Antwort verbraucht — sonst nutzt + # eine Multi-Turn-Antwort von ARIA (Tool-Use + finale Antwort) ab dem zweiten + # TTS-Call wieder die alte Default-Stimme. Der Override bleibt gueltig bis + # zum naechsten chat-Event, wo er entweder ueberschrieben oder geloescht wird. 
xtts_voice = self._next_voice_override or getattr(self, 'xtts_voice', '') - # Override verbrauchen (gilt nur fuer genau diese naechste Antwort) - if self._next_voice_override: - logger.info("[core] Nutze Voice-Override: %s", self._next_voice_override) - self._next_voice_override = None - - # Speed ebenfalls aus App-Override nehmen (fallback 1.0) xtts_speed = self._next_speed_override or 1.0 - if self._next_speed_override: - logger.info("[core] Nutze Speed-Override: %.2fx", self._next_speed_override) - self._next_speed_override = None tts_text = tts_text_preview or text if not tts_text: @@ -1169,18 +1164,22 @@ class ARIABridge: if sender in ("aria", "stt"): return text = payload.get("text", "") - # Voice-Override fuer die naechste ARIA-Antwort merken - voice_override = payload.get("voice", "") - if voice_override: - self._next_voice_override = voice_override - logger.info("[rvs] Voice-Override fuer naechste Antwort: %s", voice_override) + # Voice-Override fuer Folgenachrichten setzen — gilt bis zum naechsten + # chat-Event. Leerer String "" = explizit Default-Voice (override loeschen). + # Field nicht gesendet = vorherigen Override unveraendert lassen (z.B. wenn + # cancel_request oder anderer Service die App umgeht). 
+ if "voice" in payload: + voice_override = payload.get("voice", "") or "" + self._next_voice_override = voice_override or None + logger.info("[rvs] Voice fuer Antworten: %s", + self._next_voice_override or "(Default)") # Speed-Override (TTS-Wiedergabegeschwindigkeit, pro Geraet) - try: - speed = float(payload.get("speed", 0) or 0) - if 0.1 <= speed <= 5.0: - self._next_speed_override = speed - except (TypeError, ValueError): - pass + if "speed" in payload: + try: + speed = float(payload.get("speed", 0) or 0) + self._next_speed_override = speed if 0.1 <= speed <= 5.0 else None + except (TypeError, ValueError): + self._next_speed_override = None if text: logger.info("[rvs] App-Chat: '%s'", text[:80]) await self.send_to_core(text, source="app") @@ -1444,17 +1443,18 @@ class ARIABridge: if not audio_b64: logger.warning("[rvs] Audio ohne Daten empfangen") return - # Voice-Override fuer die kommende ARIA-Antwort (App-lokal gewaehlt) - voice_override = payload.get("voice", "") - if voice_override: - self._next_voice_override = voice_override - logger.info("[rvs] Voice-Override (via Audio): %s", voice_override) - try: - speed = float(payload.get("speed", 0) or 0) - if 0.1 <= speed <= 5.0: - self._next_speed_override = speed - except (TypeError, ValueError): - pass + # Voice-Override fuer Folgenachrichten — gleiche Semantik wie beim chat-Event. + if "voice" in payload: + voice_override = payload.get("voice", "") or "" + self._next_voice_override = voice_override or None + logger.info("[rvs] Voice fuer Antworten (via Audio): %s", + self._next_voice_override or "(Default)") + if "speed" in payload: + try: + speed = float(payload.get("speed", 0) or 0) + self._next_speed_override = speed if 0.1 <= speed <= 5.0 else None + except (TypeError, ValueError): + self._next_speed_override = None logger.info("[rvs] Audio empfangen: %s, %dms, %dKB", mime_type, duration_ms, len(audio_b64) // 1365) asyncio.create_task(self._process_app_audio(audio_b64, mime_type))