# Voice-router constants.
# Seconds a project context stays "sticky" after the last routed voice message.
_VOICE_STICKY_TIMEOUT_SEC = 30.0
# Whole-utterance meta command that switches back to the main chat
# (optionally addressed with a leading "aria").
_VOICE_META_BACK_TO_MAIN = re.compile(
    r"^\s*(?:aria[,.]?\s+)?(?:zur(?:ü|ue)ck\s+zum\s+hauptchat|hauptchat\s+bitte|aria\s+hauptchat)\s*[.!?]?\s*$",
    re.IGNORECASE,
)
# Prefix addressing: group 1 is the spoken project name (2-40 chars),
# group 2 is the actual message after the ":", "-" or "," separator.
# NOTE(review): the pattern also accepts "in"/"ins" as prefix word, so an
# ordinary sentence such as "in Berlin, ..." reaches the project lookup too;
# only the 0.6 similarity threshold keeps it from being routed.
_VOICE_META_PROJECT_PREFIX = re.compile(
    r"^\s*(?:aria[,.]?\s+)?(?:f(?:ü|ue)r|ins?)\s+([\w\-äöüßÄÖÜ]{2,40})[:\-,]\s*(.+?)\s*$",
    re.IGNORECASE | re.DOTALL,
)


def _apply_voice_router(self, text: str) -> tuple[bool, str, str, str]:
    """Decide whether an STT text is forwarded and to which project context.

    Routing order:
      1. "Back to main chat" meta command: clears the sticky project and is
         not forwarded.
      2. Project prefix ("fuer <name>: <message>"): the spoken name is matched
         against the list returned by ``{BRAIN_URL}/projects/list`` (exact
         match on name or id, otherwise best ``SequenceMatcher`` ratio with a
         minimum of 0.6). On a match the project becomes sticky and the
         prefix is removed from the text.
      3. Otherwise: an unexpired sticky project is reused and its timeout is
         refreshed; if none is active the sticky state is reset and the text
         goes to the default context.

    Args:
        text: Transcribed utterance. ``None`` is treated like an empty string.

    Returns:
        ``(should_forward, cleaned_text, project_id, meta_action)`` where
        ``meta_action`` is one of ``"back_to_main"``, ``"project_prefix"``,
        ``"sticky"`` or ``"default"``. ``should_forward`` is ``False`` only
        for ``"back_to_main"``; ``project_id`` is ``""`` when no project
        applies.

    Side effects:
        Updates ``self._voice_sticky_project_id`` and
        ``self._voice_sticky_expires_at``.
    """
    import time as _time
    from difflib import SequenceMatcher

    now = _time.time()
    stripped = (text or "").strip()

    # 1) Meta command: back to the main chat, no forwarding.
    if self._VOICE_META_BACK_TO_MAIN.match(stripped):
        self._voice_sticky_project_id = ""
        self._voice_sticky_expires_at = 0.0
        return (False, "", "", "back_to_main")

    # 2) Prefix addressing: "fuer <project>: <message>".
    prefix = self._VOICE_META_PROJECT_PREFIX.match(stripped)
    if prefix:
        spoken = prefix.group(1).lower()
        remainder = prefix.group(2).strip()

        # Best effort: any failure while fetching or decoding the list simply
        # means "no match" and the utterance is routed like a normal message.
        # NOTE(review): urlopen blocks for up to 3 s; this method is
        # synchronous, so a caller running on an event loop stalls meanwhile.
        projects = []
        try:
            brain_url = os.environ.get("BRAIN_URL", "http://aria-brain:8080")
            with urllib.request.urlopen(f"{brain_url}/projects/list", timeout=3) as resp:
                data = json.loads(resp.read())
            if isinstance(data, dict) and isinstance(data.get("projects"), list):
                projects = data["projects"]
        except Exception as exc:
            logger.warning("[voice-router] Prefix-Match fehlgeschlagen: %s", exc)

        best_id, best_name, best_score = "", "", 0.0
        for proj in projects:
            if not isinstance(proj, dict):
                continue
            raw_id = proj.get("id")
            if raw_id is None or raw_id == "":
                # Without an id the project cannot be addressed at all.
                continue
            pid = str(raw_id)
            # A null/missing name must not abort the whole lookup.
            pname = str(proj.get("name") or "")
            # The spoken name is lower-cased, so compare case-insensitively
            # against both the name and the id.
            if spoken in (pname.lower(), pid.lower()):
                best_id, best_name, best_score = pid, pname, 1.0
                break
            score = SequenceMatcher(None, spoken, pname.lower()).ratio()
            if score > best_score:
                best_id, best_name, best_score = pid, pname, score

        if best_id and best_score >= 0.6:
            self._voice_sticky_project_id = best_id
            self._voice_sticky_expires_at = now + self._VOICE_STICKY_TIMEOUT_SEC
            logger.info("[voice-router] Prefix → Projekt '%s' (id=%s, score=%.2f)",
                        best_name, best_id, best_score)
            return (True, remainder or stripped, best_id, "project_prefix")
        # No match: fall through and treat the full text as a normal message.

    # 3) Neither meta nor prefix: sticky project if still active, else default.
    if self._voice_sticky_project_id and now < self._voice_sticky_expires_at:
        # Every routed message extends the sticky window.
        self._voice_sticky_expires_at = now + self._VOICE_STICKY_TIMEOUT_SEC
        return (True, stripped, self._voice_sticky_project_id, "sticky")

    # Sticky expired (or never set): reset both fields so no stale expiry
    # timestamp survives next to an empty project id.
    self._voice_sticky_project_id = ""
    self._voice_sticky_expires_at = 0.0
    return (True, stripped, "", "default")
"chat", "payload": stt_payload, @@ -3009,10 +3126,11 @@ class ARIABridge: except Exception as e: logger.warning("[rvs] STT-Text (endpoint) konnte nicht broadcastet werden: %s", e) - core_text = self._build_core_text(text, interrupted, location) + core_text = self._build_core_text(cleaned, interrupted, location) await self.send_to_core(core_text, source="app-voice-stream" + (" [barge-in]" if interrupted else ""), - client_msg_id=client_msg_id) + client_msg_id=client_msg_id, + project_id=project_id) async def _stt_remote(self, audio_b64: str, mime_type: str) -> Optional[str]: """Schickt Audio an die whisper-bridge und wartet auf stt_response.