diff --git a/android/android/app/src/main/java/com/ariacockpit/AudioFocusModule.kt b/android/android/app/src/main/java/com/ariacockpit/AudioFocusModule.kt new file mode 100644 index 0000000..b69017d --- /dev/null +++ b/android/android/app/src/main/java/com/ariacockpit/AudioFocusModule.kt @@ -0,0 +1,93 @@ +package com.ariacockpit + +import android.content.Context +import android.media.AudioAttributes +import android.media.AudioFocusRequest +import android.media.AudioManager +import android.os.Build +import com.facebook.react.bridge.Promise +import com.facebook.react.bridge.ReactApplicationContext +import com.facebook.react.bridge.ReactContextBaseJavaModule +import com.facebook.react.bridge.ReactMethod + +/** + * Steuert Audio-Focus fuer Ducking/Muten anderer Apps. + * + * - requestDuck() → andere Apps werden leiser (ARIA spricht TTS) + * - requestExclusive() → andere Apps werden pausiert (Mikrofon-Aufnahme) + * - release() → Focus abgeben, andere Apps duerfen wieder + */ +class AudioFocusModule(reactContext: ReactApplicationContext) : ReactContextBaseJavaModule(reactContext) { + override fun getName() = "AudioFocus" + + private var currentRequest: AudioFocusRequest? = null + + private fun audioManager(): AudioManager? = + reactApplicationContext.getSystemService(Context.AUDIO_SERVICE) as? 
AudioManager + + private fun requestFocus(durationHint: Int, usage: Int, promise: Promise) { + val am = audioManager() + if (am == null) { + promise.reject("NO_AUDIO_MANAGER", "AudioManager nicht verfuegbar") + return + } + + release() + + val result: Int = if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.O) { + val attrs = AudioAttributes.Builder() + .setUsage(usage) + .setContentType(AudioAttributes.CONTENT_TYPE_SPEECH) + .build() + val req = AudioFocusRequest.Builder(durationHint) + .setAudioAttributes(attrs) + .setOnAudioFocusChangeListener { /* kein Callback noetig */ } + .build() + currentRequest = req + am.requestAudioFocus(req) + } else { + @Suppress("DEPRECATION") + am.requestAudioFocus(null, AudioManager.STREAM_MUSIC, durationHint) + } + + promise.resolve(result == AudioManager.AUDIOFOCUS_REQUEST_GRANTED) + } + + /** Andere Apps werden leiser (TTS spricht). */ + @ReactMethod + fun requestDuck(promise: Promise) { + requestFocus( + AudioManager.AUDIOFOCUS_GAIN_TRANSIENT_MAY_DUCK, + AudioAttributes.USAGE_ASSISTANT, + promise, + ) + } + + /** Andere Apps werden pausiert (Mikrofon-Aufnahme / Gespraech). */ + @ReactMethod + fun requestExclusive(promise: Promise) { + requestFocus( + AudioManager.AUDIOFOCUS_GAIN_TRANSIENT_EXCLUSIVE, + AudioAttributes.USAGE_VOICE_COMMUNICATION, + promise, + ) + } + + /** Focus abgeben — andere Apps duerfen wieder volle Lautstaerke. 
*/ + @ReactMethod + fun release(promise: Promise) { + release() + promise.resolve(true) + } + + private fun release() { + val am = audioManager() ?: return + if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.O) { + currentRequest?.let { am.abandonAudioFocusRequest(it) } + } else { + @Suppress("DEPRECATION") + am.abandonAudioFocus(null) + } + currentRequest = null + } +} diff --git a/android/android/app/src/main/java/com/ariacockpit/AudioFocusPackage.kt b/android/android/app/src/main/java/com/ariacockpit/AudioFocusPackage.kt new file mode 100644 index 0000000..b659556 --- /dev/null +++ b/android/android/app/src/main/java/com/ariacockpit/AudioFocusPackage.kt @@ -0,0 +1,16 @@ +package com.ariacockpit + +import com.facebook.react.ReactPackage +import com.facebook.react.bridge.NativeModule +import com.facebook.react.bridge.ReactApplicationContext +import com.facebook.react.uimanager.ViewManager + +class AudioFocusPackage : ReactPackage { + override fun createNativeModules(reactContext: ReactApplicationContext): List<NativeModule> { + return listOf(AudioFocusModule(reactContext)) + } + + override fun createViewManagers(reactContext: ReactApplicationContext): List<ViewManager<*, *>> { + return emptyList() + } +} diff --git a/android/android/app/src/main/java/com/ariacockpit/MainApplication.kt b/android/android/app/src/main/java/com/ariacockpit/MainApplication.kt index 16ab703..0fd44b2 100644 --- a/android/android/app/src/main/java/com/ariacockpit/MainApplication.kt +++ b/android/android/app/src/main/java/com/ariacockpit/MainApplication.kt @@ -19,6 +19,7 @@ class MainApplication : Application(), ReactApplication { override fun getPackages(): List<ReactPackage> = PackageList(this).packages.apply { add(ApkInstallerPackage()) + add(AudioFocusPackage()) } override fun getJSMainModuleName(): String = "index" diff --git a/android/src/services/audio.ts b/android/src/services/audio.ts index ddd2749..3739715 100644 --- a/android/src/services/audio.ts +++ b/android/src/services/audio.ts @@ -6,7 +6,7 @@ * Nutzt 
react-native-audio-recorder-player fuer Aufnahme. */ -import { Platform, PermissionsAndroid } from 'react-native'; +import { Platform, PermissionsAndroid, NativeModules } from 'react-native'; import Sound from 'react-native-sound'; import RNFS from 'react-native-fs'; import AudioRecorderPlayer, { @@ -16,6 +16,15 @@ import AudioRecorderPlayer, { OutputFormatAndroidType, } from 'react-native-audio-recorder-player'; +// Native Module fuer Audio-Focus (Ducking/Muten anderer Apps) +const { AudioFocus } = NativeModules as { + AudioFocus?: { + requestDuck: () => Promise<boolean>; + requestExclusive: () => Promise<boolean>; + release: () => Promise<boolean>; + }; +}; + // --- Typen --- export interface RecordingResult { @@ -172,6 +181,9 @@ class AudioService { this.speechStartTime = 0; this.setState('recording'); + // Andere Apps waehrend der Aufnahme pausieren (Musik, Videos etc.) + AudioFocus?.requestExclusive().catch(() => {}); + // VAD aktivieren this.vadEnabled = autoStop; if (autoStop) { @@ -220,6 +232,9 @@ class AudioService { await this.recorder.stopRecorder(); this.recorder.removeRecordBackListener(); + // Audio-Focus freigeben — andere Apps duerfen wieder + AudioFocus?.release().catch(() => {}); + const durationMs = Date.now() - this.recordingStartTime; const hadSpeech = this.speechDetected; @@ -278,11 +293,17 @@ class AudioService { private async _playNext(): Promise<void> { if (this.audioQueue.length === 0) { this.isPlaying = false; + // Audio-Focus abgeben → andere Apps volle Lautstaerke + AudioFocus?.release().catch(() => {}); // Alle Audio-Teile abgespielt → Listener benachrichtigen this.playbackFinishedListeners.forEach(cb => cb()); return; } + // Beim ersten Playback-Start: andere Apps ducken + if (!this.isPlaying) { + AudioFocus?.requestDuck().catch(() => {}); + } this.isPlaying = true; // Preloaded Sound verwenden wenn verfuegbar, sonst neu laden @@ -358,6 +379,8 @@ class AudioService { if (this.preloadedPath) RNFS.unlink(this.preloadedPath).catch(() => {}); this.preloadedPath = ''; } + 
// Audio-Focus freigeben + AudioFocus?.release().catch(() => {}); } // --- Status & Callbacks --- diff --git a/bridge/aria_bridge.py b/bridge/aria_bridge.py index d7b3f3e..4355a5b 100644 --- a/bridge/aria_bridge.py +++ b/bridge/aria_bridge.py @@ -124,6 +124,97 @@ def load_config() -> dict[str, str]: # ── Voice Engine ───────────────────────────────────────────── +import re as _re_tts + +_UNIT_WORDS = [ + (r'\bTB\b', 'Terabyte'), + (r'\bGB\b', 'Gigabyte'), + (r'\bMB\b', 'Megabyte'), + (r'\bKB\b', 'Kilobyte'), + (r'\bkB\b', 'Kilobyte'), + (r'\bms\b', 'Millisekunden'), + (r'\bkm/h\b', 'Kilometer pro Stunde'), + (r'\bkm\b', 'Kilometer'), + (r'\bm/s\b', 'Meter pro Sekunde'), + (r'\bkg\b', 'Kilogramm'), + (r'\b°C\b', 'Grad Celsius'), + (r'°C', ' Grad Celsius'), + (r'\bMbps\b', 'Megabit pro Sekunde'), + (r'\bGbps\b', 'Gigabit pro Sekunde'), + (r'\bMhz\b|\bMHz\b', 'Megahertz'), + (r'\bGhz\b|\bGHz\b', 'Gigahertz'), + (r'%', ' Prozent'), + (r'\bCPU\b', 'C P U'), + (r'\bGPU\b', 'G P U'), + (r'\bRAM\b', 'R A M'), + (r'\bSSD\b', 'S S D'), + (r'\bHDD\b', 'H D D'), + (r'\bURL\b', 'U R L'), + (r'\bAPI\b', 'A P I'), + (r'\bRVS\b', 'R V S'), + (r'\bSSH\b', 'S S H'), + (r'\bVM\b', 'V M'), + (r'\bUI\b', 'U I'), + (r'\bTTS\b', 'T T S'), + (r'\bSTT\b', 'S T T'), + (r'\bTLS\b', 'T L S'), +] + + +def clean_text_for_tts(text: str) -> str: + """Bereitet Chat-Text fuer Sprachausgabe auf. + + - `<voice>...</voice>` Tag: wenn vorhanden, NUR dieser Inhalt wird gelesen + - Code-Bloecke (```...``` und `...`) werden komplett entfernt + - Markdown (Fett, Kursiv, Links, Headings, Listen, Zitate) wird abgeraeumt + - Einheiten und gaengige Abkuerzungen werden ausgeschrieben (22GB → 22 Gigabyte) + - URLs werden durch "ein Link" ersetzt + - Mehrfach-Leerzeichen/Umbrueche normalisiert + """ + if not text: + return "" + + # <voice>...</voice> 
wenn vorhanden → nur das nehmen + voice_match = _re_tts.search(r'<voice>([\s\S]*?)</voice>', text, _re_tts.IGNORECASE) + if voice_match: + text = voice_match.group(1) + + t = text + + # Code-Bloecke komplett raus (Zeilenumbruch statt Platzhalter — sonst bricht Satzlogik) + t = _re_tts.sub(r'```[\s\S]*?```', '. ', t) + t = _re_tts.sub(r'`[^`]+`', '', t) + + # Markdown + t = _re_tts.sub(r'\*\*([^*]+)\*\*', r'\1', t) + t = _re_tts.sub(r'\*([^*]+)\*', r'\1', t) + t = _re_tts.sub(r'__([^_]+)__', r'\1', t) + t = _re_tts.sub(r'\[([^\]]+)\]\((https?://[^)]+)\)', r'\1, ein Link', t) + t = _re_tts.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', t) + t = _re_tts.sub(r'https?://\S+', 'ein Link', t) + t = _re_tts.sub(r'^#{1,6}\s*', '', t, flags=_re_tts.MULTILINE) + t = _re_tts.sub(r'^>\s*', '', t, flags=_re_tts.MULTILINE) + t = _re_tts.sub(r'^[\-\*]\s+', '', t, flags=_re_tts.MULTILINE) + + # Zahlen + Einheit: "22GB" → "22 Gigabyte" (Leerzeichen einfuegen) + t = _re_tts.sub(r'(\d+)([A-Za-z]{1,4})\b', r'\1 \2', t) + + # Einheiten/Abkuerzungen ausschreiben + for pat, repl in _UNIT_WORDS: + t = _re_tts.sub(pat, repl, t) + + # Anfuehrungszeichen + t = _re_tts.sub(r'["""„`]', '', t) + + # Absaetze/Zeilenumbrueche normalisieren + t = _re_tts.sub(r'\n{2,}', '. ', t) + t = _re_tts.sub(r'\n', ', ', t) + t = _re_tts.sub(r'\s{2,}', ' ', t) + t = _re_tts.sub(r'\s*\.\s*\.\s*', '. 
', t) + + return t.strip() + + class VoiceEngine: """Verwaltet Piper TTS mit zwei Stimmen: Ramona und Thorsten.""" @@ -201,21 +292,9 @@ class VoiceEngine: return None try: - # Markdown + Sonderzeichen entfernen fuer natuerliche Sprache + # Zentraler TTS-Cleanup (Markdown, Code, Einheiten, URLs) import re - clean = text.strip() - clean = re.sub(r'\*\*([^*]+)\*\*', r'\1', clean) # **fett** - clean = re.sub(r'\*([^*]+)\*', r'\1', clean) # *kursiv* - clean = re.sub(r'`[^`]+`', '', clean) # `code` - clean = re.sub(r'```[\s\S]*?```', '', clean) # Code-Bloecke - clean = re.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', clean) # [text](url) - clean = re.sub(r'#{1,6}\s*', '', clean) # ### Ueberschriften - clean = re.sub(r'>\s*', '', clean) # > Zitate - clean = re.sub(r'[-*]\s+', '', clean) # Listen - clean = re.sub(r'\n{2,}', '. ', clean) # Absaetze - clean = re.sub(r'\n', ', ', clean) # Zeilenumbrueche - clean = re.sub(r'\s{2,}', ' ', clean) # Mehrfach-Leerzeichen - clean = re.sub(r'["""„]', '', clean) # Anfuehrungszeichen + clean = clean_text_for_tts(text) sentences = re.split(r'(?<=[.!?])\s+', clean) sentences = [s.strip() for s in sentences if s.strip()] @@ -867,6 +946,14 @@ class ARIABridge: - Leitet Antwort an die App weiter (via RVS) - Sprachausgabe ueber TTS (wenn Modus erlaubt) """ + # NO_REPLY Token: ARIA signalisiert explizit "nicht antworten" + # → komplett verwerfen (keine Chat-Nachricht, kein TTS) + # Toleranz fuer Variationen: "NO_REPLY", "no_reply", mit Punkt/Anfuehrungszeichen + stripped = text.strip().strip('."\'`*').upper() + if stripped == "NO_REPLY" or stripped.startswith("NO_REPLY"): + logger.info("[core] NO_REPLY empfangen — Antwort still verworfen") + return + metadata = payload.get("metadata", {}) is_critical = metadata.get("critical", False) requested_voice = metadata.get("voice") @@ -905,20 +992,24 @@ class ARIABridge: tts_engine = getattr(self, 'tts_engine_type', 'piper') if tts_engine == "xtts": - # XTTS: Ganzen Text senden, XTTS-Bridge teilt satzweise 
auf + # XTTS: aufbereiteter Text (Code-Bloecke raus, Einheiten ausgeschrieben) xtts_voice = getattr(self, 'xtts_voice', '') + tts_text = clean_text_for_tts(text) + if not tts_text: + logger.info("[core] TTS-Text leer nach Cleanup — XTTS uebersprungen") + return try: await self._send_to_rvs({ "type": "xtts_request", "payload": { - "text": text, + "text": tts_text, "voice": xtts_voice, "language": "de", "requestId": str(uuid.uuid4()), }, "timestamp": int(asyncio.get_event_loop().time() * 1000), }) - logger.info("[core] XTTS-Request gesendet (%s): '%s'", xtts_voice or "default", text[:60]) + logger.info("[core] XTTS-Request gesendet (%s): '%s'", xtts_voice or "default", tts_text[:60]) except Exception as e: logger.warning("[core] XTTS-Request fehlgeschlagen: %s — Fallback auf Piper", e) # Fallback auf Piper diff --git a/diagnostic/server.js b/diagnostic/server.js index acba573..c9ab61b 100644 --- a/diagnostic/server.js +++ b/diagnostic/server.js @@ -391,6 +391,19 @@ function handleGatewayMessage(msg) { const runId = payload.runId || ""; if (runId && seenFinalRuns.has(runId)) return; // Duplikat if (runId) { seenFinalRuns.add(runId); setTimeout(() => seenFinalRuns.delete(runId), 60000); } + + // NO_REPLY → ARIA signalisiert "nicht antworten", Pipeline beenden aber nichts zeigen + const trimmed = (text || "").trim().replace(/^["'`*.\s]+|["'`*.\s]+$/g, "").toUpperCase(); + if (trimmed === "NO_REPLY" || trimmed.startsWith("NO_REPLY")) { + log("info", "gateway", "NO_REPLY empfangen — still verworfen"); + lastChatFinalAt = Date.now(); + if (pipelineActive) pipelineEnd(true, "NO_REPLY (stumm)"); + broadcast({ type: "agent_activity", activity: "idle" }); + pendingMessageTime = 0; + updateAgentActivity(); + return; + } + log("info", "gateway", `ANTWORT: "${text.slice(0, 200)}"`); lastChatFinalAt = Date.now(); if (pipelineActive) pipelineEnd(true, `"${text.slice(0, 120)}"`);