feat: NO_REPLY-Filter + Audio-Ducking + TTS-Cleanup
1) NO_REPLY Token wird in Bridge und Diagnostic erkannt und still verworfen. Toleranz fuer Variationen (Whitespace, Punkt, Quotes). Kein Chat-Eintrag, kein TTS. 2) AudioFocusModule (Kotlin) mit requestDuck / requestExclusive / release. AudioService ruft: - requestExclusive() bei Aufnahme-Start → andere Apps pausieren - requestDuck() bei TTS-Playback-Start → andere Apps leiser - release() bei Stop/Queue-Ende MainApplication registriert AudioFocusPackage. 3) clean_text_for_tts() in Bridge — zentrale Aufbereitung: - <voice>...</voice> Tag wird bevorzugt (falls ARIA es schreibt) - Code-Bloecke (``` und `) komplett raus - Markdown (Fett/Kursiv/Links/Headings/Listen) geschleift - Einheiten ausgeschrieben: 22GB → 22 Gigabyte, 85% → 85 Prozent - Abkuerzungen buchstabiert: CPU → C P U, API → A P I - URLs durch "ein Link" ersetzt Genutzt in VoiceEngine.synthesize und im XTTS-Request — Chat-Text an die App bleibt unveraendert (original Markdown), nur TTS kriegt die aufbereitete Version. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
23add7a107
commit
8b0a72dc9b
|
|
@ -0,0 +1,93 @@
|
|||
package com.ariacockpit
|
||||
|
||||
import android.content.Context
|
||||
import android.media.AudioAttributes
|
||||
import android.media.AudioFocusRequest
|
||||
import android.media.AudioManager
|
||||
import android.os.Build
|
||||
import com.facebook.react.bridge.Promise
|
||||
import com.facebook.react.bridge.ReactApplicationContext
|
||||
import com.facebook.react.bridge.ReactContextBaseJavaModule
|
||||
import com.facebook.react.bridge.ReactMethod
|
||||
|
||||
/**
|
||||
* Steuert Audio-Focus fuer Ducking/Muten anderer Apps.
|
||||
*
|
||||
* - requestDuck() → andere Apps werden leiser (ARIA spricht TTS)
|
||||
* - requestExclusive() → andere Apps werden pausiert (Mikrofon-Aufnahme)
|
||||
* - release() → Focus abgeben, andere Apps duerfen wieder
|
||||
*/
|
||||
/**
 * Controls audio focus to duck or pause other apps' audio.
 *
 * - requestDuck()      → other apps lower their volume (ARIA speaking via TTS)
 * - requestExclusive() → other apps pause playback (microphone recording / conversation)
 * - release()          → give up focus; other apps may resume at full volume
 */
class AudioFocusModule(reactContext: ReactApplicationContext) : ReactContextBaseJavaModule(reactContext) {

    override fun getName() = "AudioFocus"

    // Focus request currently held on the API 26+ path; null when no focus is held.
    private var currentRequest: AudioFocusRequest? = null

    private fun audioManager(): AudioManager? =
        reactApplicationContext.getSystemService(Context.AUDIO_SERVICE) as? AudioManager

    /**
     * Requests audio focus with [durationHint] and the given [usage] attribute.
     *
     * Resolves the promise with `true` when focus was granted, `false` otherwise.
     * Rejects with code `NO_AUDIO_MANAGER` when the system service is unavailable.
     */
    private fun requestFocus(durationHint: Int, usage: Int, promise: Promise) {
        val am = audioManager()
        if (am == null) {
            promise.reject("NO_AUDIO_MANAGER", "AudioManager nicht verfuegbar")
            return
        }

        // Drop any focus we already hold before requesting again.
        release()

        val result: Int = if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.O) {
            val attrs = AudioAttributes.Builder()
                .setUsage(usage)
                .setContentType(AudioAttributes.CONTENT_TYPE_SPEECH)
                .build()
            val req = AudioFocusRequest.Builder(durationHint)
                .setAudioAttributes(attrs)
                .setOnAudioFocusChangeListener { /* no callback needed */ }
                .build()
            val granted = am.requestAudioFocus(req)
            // Bug fix: only remember the request when it was actually granted —
            // otherwise a later release() would abandon a request that never held focus.
            currentRequest = if (granted == AudioManager.AUDIOFOCUS_REQUEST_GRANTED) req else null
            granted
        } else {
            @Suppress("DEPRECATION")
            am.requestAudioFocus(null, AudioManager.STREAM_MUSIC, durationHint)
        }

        promise.resolve(result == AudioManager.AUDIOFOCUS_REQUEST_GRANTED)
    }

    /** Other apps get quieter (TTS is speaking). */
    @ReactMethod
    fun requestDuck(promise: Promise) {
        requestFocus(
            AudioManager.AUDIOFOCUS_GAIN_TRANSIENT_MAY_DUCK,
            AudioAttributes.USAGE_ASSISTANT,
            promise,
        )
    }

    /** Other apps are paused (microphone recording / conversation). */
    @ReactMethod
    fun requestExclusive(promise: Promise) {
        requestFocus(
            AudioManager.AUDIOFOCUS_GAIN_TRANSIENT_EXCLUSIVE,
            AudioAttributes.USAGE_VOICE_COMMUNICATION,
            promise,
        )
    }

    /** Give up focus — other apps may return to full volume. */
    @ReactMethod
    fun release(promise: Promise) {
        release()
        promise.resolve(true)
    }

    // Internal no-arg variant shared by requestFocus() and the bridge method above.
    private fun release() {
        val am = audioManager() ?: return
        if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.O) {
            currentRequest?.let { am.abandonAudioFocusRequest(it) }
        } else {
            @Suppress("DEPRECATION")
            am.abandonAudioFocus(null)
        }
        currentRequest = null
    }
}
|
||||
|
|
@ -0,0 +1,16 @@
|
|||
package com.ariacockpit
|
||||
|
||||
import com.facebook.react.ReactPackage
|
||||
import com.facebook.react.bridge.NativeModule
|
||||
import com.facebook.react.bridge.ReactApplicationContext
|
||||
import com.facebook.react.uimanager.ViewManager
|
||||
|
||||
/** React Native package that registers the [AudioFocusModule] native module. */
class AudioFocusPackage : ReactPackage {
    // Single native module; no UI components are contributed.
    override fun createNativeModules(reactContext: ReactApplicationContext): List<NativeModule> =
        listOf(AudioFocusModule(reactContext))

    override fun createViewManagers(reactContext: ReactApplicationContext): List<ViewManager<*, *>> =
        emptyList()
}
|
||||
|
|
@ -19,6 +19,7 @@ class MainApplication : Application(), ReactApplication {
|
|||
override fun getPackages(): List<ReactPackage> =
|
||||
PackageList(this).packages.apply {
|
||||
add(ApkInstallerPackage())
|
||||
add(AudioFocusPackage())
|
||||
}
|
||||
|
||||
    // Name of the JS entry module handed to the bundler ("index").
    override fun getJSMainModuleName(): String = "index"
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@
|
|||
* Nutzt react-native-audio-recorder-player fuer Aufnahme.
|
||||
*/
|
||||
|
||||
import { Platform, PermissionsAndroid } from 'react-native';
|
||||
import { Platform, PermissionsAndroid, NativeModules } from 'react-native';
|
||||
import Sound from 'react-native-sound';
|
||||
import RNFS from 'react-native-fs';
|
||||
import AudioRecorderPlayer, {
|
||||
|
|
@ -16,6 +16,15 @@ import AudioRecorderPlayer, {
|
|||
OutputFormatAndroidType,
|
||||
} from 'react-native-audio-recorder-player';
|
||||
|
||||
// Native Module fuer Audio-Focus (Ducking/Muten anderer Apps)
|
||||
const { AudioFocus } = NativeModules as {
|
||||
AudioFocus?: {
|
||||
requestDuck: () => Promise<boolean>;
|
||||
requestExclusive: () => Promise<boolean>;
|
||||
release: () => Promise<boolean>;
|
||||
};
|
||||
};
|
||||
|
||||
// --- Typen ---
|
||||
|
||||
export interface RecordingResult {
|
||||
|
|
@ -172,6 +181,9 @@ class AudioService {
|
|||
this.speechStartTime = 0;
|
||||
this.setState('recording');
|
||||
|
||||
// Andere Apps waehrend der Aufnahme pausieren (Musik, Videos etc.)
|
||||
AudioFocus?.requestExclusive().catch(() => {});
|
||||
|
||||
// VAD aktivieren
|
||||
this.vadEnabled = autoStop;
|
||||
if (autoStop) {
|
||||
|
|
@ -220,6 +232,9 @@ class AudioService {
|
|||
await this.recorder.stopRecorder();
|
||||
this.recorder.removeRecordBackListener();
|
||||
|
||||
// Audio-Focus freigeben — andere Apps duerfen wieder
|
||||
AudioFocus?.release().catch(() => {});
|
||||
|
||||
const durationMs = Date.now() - this.recordingStartTime;
|
||||
const hadSpeech = this.speechDetected;
|
||||
|
||||
|
|
@ -278,11 +293,17 @@ class AudioService {
|
|||
private async _playNext(): Promise<void> {
|
||||
if (this.audioQueue.length === 0) {
|
||||
this.isPlaying = false;
|
||||
// Audio-Focus abgeben → andere Apps volle Lautstaerke
|
||||
AudioFocus?.release().catch(() => {});
|
||||
// Alle Audio-Teile abgespielt → Listener benachrichtigen
|
||||
this.playbackFinishedListeners.forEach(cb => cb());
|
||||
return;
|
||||
}
|
||||
|
||||
// Beim ersten Playback-Start: andere Apps ducken
|
||||
if (!this.isPlaying) {
|
||||
AudioFocus?.requestDuck().catch(() => {});
|
||||
}
|
||||
this.isPlaying = true;
|
||||
|
||||
// Preloaded Sound verwenden wenn verfuegbar, sonst neu laden
|
||||
|
|
@ -358,6 +379,8 @@ class AudioService {
|
|||
if (this.preloadedPath) RNFS.unlink(this.preloadedPath).catch(() => {});
|
||||
this.preloadedPath = '';
|
||||
}
|
||||
// Audio-Focus freigeben
|
||||
AudioFocus?.release().catch(() => {});
|
||||
}
|
||||
|
||||
// --- Status & Callbacks ---
|
||||
|
|
|
|||
|
|
@ -124,6 +124,97 @@ def load_config() -> dict[str, str]:
|
|||
# ── Voice Engine ─────────────────────────────────────────────
|
||||
|
||||
|
||||
import re as _re_tts

# (regex pattern, spoken replacement) pairs, applied in order.
# Units are spelled out ("GB" -> "Gigabyte"); common tech abbreviations are
# spelled letter by letter ("CPU" -> "C P U") so the TTS voice reads them naturally.
_UNIT_WORDS = [
    (r'\bTB\b', 'Terabyte'),
    (r'\bGB\b', 'Gigabyte'),
    (r'\bMB\b', 'Megabyte'),
    (r'\bKB\b', 'Kilobyte'),
    (r'\bkB\b', 'Kilobyte'),
    (r'\bms\b', 'Millisekunden'),
    (r'\bkm/h\b', 'Kilometer pro Stunde'),
    (r'\bkm\b', 'Kilometer'),
    (r'\bm/s\b', 'Meter pro Sekunde'),
    (r'\bkg\b', 'Kilogramm'),
    # Bug fix: leading space in the replacement, so "22°C" becomes
    # "22 Grad Celsius" instead of "22Grad Celsius" (the \b variant matches
    # when a digit directly precedes the degree sign).
    (r'\b°C\b', ' Grad Celsius'),
    (r'°C', ' Grad Celsius'),
    (r'\bMbps\b', 'Megabit pro Sekunde'),
    (r'\bGbps\b', 'Gigabit pro Sekunde'),
    (r'\bMhz\b|\bMHz\b', 'Megahertz'),
    (r'\bGhz\b|\bGHz\b', 'Gigahertz'),
    (r'%', ' Prozent'),
    (r'\bCPU\b', 'C P U'),
    (r'\bGPU\b', 'G P U'),
    (r'\bRAM\b', 'R A M'),
    (r'\bSSD\b', 'S S D'),
    (r'\bHDD\b', 'H D D'),
    (r'\bURL\b', 'U R L'),
    (r'\bAPI\b', 'A P I'),
    (r'\bRVS\b', 'R V S'),
    (r'\bSSH\b', 'S S H'),
    (r'\bVM\b', 'V M'),
    (r'\bUI\b', 'U I'),
    (r'\bTTS\b', 'T T S'),
    (r'\bSTT\b', 'S T T'),
    (r'\bTLS\b', 'T L S'),
]


def clean_text_for_tts(text: str) -> str:
    """Prepare chat text for speech output.

    - ``<voice>...</voice>`` tag: if present, ONLY its content is read aloud
    - Code blocks (```...``` and `...`) are removed entirely
    - Markdown (bold, italic, links, headings, lists, quotes) is stripped
    - Units and common abbreviations are spelled out (22GB -> 22 Gigabyte)
    - URLs are replaced with "ein Link"
    - Repeated whitespace / line breaks are normalized
    """
    if not text:
        return ""

    # <voice>...</voice> if present -> read only that content
    voice_match = _re_tts.search(r'<voice>([\s\S]*?)</voice>', text, _re_tts.IGNORECASE)
    if voice_match:
        text = voice_match.group(1)

    t = text

    # Strip code blocks entirely (sentence break instead of a placeholder —
    # otherwise the downstream sentence-splitting logic falls apart)
    t = _re_tts.sub(r'```[\s\S]*?```', '. ', t)
    t = _re_tts.sub(r'`[^`]+`', '', t)

    # Markdown
    t = _re_tts.sub(r'\*\*([^*]+)\*\*', r'\1', t)
    t = _re_tts.sub(r'\*([^*]+)\*', r'\1', t)
    t = _re_tts.sub(r'__([^_]+)__', r'\1', t)
    t = _re_tts.sub(r'\[([^\]]+)\]\((https?://[^)]+)\)', r'\1, ein Link', t)
    t = _re_tts.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', t)
    t = _re_tts.sub(r'https?://\S+', 'ein Link', t)
    t = _re_tts.sub(r'^#{1,6}\s*', '', t, flags=_re_tts.MULTILINE)
    t = _re_tts.sub(r'^>\s*', '', t, flags=_re_tts.MULTILINE)
    t = _re_tts.sub(r'^[\-\*]\s+', '', t, flags=_re_tts.MULTILINE)

    # Number + unit: "22GB" -> "22 GB" (insert a space so the unit patterns
    # above can match on word boundaries)
    t = _re_tts.sub(r'(\d+)([A-Za-z]{1,4})\b', r'\1 \2', t)

    # Spell out units / abbreviations
    for pat, repl in _UNIT_WORDS:
        t = _re_tts.sub(pat, repl, t)

    # Quotation marks: straight, typographic and low-9 quotes plus backticks
    # (original char class contained duplicated '"' characters — normalized here)
    t = _re_tts.sub(r'["“”„`]', '', t)

    # Normalize paragraphs / line breaks into sentence pauses
    t = _re_tts.sub(r'\n{2,}', '. ', t)
    t = _re_tts.sub(r'\n', ', ', t)
    t = _re_tts.sub(r'\s{2,}', ' ', t)
    t = _re_tts.sub(r'\s*\.\s*\.\s*', '. ', t)

    return t.strip()
|
||||
|
||||
|
||||
class VoiceEngine:
|
||||
"""Verwaltet Piper TTS mit zwei Stimmen: Ramona und Thorsten."""
|
||||
|
||||
|
|
@ -201,21 +292,9 @@ class VoiceEngine:
|
|||
return None
|
||||
|
||||
try:
|
||||
# Markdown + Sonderzeichen entfernen fuer natuerliche Sprache
|
||||
# Zentraler TTS-Cleanup (Markdown, Code, Einheiten, URLs)
|
||||
import re
|
||||
clean = text.strip()
|
||||
clean = re.sub(r'\*\*([^*]+)\*\*', r'\1', clean) # **fett**
|
||||
clean = re.sub(r'\*([^*]+)\*', r'\1', clean) # *kursiv*
|
||||
clean = re.sub(r'`[^`]+`', '', clean) # `code`
|
||||
clean = re.sub(r'```[\s\S]*?```', '', clean) # Code-Bloecke
|
||||
clean = re.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', clean) # [text](url)
|
||||
clean = re.sub(r'#{1,6}\s*', '', clean) # ### Ueberschriften
|
||||
clean = re.sub(r'>\s*', '', clean) # > Zitate
|
||||
clean = re.sub(r'[-*]\s+', '', clean) # Listen
|
||||
clean = re.sub(r'\n{2,}', '. ', clean) # Absaetze
|
||||
clean = re.sub(r'\n', ', ', clean) # Zeilenumbrueche
|
||||
clean = re.sub(r'\s{2,}', ' ', clean) # Mehrfach-Leerzeichen
|
||||
clean = re.sub(r'["""„]', '', clean) # Anfuehrungszeichen
|
||||
clean = clean_text_for_tts(text)
|
||||
sentences = re.split(r'(?<=[.!?])\s+', clean)
|
||||
sentences = [s.strip() for s in sentences if s.strip()]
|
||||
|
||||
|
|
@ -867,6 +946,14 @@ class ARIABridge:
|
|||
- Leitet Antwort an die App weiter (via RVS)
|
||||
- Sprachausgabe ueber TTS (wenn Modus erlaubt)
|
||||
"""
|
||||
# NO_REPLY Token: ARIA signalisiert explizit "nicht antworten"
|
||||
# → komplett verwerfen (keine Chat-Nachricht, kein TTS)
|
||||
# Toleranz fuer Variationen: "NO_REPLY", "no_reply", mit Punkt/Anfuehrungszeichen
|
||||
stripped = text.strip().strip('."\'`*').upper()
|
||||
if stripped == "NO_REPLY" or stripped.startswith("NO_REPLY"):
|
||||
logger.info("[core] NO_REPLY empfangen — Antwort still verworfen")
|
||||
return
|
||||
|
||||
metadata = payload.get("metadata", {})
|
||||
is_critical = metadata.get("critical", False)
|
||||
requested_voice = metadata.get("voice")
|
||||
|
|
@ -905,20 +992,24 @@ class ARIABridge:
|
|||
tts_engine = getattr(self, 'tts_engine_type', 'piper')
|
||||
|
||||
if tts_engine == "xtts":
|
||||
# XTTS: Ganzen Text senden, XTTS-Bridge teilt satzweise auf
|
||||
# XTTS: aufbereiteter Text (Code-Bloecke raus, Einheiten ausgeschrieben)
|
||||
xtts_voice = getattr(self, 'xtts_voice', '')
|
||||
tts_text = clean_text_for_tts(text)
|
||||
if not tts_text:
|
||||
logger.info("[core] TTS-Text leer nach Cleanup — XTTS uebersprungen")
|
||||
return
|
||||
try:
|
||||
await self._send_to_rvs({
|
||||
"type": "xtts_request",
|
||||
"payload": {
|
||||
"text": text,
|
||||
"text": tts_text,
|
||||
"voice": xtts_voice,
|
||||
"language": "de",
|
||||
"requestId": str(uuid.uuid4()),
|
||||
},
|
||||
"timestamp": int(asyncio.get_event_loop().time() * 1000),
|
||||
})
|
||||
logger.info("[core] XTTS-Request gesendet (%s): '%s'", xtts_voice or "default", text[:60])
|
||||
logger.info("[core] XTTS-Request gesendet (%s): '%s'", xtts_voice or "default", tts_text[:60])
|
||||
except Exception as e:
|
||||
logger.warning("[core] XTTS-Request fehlgeschlagen: %s — Fallback auf Piper", e)
|
||||
# Fallback auf Piper
|
||||
|
|
|
|||
|
|
@ -391,6 +391,19 @@ function handleGatewayMessage(msg) {
|
|||
const runId = payload.runId || "";
|
||||
if (runId && seenFinalRuns.has(runId)) return; // Duplikat
|
||||
if (runId) { seenFinalRuns.add(runId); setTimeout(() => seenFinalRuns.delete(runId), 60000); }
|
||||
|
||||
// NO_REPLY → ARIA signalisiert "nicht antworten", Pipeline beenden aber nichts zeigen
|
||||
const trimmed = (text || "").trim().replace(/^["'`*.\s]+|["'`*.\s]+$/g, "").toUpperCase();
|
||||
if (trimmed === "NO_REPLY" || trimmed.startsWith("NO_REPLY")) {
|
||||
log("info", "gateway", "NO_REPLY empfangen — still verworfen");
|
||||
lastChatFinalAt = Date.now();
|
||||
if (pipelineActive) pipelineEnd(true, "NO_REPLY (stumm)");
|
||||
broadcast({ type: "agent_activity", activity: "idle" });
|
||||
pendingMessageTime = 0;
|
||||
updateAgentActivity();
|
||||
return;
|
||||
}
|
||||
|
||||
log("info", "gateway", `ANTWORT: "${text.slice(0, 200)}"`);
|
||||
lastChatFinalAt = Date.now();
|
||||
if (pipelineActive) pipelineEnd(true, `"${text.slice(0, 120)}"`);
|
||||
|
|
|
|||
Loading…
Reference in New Issue