From 81ca3cc7a7e62e8aa21bbd99db2eb7d0c3fe63e0 Mon Sep 17 00:00:00 2001 From: duffyduck Date: Wed, 1 Apr 2026 23:45:25 +0200 Subject: [PATCH] =?UTF-8?q?Ohr-Button=20Absturz=20gefixt=20(LiveAudioStrea?= =?UTF-8?q?m=20entfernt,=20Phase=201=20,=20Play-Button=20in=20ARIA-Nachric?= =?UTF-8?q?hten=20fuer=20Sprachwiedergabe=20-=20[x]=20Chat-Suche=20in=20de?= =?UTF-8?q?r=20App=20(Lupe=20in=20Statusleiste)=20-=20[x]=20Watchdog=20mit?= =?UTF-8?q?=20Container-Restart=20(2min=20Warnung=20=E2=86=92=205min=20doc?= =?UTF-8?q?tor=20--fix=20=E2=86=92=208min=20Restart),Abbrechen-Button=20im?= =?UTF-8?q?=20Diagnostic=20Chat=20-=20[x]=20Nachrichten=20Backup=20on-the-?= =?UTF-8?q?fly=20(/shared/config/chat=5Fbackup.jsonl)=20-=20[x]=20Grosse?= =?UTF-8?q?=20Nachrichten=20satzweise=20aufteilen=20fuer=20TTS=20-=20[x]?= =?UTF-8?q?=20RVS=20Nachrichten=20vom=20Smartphone=20gehen=20durch?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- android/src/screens/ChatScreen.tsx | 60 ++++++++++++++++++++- android/src/services/wakeword.ts | 84 +++--------------------------- bridge/aria_bridge.py | 24 +++++++++ diagnostic/index.html | 12 ++++- diagnostic/server.js | 48 +++++++++++++++-- issue.md | 50 +++++++++++------- rvs/server.js | 2 +- 7 files changed, 174 insertions(+), 106 deletions(-) diff --git a/android/src/screens/ChatScreen.tsx b/android/src/screens/ChatScreen.tsx index 22ff768..56d13bf 100644 --- a/android/src/screens/ChatScreen.tsx +++ b/android/src/screens/ChatScreen.tsx @@ -91,6 +91,8 @@ const ChatScreen: React.FC = () => { const [gpsEnabled, setGpsEnabled] = useState(false); const [wakeWordActive, setWakeWordActive] = useState(false); const [fullscreenImage, setFullscreenImage] = useState(null); + const [searchQuery, setSearchQuery] = useState(''); + const [searchVisible, setSearchVisible] = useState(false); const flatListRef = useRef(null); const messageIdCounter = useRef(0); @@ -581,6 +583,18 @@ const ChatScreen: React.FC = () => { {item.text} )} + {/* Play-Button fuer ARIA-Nachrichten */} + {!isUser && item.text.length > 0 && ( + { + // TTS-Request an Bridge senden + rvs.send('tts_request' as any, { text: item.text, voice: '' }); + }} + > + {'\uD83D\uDD0A'} + + )} {time} ); @@ -603,12 +617,32 @@ const ChatScreen: React.FC = () => { {connectionState === 'connected' ? 'Verbunden' : connectionState === 'connecting' ? 'Verbinde...' : 'Getrennt'} + setSearchVisible(!searchVisible)} style={{marginLeft: 'auto', paddingHorizontal: 8}}> + {'\uD83D\uDD0D'} + + {/* Suchleiste */} + {searchVisible && ( + + + { setSearchVisible(false); setSearchQuery(''); }}> + X + + + )} + {/* Nachrichtenliste */} m.text.toLowerCase().includes(searchQuery.toLowerCase())) : messages} keyExtractor={item => item.id} renderItem={renderMessage} contentContainerStyle={styles.messageList} @@ -887,6 +921,30 @@ const styles = StyleSheet.create({ wakeWordIcon: { fontSize: 16, }, + searchBar: { + flexDirection: 'row', + alignItems: 'center', + backgroundColor: '#12122A', + paddingHorizontal: 12, + paddingVertical: 6, + borderBottomWidth: 1, + borderBottomColor: '#1E1E2E', + }, + searchInput: { + flex: 1, + color: '#FFFFFF', + fontSize: 14, + paddingVertical: 4, + }, + playButton: { + alignSelf: 'flex-end', + paddingHorizontal: 8, + paddingVertical: 2, + marginTop: 4, + }, + playButtonText: { + fontSize: 16, + }, fullscreenOverlay: { flex: 1, backgroundColor: 'rgba(0,0,0,0.95)', diff --git a/android/src/services/wakeword.ts b/android/src/services/wakeword.ts index 9a66694..b50a833 100644 --- a/android/src/services/wakeword.ts +++ b/android/src/services/wakeword.ts @@ -1,21 +1,12 @@ /** * Wake Word Service — "ARIA" Erkennung * - * Nutzt react-native-live-audio-stream fuer kontinuierliches Mikrofon-Monitoring. - * Erkennt Sprache per Energie-Schwellwert und sendet kurze Audio-Clips - * zur serverseitigen Wake-Word-Pruefung (openwakeword in der Bridge). + * Phase 1: Deaktiviert — react-native-live-audio-stream hat native Bridge-Probleme. + * Nutzt stattdessen Tap-to-Talk (VoiceButton) als primaeren Eingabemodus. * - * Architektur: - * App (Mikrofon) → Energie-Erkennung → Audio-Buffer - * → RVS "wake_check" → Bridge → openwakeword → Bestaetigung - * → App startet Aufnahme - * - * Aktuell (Phase 1): Einfacher Tap-to-Talk + Auto-Stop. - * Spaeter (Phase 2): Porcupine on-device "ARIA" Keyword. + * Phase 2: Porcupine on-device "ARIA" Keyword (geplant). */ -import LiveAudioStream from 'react-native-live-audio-stream'; - type WakeWordCallback = () => void; type StateCallback = (state: WakeWordState) => void; @@ -25,47 +16,16 @@ class WakeWordService { private state: WakeWordState = 'off'; private wakeCallbacks: WakeWordCallback[] = []; private stateCallbacks: StateCallback[] = []; - private isInitialized = false; /** Wake Word Erkennung starten */ async start(): Promise { if (this.state === 'listening') return true; try { - if (!this.isInitialized) { - LiveAudioStream.init({ - sampleRate: 16000, - channels: 1, - bitsPerSample: 16, - audioSource: 6, // VOICE_RECOGNITION - bufferSize: 4096, - }); - this.isInitialized = true; - } - - // Audio-Stream starten und auf Energie pruefen - LiveAudioStream.start(); - - LiveAudioStream.on('data', (base64Chunk: string) => { - if (this.state !== 'listening') return; - - // Base64 → Int16 Array → RMS berechnen - const raw = this._base64ToInt16(base64Chunk); - const rms = this._calculateRMS(raw); - - // Schwellwert: wenn laut genug → Wake Word erkannt - // Phase 1: Einfache Energie-Erkennung (jemand spricht) - // Phase 2: Porcupine "ARIA" Keyword - if (rms > 2000) { - this.setState('detected'); - this.wakeCallbacks.forEach(cb => cb()); - // Nach Detection kurz pausieren, Aufnahme uebernimmt das Mikrofon - this.stop(); - } - }); - + // Phase 1: LiveAudioStream deaktiviert (native Bridge instabil) + // Stattdessen: Tap-to-Talk als primaerer Modus + console.log('[WakeWord] Wake Word ist in Phase 1 noch nicht verfuegbar — nutze Tap-to-Talk'); this.setState('listening'); - console.log('[WakeWord] Listening gestartet'); return true; } catch (err) { console.error('[WakeWord] Start fehlgeschlagen:', err); @@ -75,22 +35,12 @@ class WakeWordService { /** Wake Word Erkennung stoppen */ stop(): void { - if (this.state === 'off') return; - try { - LiveAudioStream.stop(); - } catch {} this.setState('off'); - console.log('[WakeWord] Gestoppt'); } /** Nach Aufnahme erneut starten */ async resume(): Promise { - // Kurze Pause damit Aufnahme das Mikrofon freigeben kann - setTimeout(() => { - if (this.state === 'off') { - this.start(); - } - }, 500); + // Nichts zu tun in Phase 1 } // --- Callbacks --- @@ -113,32 +63,12 @@ class WakeWordService { return this.state; } - // --- Hilfsfunktionen --- - private setState(state: WakeWordState): void { if (this.state !== state) { this.state = state; this.stateCallbacks.forEach(cb => cb(state)); } } - - private _base64ToInt16(base64: string): Int16Array { - const binary = atob(base64); - const bytes = new Uint8Array(binary.length); - for (let i = 0; i < binary.length; i++) { - bytes[i] = binary.charCodeAt(i); - } - return new Int16Array(bytes.buffer); - } - - private _calculateRMS(samples: Int16Array): number { - if (samples.length === 0) return 0; - let sum = 0; - for (let i = 0; i < samples.length; i++) { - sum += samples[i] * samples[i]; - } - return Math.sqrt(sum / samples.length); - } } const wakeWordService = new WakeWordService(); diff --git a/bridge/aria_bridge.py b/bridge/aria_bridge.py index 9d193dd..933a72a 100644 --- a/bridge/aria_bridge.py +++ b/bridge/aria_bridge.py @@ -1014,6 +1014,30 @@ class ARIABridge: if sender in ("aria", "stt"): return + elif msg_type == "tts_request": + # App fordert TTS-Audio fuer einen Text an (Play-Button) + text = payload.get("text", "") + requested_voice = payload.get("voice", "") + if text: + voice_name = requested_voice or self.voice_engine.select_voice(text) + audio_data = self.voice_engine.synthesize(text, voice_name) + if audio_data: + audio_b64 = base64.b64encode(audio_data).decode("ascii") + try: + await self._send_to_rvs({ + "type": "audio", + "payload": { + "base64": audio_b64, + "mimeType": "audio/wav", + "voice": voice_name, + }, + "timestamp": int(asyncio.get_event_loop().time() * 1000), + }) + logger.info("[rvs] TTS on-demand: %d bytes (%s)", len(audio_data), voice_name) + except Exception as e: + logger.warning("[rvs] TTS on-demand senden fehlgeschlagen: %s", e) + return + elif msg_type == "config": # Konfiguration von App/Diagnostic empfangen + persistent speichern changed = False diff --git a/diagnostic/index.html b/diagnostic/index.html index e457763..cd8f1bb 100644 --- a/diagnostic/index.html +++ b/diagnostic/index.html @@ -201,8 +201,9 @@
-