diff --git a/android/src/screens/ChatScreen.tsx b/android/src/screens/ChatScreen.tsx index f56ad72..4959cec 100644 --- a/android/src/screens/ChatScreen.tsx +++ b/android/src/screens/ChatScreen.tsx @@ -263,15 +263,22 @@ const ChatScreen: React.FC = () => { if (message.type === 'chat') { const sender = (message.payload.sender as string) || ''; - // STT-Ergebnis: Transkribierten Text in die Sprach-Bubble schreiben + // STT-Ergebnis: Transkribierten Text in die Sprach-Bubble schreiben. + // WICHTIG: Nur die ERSTE noch unaufgeloeste Aufnahme matchen — sonst + // wuerde bei zwei kurz hintereinander gesendeten Audios beide Bubbles + // den gleichen Text bekommen (Bug: zweite Antwort ueberschreibt erste). if (sender === 'stt') { const sttText = (message.payload.text as string) || ''; if (sttText) { - setMessages(prev => prev.map(m => - m.sender === 'user' && m.text.includes('Spracheingabe wird verarbeitet') - ? { ...m, text: `\uD83C\uDFA4 ${sttText}` } - : m - )); + setMessages(prev => { + const idx = prev.findIndex(m => + m.sender === 'user' && m.text.includes('Spracheingabe wird verarbeitet') + ); + if (idx < 0) return prev; + const next = prev.slice(); + next[idx] = { ...next[idx], text: `\uD83C\uDFA4 ${sttText}` }; + return next; + }); } return; } @@ -572,6 +579,8 @@ const ChatScreen: React.FC = () => { }; setMessages(prev => capMessages([...prev, userMsg])); + console.log('[Chat] sende mit voice=%s speed=%s', + localXttsVoiceRef.current || '(default)', ttsSpeedRef.current); // An RVS senden — mit geraetelokaler Voice (Bridge nutzt sie fuer die Antwort) rvs.send('chat', { text, diff --git a/android/src/services/audio.ts b/android/src/services/audio.ts index a0cd062..d0087f0 100644 --- a/android/src/services/audio.ts +++ b/android/src/services/audio.ts @@ -328,11 +328,12 @@ class AudioService { }; if (autoStop) { const vadSilenceMs = await loadVadSilenceMs(); - console.log('[Audio] VAD-Stille:', vadSilenceMs, 'ms'); + console.log('[Audio] startRecording: autoStop=true, VAD-Stille=%dms, MAX=%dms', + vadSilenceMs, MAX_RECORDING_MS); this.vadTimer = setInterval(() => { const silenceDuration = Date.now() - this.lastSpeechTime; if (silenceDuration >= vadSilenceMs) { - fireSilenceOnce(`VAD ${silenceDuration}ms Stille`); + fireSilenceOnce(`VAD ${silenceDuration}ms Stille (Schwelle=${vadSilenceMs}ms)`); } }, 200); // Notbremse: Nach MAX_RECORDING_MS zwangsweise stoppen diff --git a/android/src/services/wakeword.ts b/android/src/services/wakeword.ts index fa9f4d7..c0029f8 100644 --- a/android/src/services/wakeword.ts +++ b/android/src/services/wakeword.ts @@ -17,6 +17,7 @@ */ import AsyncStorage from '@react-native-async-storage/async-storage'; +import { ToastAndroid } from 'react-native'; type WakeWordCallback = () => void; type StateCallback = (state: WakeWordState) => void; @@ -80,10 +81,20 @@ class WakeWordService { // Laufende Instanz stoppen await this.disposePorcupine(); - if (!this.accessKey) return false; + if (!this.accessKey) { + console.warn('[WakeWord] configure: kein Access Key gesetzt'); + return false; + } // Neu initialisieren - return this.initPorcupine(); + const ok = await this.initPorcupine(); + if (!ok) { + ToastAndroid.show( + `Wake-Word "${this.keyword}" konnte nicht initialisiert werden — Logs pruefen`, + ToastAndroid.LONG, + ); + } + return ok; } private async initPorcupine(): Promise { @@ -117,10 +128,14 @@ class WakeWordService { this.disposePorcupine().catch(() => {}); }, ); - console.log('[WakeWord] Porcupine init OK (keyword=%s)', this.keyword); + console.log('[WakeWord] Porcupine init OK (keyword=%s, manager=%s)', + this.keyword, this.porcupine ? 'created' : 'NULL'); return true; - } catch (err) { - console.warn('[WakeWord] Porcupine init fehlgeschlagen:', err); + } catch (err: any) { + console.warn('[WakeWord] Porcupine init fehlgeschlagen:', err?.message || err); + console.warn('[WakeWord] err details:', JSON.stringify({ + name: err?.name, code: err?.code, stack: err?.stack?.slice(0, 200), + })); this.porcupine = null; return false; } finally { @@ -146,11 +161,24 @@ class WakeWordService { try { await this.porcupine.start(); console.log('[WakeWord] armed — warte auf Wake Word "%s"', this.keyword); + ToastAndroid.show(`Lausche auf "${this.keyword}"`, ToastAndroid.SHORT); this.setState('armed'); return true; - } catch (err) { - console.warn('[WakeWord] Porcupine start fehlgeschlagen — Fallback Direkt-Konversation:', err); + } catch (err: any) { + console.warn('[WakeWord] Porcupine start fehlgeschlagen — Fallback Direkt-Konversation:', + err?.message || err); + ToastAndroid.show( + `Wake-Word-Start failed: ${err?.message || err}`, + ToastAndroid.LONG, + ); } + } else { + // Kein Porcupine init → User explicit informieren + console.warn('[WakeWord] Porcupine nicht initialisiert — Access Key fehlt? Fallback Direkt-Konversation'); + ToastAndroid.show( + 'Wake-Word nicht verfuegbar — Access Key in Settings setzen', + ToastAndroid.LONG, + ); } // Fallback: direkt in die Konversation console.log('[WakeWord] Konversation startet sofort (kein Wake-Word)'); diff --git a/bridge/aria_bridge.py b/bridge/aria_bridge.py index 368ebf1..8a7ae57 100644 --- a/bridge/aria_bridge.py +++ b/bridge/aria_bridge.py @@ -942,7 +942,8 @@ class ARIABridge: }, "timestamp": int(asyncio.get_event_loop().time() * 1000), }) - logger.info("[core] XTTS-Request gesendet (%s): '%s'", xtts_voice or "default", tts_text[:60]) + logger.info("[core] XTTS-Request gesendet (voice=%s, speed=%.2fx): '%s'", + xtts_voice or "default", xtts_speed, tts_text[:60]) except Exception as e: logger.error("[core] XTTS-Request fehlgeschlagen: %s — kein Audio", e) diff --git a/xtts/f5tts/bridge.py b/xtts/f5tts/bridge.py index 12f6015..6d22b3d 100644 --- a/xtts/f5tts/bridge.py +++ b/xtts/f5tts/bridge.py @@ -507,7 +507,8 @@ async def _do_tts(ws, runner: F5Runner, text: str, voice: str, ref_wav_str, ref_text = str(pair[0]), pair[1].read_text(encoding="utf-8").strip() sentences = split_sentences(text) - logger.info("F5-TTS: %d Satz(e), voice=%s (%s)", len(sentences), voice or "default", ref_wav_str) + logger.info("F5-TTS: %d Satz(e), voice=%s, speed=%.2fx (%s)", + len(sentences), voice or "default", speed, ref_wav_str) chunk_index = 0 pcm_sr = TARGET_SR