diff --git a/android/src/services/audio.ts b/android/src/services/audio.ts index 59c9c2a..59db530 100644 --- a/android/src/services/audio.ts +++ b/android/src/services/audio.ts @@ -388,11 +388,22 @@ class AudioService { if (db > -100) { this.vadBaselineSamples.push(db); if (this.vadBaselineSamples.length === VAD_BASELINE_SAMPLES) { - const avg = this.vadBaselineSamples.reduce((a, b) => a + b, 0) / VAD_BASELINE_SAMPLES; - this.vadAdaptiveSilenceDb = avg + VAD_SILENCE_OFFSET_DB; - this.vadAdaptiveSpeechDb = avg + VAD_SPEECH_OFFSET_DB; - const msg = `VAD: ambient=${avg.toFixed(0)}dB stille>${this.vadAdaptiveSilenceDb.toFixed(0)}dB`; - console.log('[Audio] %s speech>%s', msg, this.vadAdaptiveSpeechDb.toFixed(1)); + // Minimum statt Mittelwert: robust gegen Spike-Samples (z.B. wenn + // der User direkt nach Wake-Word sofort spricht oder das Wake-Word- + // Echo noch im Mikro ist). Min ist der ruhigste Moment. + const lowest = Math.min(...this.vadBaselineSamples); + const rawSilence = lowest + VAD_SILENCE_OFFSET_DB; + const rawSpeech = lowest + VAD_SPEECH_OFFSET_DB; + // Cap auf einen vernuenftigen Bereich: + // - Silence-Schwelle nicht ueber -28dB (sonst zaehlt Hintergrund- + // geraeusch dauerhaft als "Sprache" → VAD feuert nie) + // - Silence-Schwelle nicht unter -50dB (sonst zu strikt) + this.vadAdaptiveSilenceDb = Math.max(-50, Math.min(rawSilence, -28)); + this.vadAdaptiveSpeechDb = Math.max(-40, Math.min(rawSpeech, -18)); + const msg = `VAD: ambient=${lowest.toFixed(0)}dB stille>${this.vadAdaptiveSilenceDb.toFixed(0)}dB`; + console.log('[Audio] %s speech>%s (raw silence=%s speech=%s)', + msg, this.vadAdaptiveSpeechDb.toFixed(1), + rawSilence.toFixed(1), rawSpeech.toFixed(1)); try { ToastAndroid.show(msg, ToastAndroid.SHORT); } catch {} } }