fix: Gespraechsmodus - strenger Speech-Gate + Crash-Prevention

Probleme:
- Hintergrundgeraeusche wurden als Sprache erkannt und an Whisper geschickt
- App stuerzte nach laengerem Zuhoeren ab (OOM / Cache-Ueberlauf)

Aenderungen:
- VAD_SPEECH_THRESHOLD_DB -35 -> -28 (filtert Raum-Ambient)
- VAD_SPEECH_MIN_MS 300 -> 500 (keine Huestler/Klopfer mehr)
- Max-Aufnahmedauer 30s (Notbremse gegen Runaway-Loops)
- _cleanupStaleCacheFiles(): alte aria_recording_/aria_tts_ Files (>30s) werden vor jeder neuen Aufnahme geloescht
- ChatScreen: capMessages() begrenzt Messages-Array auf 500 Eintraege (OOM-Schutz in langen Gespraechen)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 12:05:15 +02:00
parent aafdbcd57a
commit 6fec8588c1
3 changed files with 52 additions and 9 deletions
@@ -54,6 +54,12 @@ interface ChatMessage {
 const CHAT_STORAGE_KEY = 'aria_chat_messages';
 const MAX_STORED_MESSAGES = 500;
 const MAX_MEMORY_MESSAGES = 500;
 /**
  * Caps the in-memory message list at MAX_MEMORY_MESSAGES entries to prevent
  * OOM in conversation mode when very many messages accumulate.
  *
  * @param msgs - Current message list.
  * @returns The same array instance when it is within the limit; otherwise a
  *   new array holding only the last MAX_MEMORY_MESSAGES entries (the entries
  *   at the front of the list are dropped).
  */
 const capMessages = (msgs: ChatMessage[]): ChatMessage[] => {
   if (msgs.length <= MAX_MEMORY_MESSAGES) {
     return msgs;
   }
   return msgs.slice(-MAX_MEMORY_MESSAGES);
 };
 const DEFAULT_ATTACHMENT_DIR = `${RNFS.DocumentDirectoryPath}/chat_attachments`;
 const STORAGE_PATH_KEY = 'aria_attachment_storage_path';
@@ -218,12 +224,12 @@ const ChatScreen: React.FC = () => {
        if (sender === 'diagnostic') {
          const diagText = (message.payload.text as string) || '';
          if (diagText) {
-            setMessages(prev => [...prev, {
+            setMessages(prev => capMessages([...prev, {
              id: nextId(),
              sender: 'user',
              text: diagText,
              timestamp: message.timestamp,
-            }]);
+            }]));
          }
          return;
        }
@@ -243,7 +249,7 @@ const ChatScreen: React.FC = () => {
            timestamp: ts,
            attachments: message.payload.attachments as Attachment[] | undefined,
          };
-          return [...prev, ariaMsg];
+          return capMessages([...prev, ariaMsg]);
        });
      }
@@ -318,7 +324,7 @@ const ChatScreen: React.FC = () => {
          timestamp: Date.now(),
          attachments: [{ type: 'audio', name: 'Sprachaufnahme' }],
        };
-        setMessages(prev => [...prev, userMsg]);
+        setMessages(prev => capMessages([...prev, userMsg]));
        rvs.send('audio', {
          base64: result.base64,
          durationMs: result.durationMs,
@@ -423,7 +429,7 @@ const ChatScreen: React.FC = () => {
      text,
      timestamp: Date.now(),
    };
-    setMessages(prev => [...prev, userMsg]);
+    setMessages(prev => capMessages([...prev, userMsg]));
    // An RVS senden
    rvs.send('chat', {
@@ -448,7 +454,7 @@ const ChatScreen: React.FC = () => {
      text: '🎙 Spracheingabe wird verarbeitet...',
      timestamp: Date.now(),
    };
-    setMessages(prev => [...prev, userMsg]);
+    setMessages(prev => capMessages([...prev, userMsg]));
    rvs.send('audio', {
      base64: result.base64,
@@ -502,7 +508,7 @@ const ChatScreen: React.FC = () => {
      timestamp: Date.now(),
      attachments,
    };
-    setMessages(prev => [...prev, userMsg]);
+    setMessages(prev => capMessages([...prev, userMsg]));
    // Alle Dateien an RVS senden + auf Disk speichern
    for (const { file, isPhoto } of pendingAttachments) {
@@ -42,8 +42,11 @@ const AUDIO_ENCODING = 'audio/wav';
 // VAD (Voice Activity Detection) — Stille-Erkennung
 const VAD_SILENCE_THRESHOLD_DB = -45;  // dB unter dem als "Stille" gilt
 const VAD_SILENCE_DURATION_MS = 1800;  // ms Stille bevor Auto-Stop
-const VAD_SPEECH_THRESHOLD_DB = -35;   // dB ueber dem als "Sprache" gilt (Sprach-Gate)
+const VAD_SPEECH_THRESHOLD_DB = -28;   // dB ueber dem als "Sprache" gilt (Sprach-Gate) — hoeher = weniger Umgebungsgeraeusche
-const VAD_SPEECH_MIN_MS = 300;         // ms Sprache bevor Aufnahme zaehlt
+const VAD_SPEECH_MIN_MS = 500;         // ms Sprache bevor Aufnahme zaehlt — laenger = keine Huestler/Klopfer mehr
 // Max-Dauer einer Aufnahme in Gespraechsmodus (Notbremse gegen Runaway-Loops)
 const MAX_RECORDING_MS = 30000;
 // --- Audio-Service ---
@@ -71,6 +74,7 @@ class AudioService {
  private vadEnabled: boolean = false;
  private lastSpeechTime: number = 0;
  private vadTimer: ReturnType<typeof setInterval> | null = null;
  private maxDurationTimer: ReturnType<typeof setTimeout> | null = null;
  constructor() {
    this.recorder = new AudioRecorderPlayer();
@@ -120,6 +124,10 @@ class AudioService {
      // Laufende Wiedergabe stoppen (damit ARIA sich nicht selbst hoert)
      this.stopPlayback();
      // Aufraeumen: Alte aria_recording_ und aria_tts_ Files loeschen
      // (Schutz gegen Cache-Ueberlauf im Gespraechsmodus bei vielen Zyklen)
      this._cleanupStaleCacheFiles().catch(() => {});
      this.recordingPath = `${RNFS.CachesDirectoryPath}/aria_recording_${Date.now()}.mp4`;
      // Aufnahme mit Metering starten
@@ -174,6 +182,11 @@ class AudioService {
            this.silenceListeners.forEach(cb => cb());
          }
        }, 200);
        // Notbremse: Nach MAX_RECORDING_MS zwangsweise stoppen
        this.maxDurationTimer = setTimeout(() => {
          console.warn(`[Audio] Max-Dauer ${MAX_RECORDING_MS}ms erreicht — Zwangs-Stop`);
          this.silenceListeners.forEach(cb => cb());
        }, MAX_RECORDING_MS);
      }
      console.log('[Audio] Aufnahme gestartet (autoStop: %s)', autoStop);
@@ -198,6 +211,10 @@ class AudioService {
      clearInterval(this.vadTimer);
      this.vadTimer = null;
    }
    if (this.maxDurationTimer) {
      clearTimeout(this.maxDurationTimer);
      this.maxDurationTimer = null;
    }
    try {
      await this.recorder.stopRecorder();
@@ -379,6 +396,24 @@ class AudioService {
      this.stateListeners.forEach(cb => cb(state));
    }
  }
  /**
   * Best-effort removal of stale recording and TTS files from the cache.
   *
   * Lists RNFS.CachesDirectoryPath and unlinks every regular file whose name
   * starts with `aria_recording_` or `aria_tts_` and whose mtime lies more
   * than 30 s in the past. Entries without an mtime are treated as timestamp
   * 0 and therefore always count as stale. Errors from listing or deleting
   * are swallowed on purpose; this method never rejects.
   */
  private async _cleanupStaleCacheFiles(): Promise<void> {
    try {
      const prefixes = ['aria_recording_', 'aria_tts_'];
      const entries = await RNFS.readDir(RNFS.CachesDirectoryPath);
      const reference = Date.now();
      for (const entry of entries) {
        // Short-circuit keeps the original order: file check first, then name.
        const isCandidate =
          entry.isFile() && prefixes.some(prefix => entry.name.startsWith(prefix));
        if (isCandidate) {
          const modifiedAt = entry.mtime ? entry.mtime.getTime() : 0;
          if (reference - modifiedAt > 30000) {
            // A failed delete must not abort the remaining cleanup.
            await RNFS.unlink(entry.path).catch(() => {});
          }
        }
      }
    } catch {
      // Intentionally silent — cleanup is best-effort.
    }
  }
 }
 // Singleton
@@ -37,6 +37,8 @@
 - [x] App: "ARIA denkt..." Indicator + Abbrechen-Button (Bridge spiegelt agent_activity via RVS)
 - [x] Whisper STT: Model-Auswahl in Diagnostic (tiny/base/small/medium/large-v3), Hot-Reload in Bridge, Default auf medium
 - [x] App: Audio-Aufnahme explizit 16kHz mono (spart Resample, optimal fuer Whisper)
 - [x] Gespraechsmodus: Speech-Gate strenger (-28dB / 500ms) — keine Umgebungsgeraeusche mehr
 - [x] Gespraechsmodus: Max-Dauer 30s pro Aufnahme, Cache-Cleanup alter Files, Messages-Array gekappt (500)
 ## Offen