fix: Gespraechsmodus - strenger Speech-Gate + Crash-Prevention
Probleme: - Hintergrundgeraeusche wurden als Sprache erkannt und an Whisper geschickt - App stuerzte nach laengerem Zuhoeren ab (OOM / Cache-Ueberlauf) Aenderungen: - VAD_SPEECH_THRESHOLD_DB -35 -> -28 (filtert Raum-Ambient) - VAD_SPEECH_MIN_MS 300 -> 500 (keine Huestler/Klopfer mehr) - Max-Aufnahmedauer 30s (Notbremse gegen Runaway-Loops) - _cleanupStaleCacheFiles(): alte aria_recording_/aria_tts_ Files (>30s) werden vor jeder neuen Aufnahme geloescht - ChatScreen: capMessages() begrenzt Messages-Array auf 500 Eintraege (OOM-Schutz in langen Gespraechen) Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
aafdbcd57a
commit
6fec8588c1
|
|
@ -54,6 +54,12 @@ interface ChatMessage {
|
|||
|
||||
const CHAT_STORAGE_KEY = 'aria_chat_messages';
|
||||
const MAX_STORED_MESSAGES = 500;
|
||||
const MAX_MEMORY_MESSAGES = 500;
|
||||
|
||||
// Hilfe: Messages-Array auf Max kappen (aelteste raus) — verhindert OOM
|
||||
// im Gespraechsmodus bei sehr vielen Nachrichten.
|
||||
const capMessages = (msgs: ChatMessage[]): ChatMessage[] =>
|
||||
msgs.length > MAX_MEMORY_MESSAGES ? msgs.slice(-MAX_MEMORY_MESSAGES) : msgs;
|
||||
const DEFAULT_ATTACHMENT_DIR = `${RNFS.DocumentDirectoryPath}/chat_attachments`;
|
||||
const STORAGE_PATH_KEY = 'aria_attachment_storage_path';
|
||||
|
||||
|
|
@ -218,12 +224,12 @@ const ChatScreen: React.FC = () => {
|
|||
if (sender === 'diagnostic') {
|
||||
const diagText = (message.payload.text as string) || '';
|
||||
if (diagText) {
|
||||
setMessages(prev => [...prev, {
|
||||
setMessages(prev => capMessages([...prev, {
|
||||
id: nextId(),
|
||||
sender: 'user',
|
||||
text: diagText,
|
||||
timestamp: message.timestamp,
|
||||
}]);
|
||||
}]));
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
|
@ -243,7 +249,7 @@ const ChatScreen: React.FC = () => {
|
|||
timestamp: ts,
|
||||
attachments: message.payload.attachments as Attachment[] | undefined,
|
||||
};
|
||||
return [...prev, ariaMsg];
|
||||
return capMessages([...prev, ariaMsg]);
|
||||
});
|
||||
}
|
||||
|
||||
|
|
@ -318,7 +324,7 @@ const ChatScreen: React.FC = () => {
|
|||
timestamp: Date.now(),
|
||||
attachments: [{ type: 'audio', name: 'Sprachaufnahme' }],
|
||||
};
|
||||
setMessages(prev => [...prev, userMsg]);
|
||||
setMessages(prev => capMessages([...prev, userMsg]));
|
||||
rvs.send('audio', {
|
||||
base64: result.base64,
|
||||
durationMs: result.durationMs,
|
||||
|
|
@ -423,7 +429,7 @@ const ChatScreen: React.FC = () => {
|
|||
text,
|
||||
timestamp: Date.now(),
|
||||
};
|
||||
setMessages(prev => [...prev, userMsg]);
|
||||
setMessages(prev => capMessages([...prev, userMsg]));
|
||||
|
||||
// An RVS senden
|
||||
rvs.send('chat', {
|
||||
|
|
@ -448,7 +454,7 @@ const ChatScreen: React.FC = () => {
|
|||
text: '🎙 Spracheingabe wird verarbeitet...',
|
||||
timestamp: Date.now(),
|
||||
};
|
||||
setMessages(prev => [...prev, userMsg]);
|
||||
setMessages(prev => capMessages([...prev, userMsg]));
|
||||
|
||||
rvs.send('audio', {
|
||||
base64: result.base64,
|
||||
|
|
@ -502,7 +508,7 @@ const ChatScreen: React.FC = () => {
|
|||
timestamp: Date.now(),
|
||||
attachments,
|
||||
};
|
||||
setMessages(prev => [...prev, userMsg]);
|
||||
setMessages(prev => capMessages([...prev, userMsg]));
|
||||
|
||||
// Alle Dateien an RVS senden + auf Disk speichern
|
||||
for (const { file, isPhoto } of pendingAttachments) {
|
||||
|
|
|
|||
|
|
@ -42,8 +42,11 @@ const AUDIO_ENCODING = 'audio/wav';
|
|||
// VAD (Voice Activity Detection) — Stille-Erkennung
|
||||
const VAD_SILENCE_THRESHOLD_DB = -45; // dB unter dem als "Stille" gilt
|
||||
const VAD_SILENCE_DURATION_MS = 1800; // ms Stille bevor Auto-Stop
|
||||
const VAD_SPEECH_THRESHOLD_DB = -35; // dB ueber dem als "Sprache" gilt (Sprach-Gate)
|
||||
const VAD_SPEECH_MIN_MS = 300; // ms Sprache bevor Aufnahme zaehlt
|
||||
const VAD_SPEECH_THRESHOLD_DB = -28; // dB ueber dem als "Sprache" gilt (Sprach-Gate) — hoeher = weniger Umgebungsgeraeusche
|
||||
const VAD_SPEECH_MIN_MS = 500; // ms Sprache bevor Aufnahme zaehlt — laenger = keine Huestler/Klopfer mehr
|
||||
|
||||
// Max-Dauer einer Aufnahme in Gespraechsmodus (Notbremse gegen Runaway-Loops)
|
||||
const MAX_RECORDING_MS = 30000;
|
||||
|
||||
// --- Audio-Service ---
|
||||
|
||||
|
|
@ -71,6 +74,7 @@ class AudioService {
|
|||
private vadEnabled: boolean = false;
|
||||
private lastSpeechTime: number = 0;
|
||||
private vadTimer: ReturnType<typeof setInterval> | null = null;
|
||||
private maxDurationTimer: ReturnType<typeof setTimeout> | null = null;
|
||||
|
||||
constructor() {
|
||||
this.recorder = new AudioRecorderPlayer();
|
||||
|
|
@ -120,6 +124,10 @@ class AudioService {
|
|||
// Laufende Wiedergabe stoppen (damit ARIA sich nicht selbst hoert)
|
||||
this.stopPlayback();
|
||||
|
||||
// Aufraeumen: Alte aria_recording_ und aria_tts_ Files loeschen
|
||||
// (Schutz gegen Cache-Ueberlauf im Gespraechsmodus bei vielen Zyklen)
|
||||
this._cleanupStaleCacheFiles().catch(() => {});
|
||||
|
||||
this.recordingPath = `${RNFS.CachesDirectoryPath}/aria_recording_${Date.now()}.mp4`;
|
||||
|
||||
// Aufnahme mit Metering starten
|
||||
|
|
@ -174,6 +182,11 @@ class AudioService {
|
|||
this.silenceListeners.forEach(cb => cb());
|
||||
}
|
||||
}, 200);
|
||||
// Notbremse: Nach MAX_RECORDING_MS zwangsweise stoppen
|
||||
this.maxDurationTimer = setTimeout(() => {
|
||||
console.warn(`[Audio] Max-Dauer ${MAX_RECORDING_MS}ms erreicht — Zwangs-Stop`);
|
||||
this.silenceListeners.forEach(cb => cb());
|
||||
}, MAX_RECORDING_MS);
|
||||
}
|
||||
|
||||
console.log('[Audio] Aufnahme gestartet (autoStop: %s)', autoStop);
|
||||
|
|
@ -198,6 +211,10 @@ class AudioService {
|
|||
clearInterval(this.vadTimer);
|
||||
this.vadTimer = null;
|
||||
}
|
||||
if (this.maxDurationTimer) {
|
||||
clearTimeout(this.maxDurationTimer);
|
||||
this.maxDurationTimer = null;
|
||||
}
|
||||
|
||||
try {
|
||||
await this.recorder.stopRecorder();
|
||||
|
|
@ -379,6 +396,24 @@ class AudioService {
|
|||
this.stateListeners.forEach(cb => cb(state));
|
||||
}
|
||||
}
|
||||
|
||||
/** Alte Aufnahme- und TTS-Files aus dem Cache loeschen (>30s alt). */
|
||||
private async _cleanupStaleCacheFiles(): Promise<void> {
|
||||
try {
|
||||
const files = await RNFS.readDir(RNFS.CachesDirectoryPath);
|
||||
const now = Date.now();
|
||||
for (const f of files) {
|
||||
if (!f.isFile()) continue;
|
||||
if (!f.name.startsWith('aria_recording_') && !f.name.startsWith('aria_tts_')) continue;
|
||||
const age = now - (f.mtime ? f.mtime.getTime() : 0);
|
||||
if (age > 30000) {
|
||||
await RNFS.unlink(f.path).catch(() => {});
|
||||
}
|
||||
}
|
||||
} catch {
|
||||
// silent — cleanup ist best-effort
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Singleton
|
||||
|
|
|
|||
2
issue.md
2
issue.md
|
|
@ -37,6 +37,8 @@
|
|||
- [x] App: "ARIA denkt..." Indicator + Abbrechen-Button (Bridge spiegelt agent_activity via RVS)
|
||||
- [x] Whisper STT: Model-Auswahl in Diagnostic (tiny/base/small/medium/large-v3), Hot-Reload in Bridge, Default auf medium
|
||||
- [x] App: Audio-Aufnahme explizit 16kHz mono (spart Resample, optimal fuer Whisper)
|
||||
- [x] Gespraechsmodus: Speech-Gate strenger (-28dB / 500ms) — keine Umgebungsgeraeusche mehr
|
||||
- [x] Gespraechsmodus: Max-Dauer 30s pro Aufnahme, Cache-Cleanup alter Files, Messages-Array gekappt (500)
|
||||
|
||||
## Offen
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue