fix: Gespraechsmodus - strenger Speech-Gate + Crash-Prevention
Probleme:
- Hintergrundgeraeusche wurden als Sprache erkannt und an Whisper geschickt
- App stuerzte nach laengerem Zuhoeren ab (OOM / Cache-Ueberlauf)

Aenderungen:
- VAD_SPEECH_THRESHOLD_DB -35 -> -28 (filtert Raum-Ambient)
- VAD_SPEECH_MIN_MS 300 -> 500 (keine Huestler/Klopfer mehr)
- Max-Aufnahmedauer 30s (Notbremse gegen Runaway-Loops)
- _cleanupStaleCacheFiles(): alte aria_recording_/aria_tts_ Files (>30s) werden vor jeder neuen Aufnahme geloescht
- ChatScreen: capMessages() begrenzt Messages-Array auf 500 Eintraege (OOM-Schutz in langen Gespraechen)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -54,6 +54,12 @@ interface ChatMessage {
|
|||||||
|
|
||||||
const CHAT_STORAGE_KEY = 'aria_chat_messages';
|
const CHAT_STORAGE_KEY = 'aria_chat_messages';
|
||||||
const MAX_STORED_MESSAGES = 500;
|
const MAX_STORED_MESSAGES = 500;
|
||||||
|
const MAX_MEMORY_MESSAGES = 500;
|
||||||
|
|
||||||
|
// Helper: caps the messages array at MAX_MEMORY_MESSAGES by keeping only the newest entries (oldest are dropped);
|
||||||
|
// returns the input array unchanged when it is within the limit. Intended to prevent OOM in conversation mode with very many messages.
|
||||||
|
const capMessages = (msgs: ChatMessage[]): ChatMessage[] =>
|
||||||
|
msgs.length > MAX_MEMORY_MESSAGES ? msgs.slice(-MAX_MEMORY_MESSAGES) : msgs;
|
||||||
const DEFAULT_ATTACHMENT_DIR = `${RNFS.DocumentDirectoryPath}/chat_attachments`;
|
const DEFAULT_ATTACHMENT_DIR = `${RNFS.DocumentDirectoryPath}/chat_attachments`;
|
||||||
const STORAGE_PATH_KEY = 'aria_attachment_storage_path';
|
const STORAGE_PATH_KEY = 'aria_attachment_storage_path';
|
||||||
|
|
||||||
@@ -218,12 +224,12 @@ const ChatScreen: React.FC = () => {
|
|||||||
if (sender === 'diagnostic') {
|
if (sender === 'diagnostic') {
|
||||||
const diagText = (message.payload.text as string) || '';
|
const diagText = (message.payload.text as string) || '';
|
||||||
if (diagText) {
|
if (diagText) {
|
||||||
setMessages(prev => [...prev, {
|
setMessages(prev => capMessages([...prev, {
|
||||||
id: nextId(),
|
id: nextId(),
|
||||||
sender: 'user',
|
sender: 'user',
|
||||||
text: diagText,
|
text: diagText,
|
||||||
timestamp: message.timestamp,
|
timestamp: message.timestamp,
|
||||||
}]);
|
}]));
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@@ -243,7 +249,7 @@ const ChatScreen: React.FC = () => {
|
|||||||
timestamp: ts,
|
timestamp: ts,
|
||||||
attachments: message.payload.attachments as Attachment[] | undefined,
|
attachments: message.payload.attachments as Attachment[] | undefined,
|
||||||
};
|
};
|
||||||
return [...prev, ariaMsg];
|
return capMessages([...prev, ariaMsg]);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -318,7 +324,7 @@ const ChatScreen: React.FC = () => {
|
|||||||
timestamp: Date.now(),
|
timestamp: Date.now(),
|
||||||
attachments: [{ type: 'audio', name: 'Sprachaufnahme' }],
|
attachments: [{ type: 'audio', name: 'Sprachaufnahme' }],
|
||||||
};
|
};
|
||||||
setMessages(prev => [...prev, userMsg]);
|
setMessages(prev => capMessages([...prev, userMsg]));
|
||||||
rvs.send('audio', {
|
rvs.send('audio', {
|
||||||
base64: result.base64,
|
base64: result.base64,
|
||||||
durationMs: result.durationMs,
|
durationMs: result.durationMs,
|
||||||
@@ -423,7 +429,7 @@ const ChatScreen: React.FC = () => {
|
|||||||
text,
|
text,
|
||||||
timestamp: Date.now(),
|
timestamp: Date.now(),
|
||||||
};
|
};
|
||||||
setMessages(prev => [...prev, userMsg]);
|
setMessages(prev => capMessages([...prev, userMsg]));
|
||||||
|
|
||||||
// An RVS senden
|
// An RVS senden
|
||||||
rvs.send('chat', {
|
rvs.send('chat', {
|
||||||
@@ -448,7 +454,7 @@ const ChatScreen: React.FC = () => {
|
|||||||
text: '🎙 Spracheingabe wird verarbeitet...',
|
text: '🎙 Spracheingabe wird verarbeitet...',
|
||||||
timestamp: Date.now(),
|
timestamp: Date.now(),
|
||||||
};
|
};
|
||||||
setMessages(prev => [...prev, userMsg]);
|
setMessages(prev => capMessages([...prev, userMsg]));
|
||||||
|
|
||||||
rvs.send('audio', {
|
rvs.send('audio', {
|
||||||
base64: result.base64,
|
base64: result.base64,
|
||||||
@@ -502,7 +508,7 @@ const ChatScreen: React.FC = () => {
|
|||||||
timestamp: Date.now(),
|
timestamp: Date.now(),
|
||||||
attachments,
|
attachments,
|
||||||
};
|
};
|
||||||
setMessages(prev => [...prev, userMsg]);
|
setMessages(prev => capMessages([...prev, userMsg]));
|
||||||
|
|
||||||
// Alle Dateien an RVS senden + auf Disk speichern
|
// Alle Dateien an RVS senden + auf Disk speichern
|
||||||
for (const { file, isPhoto } of pendingAttachments) {
|
for (const { file, isPhoto } of pendingAttachments) {
|
||||||
|
|||||||
@@ -42,8 +42,11 @@ const AUDIO_ENCODING = 'audio/wav';
|
|||||||
// VAD (Voice Activity Detection) — silence detection
|
// VAD (Voice Activity Detection) — silence detection
|
||||||
const VAD_SILENCE_THRESHOLD_DB = -45; // dB level below which input counts as "silence"
|
const VAD_SILENCE_THRESHOLD_DB = -45; // dB level below which input counts as "silence"
|
||||||
const VAD_SILENCE_DURATION_MS = 1800; // ms of silence before auto-stop
|
const VAD_SILENCE_DURATION_MS = 1800; // ms of silence before auto-stop
|
||||||
const VAD_SPEECH_THRESHOLD_DB = -35; // dB level above which input counts as "speech" (speech gate)
|
const VAD_SPEECH_THRESHOLD_DB = -28; // dB level above which input counts as "speech" (speech gate) — higher = less ambient noise accepted
|
||||||
const VAD_SPEECH_MIN_MS = 300; // ms of speech before a recording counts
|
const VAD_SPEECH_MIN_MS = 500; // ms of speech before a recording counts — longer = coughs/knocks no longer qualify
|
||||||
|
|
||||||
|
// Maximum duration of a single recording in conversation mode (emergency brake against runaway loops)
|
||||||
|
const MAX_RECORDING_MS = 30000;
|
||||||
|
|
||||||
// --- Audio-Service ---
|
// --- Audio-Service ---
|
||||||
|
|
||||||
@@ -71,6 +74,7 @@ class AudioService {
|
|||||||
private vadEnabled: boolean = false;
|
private vadEnabled: boolean = false;
|
||||||
private lastSpeechTime: number = 0;
|
private lastSpeechTime: number = 0;
|
||||||
private vadTimer: ReturnType<typeof setInterval> | null = null;
|
private vadTimer: ReturnType<typeof setInterval> | null = null;
|
||||||
|
private maxDurationTimer: ReturnType<typeof setTimeout> | null = null;
|
||||||
|
|
||||||
constructor() {
|
constructor() {
|
||||||
this.recorder = new AudioRecorderPlayer();
|
this.recorder = new AudioRecorderPlayer();
|
||||||
@@ -120,6 +124,10 @@ class AudioService {
|
|||||||
// Laufende Wiedergabe stoppen (damit ARIA sich nicht selbst hoert)
|
// Laufende Wiedergabe stoppen (damit ARIA sich nicht selbst hoert)
|
||||||
this.stopPlayback();
|
this.stopPlayback();
|
||||||
|
|
||||||
|
// Aufraeumen: Alte aria_recording_ und aria_tts_ Files loeschen
|
||||||
|
// (Schutz gegen Cache-Ueberlauf im Gespraechsmodus bei vielen Zyklen)
|
||||||
|
this._cleanupStaleCacheFiles().catch(() => {});
|
||||||
|
|
||||||
this.recordingPath = `${RNFS.CachesDirectoryPath}/aria_recording_${Date.now()}.mp4`;
|
this.recordingPath = `${RNFS.CachesDirectoryPath}/aria_recording_${Date.now()}.mp4`;
|
||||||
|
|
||||||
// Aufnahme mit Metering starten
|
// Aufnahme mit Metering starten
|
||||||
@@ -174,6 +182,11 @@ class AudioService {
|
|||||||
this.silenceListeners.forEach(cb => cb());
|
this.silenceListeners.forEach(cb => cb());
|
||||||
}
|
}
|
||||||
}, 200);
|
}, 200);
|
||||||
|
// Notbremse: Nach MAX_RECORDING_MS zwangsweise stoppen
|
||||||
|
this.maxDurationTimer = setTimeout(() => {
|
||||||
|
console.warn(`[Audio] Max-Dauer ${MAX_RECORDING_MS}ms erreicht — Zwangs-Stop`);
|
||||||
|
this.silenceListeners.forEach(cb => cb());
|
||||||
|
}, MAX_RECORDING_MS);
|
||||||
}
|
}
|
||||||
|
|
||||||
console.log('[Audio] Aufnahme gestartet (autoStop: %s)', autoStop);
|
console.log('[Audio] Aufnahme gestartet (autoStop: %s)', autoStop);
|
||||||
@@ -198,6 +211,10 @@ class AudioService {
|
|||||||
clearInterval(this.vadTimer);
|
clearInterval(this.vadTimer);
|
||||||
this.vadTimer = null;
|
this.vadTimer = null;
|
||||||
}
|
}
|
||||||
|
if (this.maxDurationTimer) {
|
||||||
|
clearTimeout(this.maxDurationTimer);
|
||||||
|
this.maxDurationTimer = null;
|
||||||
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
await this.recorder.stopRecorder();
|
await this.recorder.stopRecorder();
|
||||||
@@ -379,6 +396,24 @@ class AudioService {
|
|||||||
this.stateListeners.forEach(cb => cb(state));
|
this.stateListeners.forEach(cb => cb(state));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Best-effort removal of files in the cache directory whose names start with `aria_recording_` or `aria_tts_` and that are older than 30 s; entries without an mtime are treated as stale. Never throws — all errors are swallowed. */
|
||||||
|
private async _cleanupStaleCacheFiles(): Promise<void> {
|
||||||
|
try {
|
||||||
|
const files = await RNFS.readDir(RNFS.CachesDirectoryPath);
|
||||||
|
const now = Date.now();
|
||||||
|
for (const f of files) {
|
||||||
|
if (!f.isFile()) continue; // skip anything that is not a regular file
|
||||||
|
if (!f.name.startsWith('aria_recording_') && !f.name.startsWith('aria_tts_')) continue; // only touch our own recording/TTS files
|
||||||
|
const age = now - (f.mtime ? f.mtime.getTime() : 0); // missing mtime -> age is measured from epoch 0, so the file counts as stale
|
||||||
|
if (age > 30000) {
|
||||||
|
await RNFS.unlink(f.path).catch(() => {}); // ignore per-file delete failures and keep going
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch {
|
||||||
|
// intentionally silent — cleanup is best-effort
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Singleton
|
// Singleton
|
||||||
|
|||||||
@@ -37,6 +37,8 @@
|
|||||||
- [x] App: "ARIA denkt..." Indicator + Abbrechen-Button (Bridge spiegelt agent_activity via RVS)
|
- [x] App: "ARIA denkt..." Indicator + Abbrechen-Button (Bridge spiegelt agent_activity via RVS)
|
||||||
- [x] Whisper STT: Model-Auswahl in Diagnostic (tiny/base/small/medium/large-v3), Hot-Reload in Bridge, Default auf medium
|
- [x] Whisper STT: Model-Auswahl in Diagnostic (tiny/base/small/medium/large-v3), Hot-Reload in Bridge, Default auf medium
|
||||||
- [x] App: Audio-Aufnahme explizit 16kHz mono (spart Resample, optimal fuer Whisper)
|
- [x] App: Audio-Aufnahme explizit 16kHz mono (spart Resample, optimal fuer Whisper)
|
||||||
|
- [x] Gespraechsmodus: Speech-Gate strenger (-28dB / 500ms) — keine Umgebungsgeraeusche mehr
|
||||||
|
- [x] Gespraechsmodus: Max-Dauer 30s pro Aufnahme, Cache-Cleanup alter Files, Messages-Array gekappt (500)
|
||||||
|
|
||||||
## Offen
|
## Offen
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user