/**
 * Audio service for voice input/output.
 *
 * Manages microphone recording (with VAD / auto-stop on silence),
 * TTS audio playback and metering for visual feedback.
 * Uses react-native-audio-recorder-player for recording.
 */
import { Platform, PermissionsAndroid, NativeModules } from 'react-native';
import Sound from 'react-native-sound';
import RNFS from 'react-native-fs';
import AsyncStorage from '@react-native-async-storage/async-storage';
import AudioRecorderPlayer, {
  AudioEncoderAndroidType,
  AudioSourceAndroidType,
  AVEncodingOption,
  OutputFormatAndroidType,
} from 'react-native-audio-recorder-player';

// Base64 encoder for binary strings (header bytes → base64).
const B64_CHARS = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/';

/**
 * Encodes a "binary string" (one byte per UTF-16 code unit) as base64.
 *
 * Only the low 8 bits of every code unit are used. The output is padded
 * with '=' so its length is always a multiple of four.
 *
 * @param bin Binary string; an empty string yields an empty result.
 * @returns The base64 representation of the bytes in `bin`.
 */
function btoaSafe(bin: string): string {
  let out = '';
  const len = bin.length;
  for (let i = 0; i < len; i += 3) {
    const b1 = bin.charCodeAt(i) & 0xff;
    const b2 = i + 1 < len ? bin.charCodeAt(i + 1) & 0xff : 0;
    const b3 = i + 2 < len ? bin.charCodeAt(i + 2) & 0xff : 0;
    // Every index below is within 0..63, so charAt never returns ''.
    out += B64_CHARS.charAt(b1 >> 2);
    out += B64_CHARS.charAt(((b1 & 0x03) << 4) | (b2 >> 4));
    out += i + 1 < len ? B64_CHARS.charAt(((b2 & 0x0f) << 2) | (b3 >> 6)) : '=';
    out += i + 2 < len ? B64_CHARS.charAt(b3 & 0x3f) : '=';
  }
  return out;
}

// Native modules for audio focus (ducking/muting other apps) and PCM streaming.
// The resolved values of these promises are never read in this file, hence
// `unknown` — NOTE(review): tighten once the native contracts are confirmed.
const { AudioFocus, PcmStreamPlayer } = NativeModules as {
  AudioFocus?: {
    requestDuck: () => Promise<unknown>;
    requestExclusive: () => Promise<unknown>;
    release: () => Promise<unknown>;
  };
  PcmStreamPlayer?: {
    start: (sampleRate: number, channels: number, prerollSeconds: number) => Promise<unknown>;
    writeChunk: (base64Pcm: string) => Promise<unknown>;
    end: () => Promise<unknown>;
    stop: () => Promise<unknown>;
  };
};

// --- Types ---

export interface RecordingResult {
  /** Base64-encoded audio data */
  base64: string;
  /** Duration in milliseconds */
  durationMs: number;
  /** MIME type (e.g. audio/wav) */
  mimeType: string;
}

export type RecordingState = 'idle' | 'recording' | 'processing';

type RecordingStateCallback = (state: RecordingState) => void;
type MeterCallback = (db: number) => void;
type SilenceCallback = () => void;

// --- Constants ---

const AUDIO_SAMPLE_RATE = 16000;
const AUDIO_CHANNELS = 1;
const AUDIO_ENCODING = 'audio/wav';

// VAD (voice activity detection) — silence detection
const VAD_SILENCE_THRESHOLD_DB = -45; // dB below which the input counts as "silence"
const VAD_SPEECH_THRESHOLD_DB = -28; // dB above which the input counts as "speech" (speech gate) — higher = less ambient noise
const VAD_SPEECH_MIN_MS = 500; // ms of speech before a recording counts — longer = coughs/knocks are ignored

// VAD silence (in seconds) — how long a pause in speech is tolerated before
// the recording is ended automatically. Configurable in the app settings.
export const VAD_SILENCE_DEFAULT_SEC = 2.8;
export const VAD_SILENCE_MIN_SEC = 1.0;
export const VAD_SILENCE_MAX_SEC = 8.0;
export const VAD_SILENCE_STORAGE_KEY = 'aria_vad_silence_sec';

// Conversation window (in seconds) — after ARIA's answer the user has this
// long to keep talking in conversation mode without the conversation being
// ended. Saying nothing within the window → conversation off.
export const CONV_WINDOW_DEFAULT_SEC = 8.0;
export const CONV_WINDOW_MIN_SEC = 3.0;
export const CONV_WINDOW_MAX_SEC = 20.0;
export const CONV_WINDOW_STORAGE_KEY = 'aria_conv_window_sec';

// TTS playback speed — stored per device and passed along to the bridge
// (speed parameter of the F5-TTS infer()). 1.0 = normal.
export const TTS_SPEED_DEFAULT = 1.0;
export const TTS_SPEED_MIN = 0.1;
export const TTS_SPEED_MAX = 5.0;
export const TTS_SPEED_STORAGE_KEY = 'aria_tts_speed';

/**
 * Reads a numeric setting from AsyncStorage and validates it.
 *
 * @param key AsyncStorage key to read.
 * @param min Smallest accepted value (inclusive).
 * @param max Largest accepted value (inclusive).
 * @param fallback Returned when the key is missing, unparsable, outside
 *   `[min, max]`, or when storage access throws.
 * @returns The stored value if valid, otherwise `fallback`.
 */
async function loadBoundedNumber(
  key: string,
  min: number,
  max: number,
  fallback: number,
): Promise<number> {
  try {
    const raw = await AsyncStorage.getItem(key);
    if (raw != null) {
      const parsed = parseFloat(raw);
      if (Number.isFinite(parsed) && parsed >= min && parsed <= max) {
        return parsed;
      }
    }
  } catch {
    // Best effort: an unreadable store must never break audio — use the default.
  }
  return fallback;
}

/** Loads the persisted TTS playback speed, or `TTS_SPEED_DEFAULT` if unset/invalid. */
export async function loadTtsSpeed(): Promise<number> {
  return loadBoundedNumber(TTS_SPEED_STORAGE_KEY, TTS_SPEED_MIN, TTS_SPEED_MAX, TTS_SPEED_DEFAULT);
}

/** Loads the conversation window in milliseconds (the setting is stored in seconds). */
export async function loadConvWindowMs(): Promise<number> {
  const seconds = await loadBoundedNumber(
    CONV_WINDOW_STORAGE_KEY,
    CONV_WINDOW_MIN_SEC,
    CONV_WINDOW_MAX_SEC,
    CONV_WINDOW_DEFAULT_SEC,
  );
  return Math.round(seconds * 1000);
}

/** Loads the tolerated VAD silence in milliseconds (the setting is stored in seconds). */
async function loadVadSilenceMs(): Promise<number> {
  const seconds = await loadBoundedNumber(
    VAD_SILENCE_STORAGE_KEY,
    VAD_SILENCE_MIN_SEC,
    VAD_SILENCE_MAX_SEC,
    VAD_SILENCE_DEFAULT_SEC,
  );
  return Math.round(seconds * 1000);
}

// Maximum duration of a single recording (emergency brake against runaway
// loops). Raised to 2 minutes so that longer explanations still go through.
const MAX_RECORDING_MS = 120000;

// Pre-roll: how long audio sits in the AudioTrack buffer before play() starts.
// Configurable via Diagnostic/Settings (key: aria_tts_preroll_sec).
export const TTS_PREROLL_DEFAULT_SEC = 3.5;
export const TTS_PREROLL_MIN_SEC = 0; // 0 = start playing immediately (F5-TTS is fast enough)
export const TTS_PREROLL_MAX_SEC = 6.0;
export const TTS_PREROLL_STORAGE_KEY = 'aria_tts_preroll_sec';

/**
 * Loads the persisted TTS pre-roll (seconds).
 * Falls back to `TTS_PREROLL_DEFAULT_SEC` when the key is missing, invalid,
 * out of range, or when storage access fails.
 */
async function loadPrerollSec(): Promise<number> {
  try {
    const raw = await AsyncStorage.getItem(TTS_PREROLL_STORAGE_KEY);
    if (raw != null) {
      const n = parseFloat(raw);
      if (isFinite(n) && n >= TTS_PREROLL_MIN_SEC && n <= TTS_PREROLL_MAX_SEC) {
        return n;
      }
    }
  } catch {
    // Best effort — fall through to the default.
  }
  return TTS_PREROLL_DEFAULT_SEC;
}

/**
 * Exact number of bytes a base64 string decodes to.
 * Accounts for trailing '=' padding; also handles unpadded input.
 */
function base64DecodedLength(b64: string): number {
  const len = b64.length;
  if (len === 0) return 0;
  let padding = 0;
  if (b64.charCodeAt(len - 1) === 0x3d) padding++; // '='
  if (len > 1 && b64.charCodeAt(len - 2) === 0x3d) padding++;
  return Math.max(0, Math.floor((len * 3) / 4) - padding);
}

/** Payload of one `audio_pcm` message as consumed by `handlePcmChunk`. */
interface PcmChunkPayload {
  base64: string;
  sampleRate?: number;
  channels?: number;
  messageId?: string;
  chunk?: number;
  final?: boolean;
  silent?: boolean;
}

// --- Audio service ---

class AudioService {
  private recordingState: RecordingState = 'idle';
  private recordingStartTime = 0;
  private stateListeners: RecordingStateCallback[] = [];
  private meterListeners: MeterCallback[] = [];
  private silenceListeners: SilenceCallback[] = [];
  private currentSound: Sound | null = null;
  private recorder: AudioRecorderPlayer;
  private recordingPath = '';

  // Audio queue for sequential TTS playback
  private audioQueue: string[] = [];
  private isPlaying = false;
  private preloadedSound: Sound | null = null;
  private preloadedPath = '';
  // Queue entry the preloaded sound was built from. Lets _playNext verify
  // that the preload really belongs to the item it is about to play.
  private preloadedData: string | null = null;
  // Incremented by stopPlayback(); async loads that started in an older
  // epoch discard their result instead of playing after a stop.
  private playbackEpoch = 0;

  // Speech gate: only send a recording if someone actually spoke
  private speechDetected = false;
  private speechStartTime = 0;

  // PCM stream (XTTS): active session + cache buffer per messageId
  private pcmStreamActive = false;
  private pcmMessageId = '';
  private pcmSampleRate = 24000;
  private pcmChannels = 1;
  private pcmBuffer: string[] = []; // base64 chunks for the later WAV build
  private pcmBytesCollected = 0;
  private readonly PCM_MAX_CACHE_BYTES = 30 * 1024 * 1024; // 30MB

  // AudioFocus is released with a delay — if ARIA sends a second answer right
  // after the first (or a new stream starts), Spotify stays paused. Without
  // this delay Spotify briefly resumes in the tiny gap between two streams.
  private focusReleaseTimer: ReturnType<typeof setTimeout> | null = null;
  private readonly FOCUS_RELEASE_DELAY_MS = 800;

  // Conversation mode: while active (wake-word status 'conversing' OR we know
  // "ARIA is currently speaking in a multi-turn dialog") the AudioFocus is
  // held PERMANENTLY. The per-stream release is suppressed so Spotify does not
  // come back during render pauses or between answers.
  private _conversationFocusActive = false;

  // VAD state
  private vadEnabled = false;
  private lastSpeechTime = 0;
  private vadTimer: ReturnType<typeof setInterval> | null = null;
  private maxDurationTimer: ReturnType<typeof setTimeout> | null = null;
  // Latch so the silence callback fires exactly once per recording
  private silenceFired = false;
  private noSpeechTimer: ReturnType<typeof setTimeout> | null = null;

  constructor() {
    this.recorder = new AudioRecorderPlayer();
    this.recorder.setSubscriptionDuration(0.1); // 100ms metering updates
  }

  /**
   * Releases AudioFocus after a short delay — otherwise Spotify/YouTube
   * briefly resume in the gap between two TTS streams (or when ARIA sends a
   * second answer right away). In conversation mode the release is
   * suppressed entirely; the focus is held for the whole conversation.
   */
  private _releaseFocusDeferred(): void {
    this._cancelDeferredFocusRelease();
    if (this._conversationFocusActive) return;
    this.focusReleaseTimer = setTimeout(() => {
      this.focusReleaseTimer = null;
      // Conversation mode may have started while the timer was pending.
      if (this._conversationFocusActive) return;
      AudioFocus?.release().catch(() => {});
    }, this.FOCUS_RELEASE_DELAY_MS);
  }

  /** Cancels a pending deferred focus release, if any. */
  private _cancelDeferredFocusRelease(): void {
    if (this.focusReleaseTimer) {
      clearTimeout(this.focusReleaseTimer);
      this.focusReleaseTimer = null;
    }
  }

  /**
   * Conversation mode starts → hold AudioFocus permanently (Spotify stays
   * paused). Idempotent: calling it repeatedly is safe.
   */
  acquireConversationFocus(): void {
    if (this._conversationFocusActive) return;
    this._conversationFocusActive = true;
    this._cancelDeferredFocusRelease();
    console.log('[Audio] Conversation-Focus aktiv (Spotify bleibt gepaust)');
    AudioFocus?.requestDuck().catch(() => {});
  }

  /**
   * Conversation mode ends → focus may be released again (deferred, so a
   * directly following answer is not disturbed).
   */
  releaseConversationFocus(): void {
    if (!this._conversationFocusActive) return;
    this._conversationFocusActive = false;
    console.log('[Audio] Conversation-Focus inaktiv');
    this._releaseFocusDeferred();
  }

  /**
   * Hard-stops TTS playback — e.g. when a phone call comes in.
   * Also drops the conversation focus and (via stopPlayback) releases the
   * AudioFocus immediately so the ringtone is audible.
   *
   * @param reason Free-text reason, used for logging only.
   */
  haltAllPlayback(reason: string = ''): void {
    console.log('[Audio] haltAllPlayback: %s', reason || '(no reason)');
    this._conversationFocusActive = false;
    this.stopPlayback();
  }

  // --- Permissions ---

  /**
   * Requests the microphone permission.
   *
   * @returns `true` on non-Android platforms (no runtime request is made
   *   here) or when the Android permission was granted; `false` otherwise.
   */
  async requestMicrophonePermission(): Promise<boolean> {
    if (Platform.OS !== 'android') {
      return true;
    }

    try {
      const granted = await PermissionsAndroid.request(
        PermissionsAndroid.PERMISSIONS.RECORD_AUDIO,
        {
          title: 'ARIA Cockpit - Mikrofon',
          message: 'ARIA benoetigt Zugriff auf das Mikrofon fuer Spracheingabe.',
          buttonPositive: 'Erlauben',
          buttonNegative: 'Ablehnen',
        },
      );
      return granted === PermissionsAndroid.RESULTS.GRANTED;
    } catch (err) {
      console.error('[Audio] Fehler bei Berechtigungsanfrage:', err);
      return false;
    }
  }

  // --- Recording ---

  /**
   * Starts a microphone recording.
   *
   * @param autoStop Enable VAD — auto-stop on silence (and after
   *   MAX_RECORDING_MS as an emergency brake).
   * @param noSpeechTimeoutMs If > 0 and the speech gate has not opened within
   *   this time, silence is reported (the recording is then discarded by the
   *   speech gate in stopRecording). Used for the conversation window.
   * @returns `true` when recording started, `false` if a recording is already
   *   running, the permission is missing, or starting failed.
   */
  async startRecording(autoStop: boolean = false, noSpeechTimeoutMs: number = 0): Promise<boolean> {
    if (this.recordingState !== 'idle') {
      console.warn('[Audio] Aufnahme laeuft bereits');
      return false;
    }

    const hasPermission = await this.requestMicrophonePermission();
    if (!hasPermission) {
      console.warn('[Audio] Keine Mikrofon-Berechtigung');
      return false;
    }

    try {
      // Load the VAD setting BEFORE the recorder starts. Everything after
      // startRecorder() is then synchronous, so stopRecording() can no longer
      // slip in between "state = recording" and "timers armed" and leave
      // orphaned timers that fire a spurious silence callback.
      const vadSilenceMs = autoStop ? await loadVadSilenceMs() : 0;

      // Stop running playback (so ARIA does not hear herself)
      this.stopPlayback();

      // Housekeeping: delete old aria_recording_ and aria_tts_ files
      // (protects the cache in conversation mode with many cycles)
      this._cleanupStaleCacheFiles().catch(() => {});

      this.recordingPath = `${RNFS.CachesDirectoryPath}/aria_recording_${Date.now()}.mp4`;

      // Start recording with metering
      await this.recorder.startRecorder(this.recordingPath, {
        AudioEncoderAndroid: AudioEncoderAndroidType.AAC,
        AudioSourceAndroid: AudioSourceAndroidType.MIC,
        OutputFormatAndroid: OutputFormatAndroidType.MPEG_4,
        AudioSamplingRateAndroid: AUDIO_SAMPLE_RATE,
        AudioChannelsAndroid: AUDIO_CHANNELS,
      }, true); // meteringEnabled = true

      // Metering callback
      this.recorder.addRecordBackListener((e) => {
        const db = e.currentMetering ?? -160;
        this.meterListeners.forEach(cb => cb(db));

        // Speech gate: the level must stay above the speech threshold for
        // VAD_SPEECH_MIN_MS before the recording counts as containing speech.
        if (db > VAD_SPEECH_THRESHOLD_DB) {
          if (!this.speechDetected && this.speechStartTime === 0) {
            this.speechStartTime = Date.now();
          }
          if (this.speechStartTime > 0 && Date.now() - this.speechStartTime >= VAD_SPEECH_MIN_MS) {
            this.speechDetected = true;
          }
        } else if (!this.speechDetected) {
          this.speechStartTime = 0; // reset while not yet recognised as speech
        }

        // VAD: any level above the silence threshold resets the silence
        // clock (independent of the speech gate).
        if (this.vadEnabled && db > VAD_SILENCE_THRESHOLD_DB) {
          this.lastSpeechTime = Date.now();
        }
      });

      this.recordingStartTime = Date.now();
      this.lastSpeechTime = Date.now();
      this.speechDetected = false;
      this.speechStartTime = 0;
      this.setState('recording');

      // Pause other apps while recording (music, videos, ...)
      this._cancelDeferredFocusRelease();
      AudioFocus?.requestExclusive().catch(() => {});

      // IMPORTANT: every trigger (VAD silence / max duration / no-speech
      // window) disables the VAD flag and clears all timers BEFORE the
      // listeners are fired. Otherwise the interval keeps firing every 200ms
      // and calls stopRecording in parallel → audio-recorder-player crashes.
      this.vadEnabled = autoStop;
      this.silenceFired = false;
      const fireSilenceOnce = (reason: string) => {
        if (this.silenceFired) return;
        this.silenceFired = true;
        this.vadEnabled = false;
        if (this.vadTimer) { clearInterval(this.vadTimer); this.vadTimer = null; }
        if (this.maxDurationTimer) { clearTimeout(this.maxDurationTimer); this.maxDurationTimer = null; }
        if (this.noSpeechTimer) { clearTimeout(this.noSpeechTimer); this.noSpeechTimer = null; }
        console.log('[Audio] Silence-Fire: %s', reason);
        this.silenceListeners.forEach(cb => {
          try { cb(); } catch (e) { console.warn('[Audio] silence listener err:', e); }
        });
      };

      if (autoStop) {
        console.log('[Audio] startRecording: autoStop=true, VAD-Stille=%dms, MAX=%dms',
          vadSilenceMs, MAX_RECORDING_MS);
        this.vadTimer = setInterval(() => {
          const silenceDuration = Date.now() - this.lastSpeechTime;
          if (silenceDuration >= vadSilenceMs) {
            fireSilenceOnce(`VAD ${silenceDuration}ms Stille (Schwelle=${vadSilenceMs}ms)`);
          }
        }, 200);

        // Emergency brake: force a stop after MAX_RECORDING_MS
        this.maxDurationTimer = setTimeout(() => {
          fireSilenceOnce(`Max-Dauer ${MAX_RECORDING_MS}ms`);
        }, MAX_RECORDING_MS);
      }

      // Conversation window: if the user does not start speaking within
      // noSpeechTimeoutMs → report silence (the speech gate discards the take).
      if (noSpeechTimeoutMs > 0) {
        this.noSpeechTimer = setTimeout(() => {
          if (!this.speechDetected && this.recordingState === 'recording') {
            fireSilenceOnce(`Conversation-Window ${noSpeechTimeoutMs}ms ohne Sprache`);
          }
        }, noSpeechTimeoutMs);
      }

      console.log('[Audio] Aufnahme gestartet (autoStop: %s)', autoStop);
      return true;
    } catch (err) {
      console.error('[Audio] Fehler beim Starten der Aufnahme:', err);
      this.setState('idle');
      return false;
    }
  }

  /**
   * Stops the recording and returns the result.
   *
   * @returns The recording as base64 (AAC in an MP4 container), or `null`
   *   when no recording was active, the speech gate never opened, or
   *   stopping/reading failed.
   */
  async stopRecording(): Promise<RecordingResult | null> {
    if (this.recordingState !== 'recording') {
      console.warn('[Audio] Keine aktive Aufnahme');
      return null;
    }

    this.setState('processing');
    this.vadEnabled = false;
    if (this.vadTimer) {
      clearInterval(this.vadTimer);
      this.vadTimer = null;
    }
    if (this.maxDurationTimer) {
      clearTimeout(this.maxDurationTimer);
      this.maxDurationTimer = null;
    }
    if (this.noSpeechTimer) {
      clearTimeout(this.noSpeechTimer);
      this.noSpeechTimer = null;
    }

    try {
      await this.recorder.stopRecorder();
      this.recorder.removeRecordBackListener();
      // Release audio focus with a delay — the TTS answer arrives shortly,
      // Spotify should not come up in the gap.
      this._releaseFocusDeferred();

      const durationMs = Date.now() - this.recordingStartTime;

      // Speech gate: no speech detected → discard the recording
      if (!this.speechDetected) {
        RNFS.unlink(this.recordingPath).catch(() => {});
        this.setState('idle');
        console.log('[Audio] Aufnahme verworfen — keine Sprache erkannt (nur Umgebungsgeraeusche)');
        return null;
      }

      // Read the audio file as base64, then remove the temp file
      const base64Data = await RNFS.readFile(this.recordingPath, 'base64');
      RNFS.unlink(this.recordingPath).catch(() => {});

      this.setState('idle');
      console.log(`[Audio] Aufnahme beendet (${durationMs}ms, ${Math.round(base64Data.length / 1024)}KB, Sprache erkannt)`);

      return {
        base64: base64Data,
        durationMs,
        mimeType: 'audio/mp4', // AAC in MP4 container
      };
    } catch (err) {
      console.error('[Audio] Fehler beim Stoppen der Aufnahme:', err);
      // If stopRecorder() itself threw, the listener and the exclusive focus
      // requested in startRecording would otherwise stay held forever.
      try { this.recorder.removeRecordBackListener(); } catch { /* best effort */ }
      this._releaseFocusDeferred();
      this.setState('idle');
      return null;
    }
  }

  // --- Playback ---

  /** Enqueues base64-encoded audio and starts playback if nothing is playing. */
  async playAudio(base64Data: string): Promise<void> {
    if (!base64Data) return;
    this.audioQueue.push(base64Data);
    if (!this.isPlaying) {
      this._playNext();
    }
  }

  /**
   * Stores base64 audio persistently under `<Documents>/tts_cache/<messageId>.wav`.
   *
   * An existing file for the same messageId is OVERWRITTEN. When a backend
   * sends several chunks for one message, only the last one remains; a real
   * concatenation would require merging WAV headers.
   *
   * @returns The `file://` path, or `''` on missing input or write failure.
   */
  async cacheAudio(base64Data: string, messageId: string): Promise<string> {
    if (!base64Data || !messageId) return '';
    try {
      const dir = `${RNFS.DocumentDirectoryPath}/tts_cache`;
      await RNFS.mkdir(dir).catch(() => {});
      const path = `${dir}/${messageId}.wav`;
      await RNFS.writeFile(path, base64Data, 'base64');
      return `file://${path}`;
    } catch (err) {
      console.warn('[Audio] cacheAudio fehlgeschlagen:', err);
      return '';
    }
  }

  // Serialises handlePcmChunk calls (see below).
  private _pcmChunkQueue: Promise<unknown> = Promise.resolve();

  /**
   * Receives one PCM chunk from an audio_pcm message.
   * silent=true → cache only, do not play (e.g. TTS muted on this device).
   *
   * This wrapper serialises consecutive chunk calls through a promise queue —
   * otherwise short streams raced: the final chunk could call `end()` BEFORE
   * the preceding `start()` had finished in the native module.
   *
   * @returns For `final=true` the cache path (`file://…`), or `''` if nothing
   *   was cached; `''` for all non-final chunks and on errors.
   */
  async handlePcmChunk(payload: PcmChunkPayload): Promise<string> {
    const p = this._pcmChunkQueue
      .then(() => this._handlePcmChunkImpl(payload))
      .catch((err) => {
        console.warn('[Audio] handlePcmChunk queued err:', err);
        return '';
      });
    // Chain only on the side effect — callers still get the per-call result
    this._pcmChunkQueue = p;
    return p;
  }

  /** Processes a single PCM chunk; must only run via the queue in handlePcmChunk. */
  private async _handlePcmChunkImpl(payload: PcmChunkPayload): Promise<string> {
    const silent = !!payload.silent;
    if (!silent && !PcmStreamPlayer) {
      console.warn('[Audio] PcmStreamPlayer Native Module nicht verfuegbar');
      return '';
    }
    // `player` is defined exactly when this chunk should be played
    // (non-silent and the native module exists — guaranteed by the check above).
    const player = silent ? undefined : PcmStreamPlayer;

    // Debug log for chunk 0 of a new stream — makes it visible in adb logcat
    // why auto-playback kicks in or not.
    if ((payload.chunk ?? 0) === 0 && !this.pcmStreamActive) {
      console.log('[Audio] PCM-Stream start: silent=%s messageId=%s sr=%s ch=%s',
        silent, payload.messageId || '(none)', payload.sampleRate, payload.channels);
    }

    const messageId = payload.messageId || '';
    const sampleRate = payload.sampleRate || 24000;
    const channels = payload.channels || 1;
    const base64 = payload.base64 || '';
    const isFinal = !!payload.final;

    // New stream? (messageId changed or no stream active)
    if (!this.pcmStreamActive || this.pcmMessageId !== messageId) {
      if (this.pcmStreamActive && player) {
        try { await player.stop(); } catch { /* previous stream may already be gone */ }
      }
      this.pcmStreamActive = true;
      this.pcmMessageId = messageId;
      this.pcmSampleRate = sampleRate;
      this.pcmChannels = channels;
      this.pcmBuffer = [];
      this.pcmBytesCollected = 0;
      if (player) {
        const prerollSec = await loadPrerollSec();
        try {
          await player.start(sampleRate, channels, prerollSec);
        } catch (err) {
          console.error('[Audio] PcmStreamPlayer.start fehlgeschlagen:', err);
          this.pcmStreamActive = false;
          return '';
        }
        this._cancelDeferredFocusRelease();
        AudioFocus?.requestDuck().catch(() => {});
      }
    }

    // Chunk — always cache, only play when not silent
    if (base64) {
      if (player) {
        try { await player.writeChunk(base64); }
        catch (err) { console.warn('[Audio] writeChunk', err); }
      }
      if (messageId && this.pcmBytesCollected < this.PCM_MAX_CACHE_BYTES) {
        this.pcmBuffer.push(base64);
        // Exact decoded size: the WAV header's data length is derived from
        // this sum, so padding bytes must not be counted.
        this.pcmBytesCollected += base64DecodedLength(base64);
      }
    }

    if (isFinal) {
      if (player) {
        // Per the original author's note, end() only resolves once the native
        // writer thread has played out all samples — afterwards the focus is
        // released with a delay so other apps do not come back in the gap
        // between two answers. A new stream within FOCUS_RELEASE_DELAY_MS
        // cancels the release.
        try { await player.end(); } catch { /* nothing to recover */ }
        this._releaseFocusDeferred();
      }
      this.pcmStreamActive = false;

      if (messageId && this.pcmBuffer.length > 0) {
        const audioPath = await this._savePcmBufferAsWav(messageId);
        this.pcmBuffer = [];
        this.pcmBytesCollected = 0;
        this.pcmMessageId = '';
        return audioPath;
      }
      this.pcmMessageId = '';
    }
    return '';
  }

  /**
   * Writes the collected PCM chunks as a WAV file (PCM s16le) to
   * `<Documents>/tts_cache/<messageId>.wav`.
   *
   * @returns The `file://` path, or `''` on failure.
   */
  private async _savePcmBufferAsWav(messageId: string): Promise<string> {
    try {
      const dir = `${RNFS.DocumentDirectoryPath}/tts_cache`;
      await RNFS.mkdir(dir).catch(() => {});
      const path = `${dir}/${messageId}.wav`;

      const sampleRate = this.pcmSampleRate;
      const channels = this.pcmChannels;
      const bitsPerSample = 16;
      const byteRate = sampleRate * channels * bitsPerSample / 8;
      const blockAlign = channels * bitsPerSample / 8;
      const dataSize = this.pcmBytesCollected;
      const fileSize = 36 + dataSize; // RIFF chunk size = header (44) - 8 + data

      // 44-byte canonical WAV header, little endian
      const header = new Uint8Array(44);
      const dv = new DataView(header.buffer);
      // "RIFF"
      header[0] = 0x52; header[1] = 0x49; header[2] = 0x46; header[3] = 0x46;
      dv.setUint32(4, fileSize, true);
      // "WAVE"
      header[8] = 0x57; header[9] = 0x41; header[10] = 0x56; header[11] = 0x45;
      // "fmt "
      header[12] = 0x66; header[13] = 0x6d; header[14] = 0x74; header[15] = 0x20;
      dv.setUint32(16, 16, true); // fmt chunk size
      dv.setUint16(20, 1, true); // PCM format
      dv.setUint16(22, channels, true);
      dv.setUint32(24, sampleRate, true);
      dv.setUint32(28, byteRate, true);
      dv.setUint16(32, blockAlign, true);
      dv.setUint16(34, bitsPerSample, true);
      // "data"
      header[36] = 0x64; header[37] = 0x61; header[38] = 0x74; header[39] = 0x61;
      dv.setUint32(40, dataSize, true);

      // Header bytes → binary string → base64
      const headerBinary = String.fromCharCode(...Array.from(header));
      const headerB64 = btoaSafe(headerBinary);

      // Write the file: header first, then append every PCM chunk
      await RNFS.writeFile(path, headerB64, 'base64');
      for (const b64 of this.pcmBuffer) {
        await RNFS.appendFile(path, b64, 'base64');
      }
      console.log(`[Audio] PCM-Cache geschrieben: ${path} (${(dataSize / 1024).toFixed(0)}KB, ${this.pcmBuffer.length} chunks)`);
      return `file://${path}`;
    } catch (err) {
      console.warn('[Audio] _savePcmBufferAsWav fehlgeschlagen:', err);
      return '';
    }
  }

  /** Enqueues and plays audio from a local file (`file://` prefix optional). */
  async playFromPath(filePath: string): Promise<void> {
    if (!filePath) return;
    try {
      const cleanPath = filePath.replace(/^file:\/\//, '');
      if (!(await RNFS.exists(cleanPath))) {
        console.warn('[Audio] Cache-Datei existiert nicht mehr:', cleanPath);
        return;
      }
      const b64 = await RNFS.readFile(cleanPath, 'base64');
      await this.playAudio(b64);
    } catch (err) {
      console.warn('[Audio] playFromPath fehlgeschlagen:', err);
    }
  }

  // Callbacks fired once all queued audio parts have been played
  private playbackFinishedListeners: (() => void)[] = [];

  /** Registers a "queue drained" listener; returns an unsubscribe function. */
  onPlaybackFinished(callback: () => void): () => void {
    this.playbackFinishedListeners.push(callback);
    return () => {
      this.playbackFinishedListeners = this.playbackFinishedListeners.filter(cb => cb !== callback);
    };
  }

  /** Releases a preloaded sound (if any) and deletes its temp file. */
  private _discardPreloaded(): void {
    if (this.preloadedSound) {
      this.preloadedSound.release();
      this.preloadedSound = null;
      if (this.preloadedPath) RNFS.unlink(this.preloadedPath).catch(() => {});
    }
    this.preloadedPath = '';
    this.preloadedData = null;
  }

  /** Plays the next audio item from the queue. */
  private async _playNext(): Promise<void> {
    if (this.audioQueue.length === 0) {
      this.isPlaying = false;
      // Give up audio focus with a delay → if another answer follows right
      // away, Spotify stays paused.
      this._releaseFocusDeferred();
      // All audio parts played → notify listeners
      this.playbackFinishedListeners.forEach(cb => cb());
      return;
    }

    // On the first playback start: duck other apps
    if (!this.isPlaying) {
      this._cancelDeferredFocusRelease();
      AudioFocus?.requestDuck().catch(() => {});
    }
    this.isPlaying = true;

    const epoch = this.playbackEpoch;
    const base64Data = this.audioQueue.shift()!;
    let sound: Sound;
    let soundPath: string;

    if (this.preloadedSound && this.preloadedData === base64Data) {
      // Use the preloaded sound — it was built from exactly this queue item.
      sound = this.preloadedSound;
      soundPath = this.preloadedPath;
      this.preloadedSound = null;
      this.preloadedPath = '';
      this.preloadedData = null;
    } else {
      // A preload that belongs to a different item must never be played
      // in place of this one.
      this._discardPreloaded();
      const loadPath = `${RNFS.CachesDirectoryPath}/aria_tts_${Date.now()}.wav`;
      soundPath = loadPath;
      try {
        await RNFS.writeFile(loadPath, base64Data, 'base64');
        sound = await new Promise<Sound>((resolve, reject) => {
          const s = new Sound(loadPath, '', (err) => (err ? reject(err) : resolve(s)));
        });
      } catch (err) {
        console.error('[Audio] Laden fehlgeschlagen:', err);
        RNFS.unlink(loadPath).catch(() => {});
        // Only continue the chain if playback was not stopped meanwhile.
        if (epoch === this.playbackEpoch) this._playNext();
        return;
      }
      if (epoch !== this.playbackEpoch) {
        // stopPlayback() was called while this item was loading — drop it
        // instead of playing after the stop.
        sound.release();
        RNFS.unlink(loadPath).catch(() => {});
        return;
      }
    }

    this.currentSound = sound;

    // Prepare the next audio while this one plays
    this._preloadNext();

    sound.play((success) => {
      if (!success) console.warn('[Audio] Wiedergabe fehlgeschlagen');
      sound.release();
      // Do not clobber a newer sound if playback was restarted meanwhile.
      if (this.currentSound === sound) this.currentSound = null;
      RNFS.unlink(soundPath).catch(() => {});
      this._playNext();
    });
  }

  /**
   * Preloads the next queued audio in the background (prevents stutter).
   * The item stays in the queue. The result is only kept if, after loading,
   * it still matches the queue head and playback was not stopped meanwhile.
   */
  private async _preloadNext(): Promise<void> {
    if (this.audioQueue.length === 0 || this.preloadedSound) return;
    const base64Data = this.audioQueue[0]; // no shift — stays in the queue
    const epoch = this.playbackEpoch;
    const tmpPath = `${RNFS.CachesDirectoryPath}/aria_tts_pre_${Date.now()}.wav`;
    try {
      await RNFS.writeFile(tmpPath, base64Data, 'base64');
      const sound = await new Promise<Sound>((resolve, reject) => {
        const s = new Sound(tmpPath, '', (err) => (err ? reject(err) : resolve(s)));
      });
      const stale =
        epoch !== this.playbackEpoch ||
        this.preloadedSound !== null ||
        this.audioQueue[0] !== base64Data;
      if (stale) {
        // The item was already consumed (or playback stopped) while loading.
        sound.release();
        RNFS.unlink(tmpPath).catch(() => {});
        return;
      }
      this.preloadedSound = sound;
      this.preloadedPath = tmpPath;
      this.preloadedData = base64Data;
    } catch {
      // Preloading is an optimisation only; _playNext loads on demand.
      RNFS.unlink(tmpPath).catch(() => {});
    }
  }

  /** Stops running playback, clears the queue and releases AudioFocus immediately. */
  stopPlayback(): void {
    this.playbackEpoch++; // invalidates all in-flight loads/preloads
    this.audioQueue = [];
    this.isPlaying = false;
    if (this.currentSound) {
      this.currentSound.stop();
      this.currentSound.release();
      this.currentSound = null;
    }
    this._discardPreloaded();
    // Hard-stop the PCM stream as well (cancel/abort)
    if (this.pcmStreamActive) {
      PcmStreamPlayer?.stop().catch(() => {});
      this.pcmStreamActive = false;
      this.pcmBuffer = [];
      this.pcmBytesCollected = 0;
      this.pcmMessageId = '';
    }
    // Release audio focus right away — playback was explicitly aborted
    this._cancelDeferredFocusRelease();
    AudioFocus?.release().catch(() => {});
  }

  // --- Status & callbacks ---

  /** Current recording state. */
  getRecordingState(): RecordingState {
    return this.recordingState;
  }

  /** Registers a listener for recording-state changes; returns an unsubscribe function. */
  onStateChange(callback: RecordingStateCallback): () => void {
    this.stateListeners.push(callback);
    return () => {
      this.stateListeners = this.stateListeners.filter(cb => cb !== callback);
    };
  }

  /** Registers a listener for metering updates (dB values while recording). */
  onMeterUpdate(callback: MeterCallback): () => void {
    this.meterListeners.push(callback);
    return () => {
      this.meterListeners = this.meterListeners.filter(cb => cb !== callback);
    };
  }

  /** Registers a listener fired when VAD / max duration / no-speech window triggers (auto-stop). */
  onSilenceDetected(callback: SilenceCallback): () => void {
    this.silenceListeners.push(callback);
    return () => {
      this.silenceListeners = this.silenceListeners.filter(cb => cb !== callback);
    };
  }

  /** Updates the recording state and notifies listeners, but only on an actual change. */
  private setState(state: RecordingState): void {
    if (this.recordingState !== state) {
      this.recordingState = state;
      this.stateListeners.forEach(cb => cb(state));
    }
  }

  /** Deletes old recording and TTS temp files from the cache directory (>30s old). */
  private async _cleanupStaleCacheFiles(): Promise<void> {
    try {
      const files = await RNFS.readDir(RNFS.CachesDirectoryPath);
      const now = Date.now();
      for (const f of files) {
        if (!f.isFile()) continue;
        if (!f.name.startsWith('aria_recording_') && !f.name.startsWith('aria_tts_')) continue;
        // Files without an mtime are treated as infinitely old.
        const age = now - (f.mtime ? f.mtime.getTime() : 0);
        if (age > 30000) {
          await RNFS.unlink(f.path).catch(() => {});
        }
      }
    } catch {
      // silent — cleanup is best effort
    }
  }

  /**
   * Deletes cached TTS files that are no longer referenced AND older than
   * `maxAgeDays`.
   *
   * @param keepMessageIds Message ids whose cache files must be kept.
   * @param maxAgeDays Minimum age in days before an unreferenced file is deleted.
   */
  async cleanupOldTTSCache(keepMessageIds: Set<string>, maxAgeDays = 30): Promise<void> {
    try {
      const dir = `${RNFS.DocumentDirectoryPath}/tts_cache`;
      if (!(await RNFS.exists(dir))) return;
      const files = await RNFS.readDir(dir);
      const maxAgeMs = maxAgeDays * 24 * 60 * 60 * 1000;
      const now = Date.now();
      for (const f of files) {
        if (!f.isFile() || !f.name.endsWith('.wav')) continue;
        const messageId = f.name.replace(/\.wav$/, '');
        const age = now - (f.mtime ? f.mtime.getTime() : 0);
        // Delete when: no longer referenced AND older than the limit
        if (!keepMessageIds.has(messageId) && age > maxAgeMs) {
          await RNFS.unlink(f.path).catch(() => {});
        }
      }
    } catch {
      // silent — cleanup is best effort
    }
  }
}

// Singleton
const audioService = new AudioService();
export default audioService;