/**
 * Audio-Service fuer Sprach-Ein-/Ausgabe
 *
 * Verwaltet Mikrofon-Aufnahme (mit VAD/Auto-Stop bei Stille),
 * TTS-Audiowiedergabe und Metering fuer visuelles Feedback.
 * Nutzt react-native-audio-recorder-player fuer Aufnahme.
 */
|
|
|
|
import { Platform, PermissionsAndroid } from 'react-native';
|
|
import Sound from 'react-native-sound';
|
|
import RNFS from 'react-native-fs';
|
|
import AudioRecorderPlayer, {
|
|
AudioEncoderAndroidType,
|
|
AudioSourceAndroidType,
|
|
AVEncodingOption,
|
|
OutputFormatAndroidType,
|
|
} from 'react-native-audio-recorder-player';
|
|
|
|
// --- Typen ---
|
|
|
|
export interface RecordingResult {
|
|
/** Base64-kodierte Audiodaten */
|
|
base64: string;
|
|
/** Dauer in Millisekunden */
|
|
durationMs: number;
|
|
/** MIME-Type (z.B. audio/wav) */
|
|
mimeType: string;
|
|
}
|
|
|
|
export type RecordingState = 'idle' | 'recording' | 'processing';
|
|
|
|
type RecordingStateCallback = (state: RecordingState) => void;
|
|
type MeterCallback = (db: number) => void;
|
|
type SilenceCallback = () => void;
|
|
|
|
// --- Konstanten ---
|
|
|
|
const AUDIO_SAMPLE_RATE = 16000;
|
|
const AUDIO_CHANNELS = 1;
|
|
const AUDIO_ENCODING = 'audio/wav';
|
|
|
|
// VAD (Voice Activity Detection) — Stille-Erkennung
|
|
const VAD_SILENCE_THRESHOLD_DB = -45; // dB unter dem als "Stille" gilt
|
|
const VAD_SILENCE_DURATION_MS = 1800; // ms Stille bevor Auto-Stop
|
|
const VAD_SPEECH_THRESHOLD_DB = -35; // dB ueber dem als "Sprache" gilt (Sprach-Gate)
|
|
const VAD_SPEECH_MIN_MS = 300; // ms Sprache bevor Aufnahme zaehlt
|
|
|
|
// --- Audio-Service ---
|
|
|
|
class AudioService {
|
|
private recordingState: RecordingState = 'idle';
|
|
private recordingStartTime: number = 0;
|
|
private stateListeners: RecordingStateCallback[] = [];
|
|
private meterListeners: MeterCallback[] = [];
|
|
private silenceListeners: SilenceCallback[] = [];
|
|
private currentSound: Sound | null = null;
|
|
private recorder: AudioRecorderPlayer;
|
|
private recordingPath: string = '';
|
|
|
|
// Audio-Queue fuer sequentielle TTS-Wiedergabe
|
|
private audioQueue: string[] = [];
|
|
private isPlaying: boolean = false;
|
|
private preloadedSound: Sound | null = null;
|
|
private preloadedPath: string = '';
|
|
|
|
// Sprach-Gate: Aufnahme erst senden wenn tatsaechlich gesprochen wurde
|
|
private speechDetected: boolean = false;
|
|
private speechStartTime: number = 0;
|
|
|
|
// VAD State
|
|
private vadEnabled: boolean = false;
|
|
private lastSpeechTime: number = 0;
|
|
private vadTimer: ReturnType<typeof setInterval> | null = null;
|
|
|
|
constructor() {
|
|
this.recorder = new AudioRecorderPlayer();
|
|
this.recorder.setSubscriptionDuration(0.1); // 100ms Metering-Updates
|
|
}
|
|
|
|
// --- Berechtigungen ---
|
|
|
|
async requestMicrophonePermission(): Promise<boolean> {
|
|
if (Platform.OS !== 'android') {
|
|
return true;
|
|
}
|
|
|
|
try {
|
|
const granted = await PermissionsAndroid.request(
|
|
PermissionsAndroid.PERMISSIONS.RECORD_AUDIO,
|
|
{
|
|
title: 'ARIA Cockpit - Mikrofon',
|
|
message: 'ARIA benoetigt Zugriff auf das Mikrofon fuer Spracheingabe.',
|
|
buttonPositive: 'Erlauben',
|
|
buttonNegative: 'Ablehnen',
|
|
},
|
|
);
|
|
return granted === PermissionsAndroid.RESULTS.GRANTED;
|
|
} catch (err) {
|
|
console.error('[Audio] Fehler bei Berechtigungsanfrage:', err);
|
|
return false;
|
|
}
|
|
}
|
|
|
|
// --- Aufnahme ---
|
|
|
|
/** Mikrofon-Aufnahme starten */
|
|
async startRecording(autoStop: boolean = false): Promise<boolean> {
|
|
if (this.recordingState !== 'idle') {
|
|
console.warn('[Audio] Aufnahme laeuft bereits');
|
|
return false;
|
|
}
|
|
|
|
const hasPermission = await this.requestMicrophonePermission();
|
|
if (!hasPermission) {
|
|
console.warn('[Audio] Keine Mikrofon-Berechtigung');
|
|
return false;
|
|
}
|
|
|
|
try {
|
|
// Laufende Wiedergabe stoppen (damit ARIA sich nicht selbst hoert)
|
|
this.stopPlayback();
|
|
|
|
this.recordingPath = `${RNFS.CachesDirectoryPath}/aria_recording_${Date.now()}.mp4`;
|
|
|
|
// Aufnahme mit Metering starten
|
|
await this.recorder.startRecorder(this.recordingPath, {
|
|
AudioEncoderAndroid: AudioEncoderAndroidType.AAC,
|
|
AudioSourceAndroid: AudioSourceAndroidType.MIC,
|
|
OutputFormatAndroid: OutputFormatAndroidType.MPEG_4,
|
|
}, true); // meteringEnabled = true
|
|
|
|
// Metering-Callback
|
|
this.recorder.addRecordBackListener((e) => {
|
|
const db = e.currentMetering ?? -160;
|
|
this.meterListeners.forEach(cb => cb(db));
|
|
|
|
// Sprach-Gate: Erkennen ob tatsaechlich gesprochen wird
|
|
if (db > VAD_SPEECH_THRESHOLD_DB) {
|
|
if (!this.speechDetected && this.speechStartTime === 0) {
|
|
this.speechStartTime = Date.now();
|
|
}
|
|
if (this.speechStartTime > 0 && Date.now() - this.speechStartTime >= VAD_SPEECH_MIN_MS) {
|
|
this.speechDetected = true;
|
|
}
|
|
} else {
|
|
if (!this.speechDetected) {
|
|
this.speechStartTime = 0; // Reset wenn noch nicht als Sprache erkannt
|
|
}
|
|
}
|
|
|
|
// VAD: Stille erkennen (nur wenn Sprache erkannt wurde)
|
|
if (this.vadEnabled) {
|
|
if (db > VAD_SILENCE_THRESHOLD_DB) {
|
|
this.lastSpeechTime = Date.now();
|
|
}
|
|
}
|
|
});
|
|
|
|
this.recordingStartTime = Date.now();
|
|
this.lastSpeechTime = Date.now();
|
|
this.speechDetected = false;
|
|
this.speechStartTime = 0;
|
|
this.setState('recording');
|
|
|
|
// VAD aktivieren
|
|
this.vadEnabled = autoStop;
|
|
if (autoStop) {
|
|
this.vadTimer = setInterval(() => {
|
|
const silenceDuration = Date.now() - this.lastSpeechTime;
|
|
if (silenceDuration >= VAD_SILENCE_DURATION_MS) {
|
|
console.log(`[Audio] VAD: ${silenceDuration}ms Stille — Auto-Stop`);
|
|
this.silenceListeners.forEach(cb => cb());
|
|
}
|
|
}, 200);
|
|
}
|
|
|
|
console.log('[Audio] Aufnahme gestartet (autoStop: %s)', autoStop);
|
|
return true;
|
|
} catch (err) {
|
|
console.error('[Audio] Fehler beim Starten der Aufnahme:', err);
|
|
this.setState('idle');
|
|
return false;
|
|
}
|
|
}
|
|
|
|
/** Aufnahme stoppen und Ergebnis zurueckgeben */
|
|
async stopRecording(): Promise<RecordingResult | null> {
|
|
if (this.recordingState !== 'recording') {
|
|
console.warn('[Audio] Keine aktive Aufnahme');
|
|
return null;
|
|
}
|
|
|
|
this.setState('processing');
|
|
this.vadEnabled = false;
|
|
if (this.vadTimer) {
|
|
clearInterval(this.vadTimer);
|
|
this.vadTimer = null;
|
|
}
|
|
|
|
try {
|
|
await this.recorder.stopRecorder();
|
|
this.recorder.removeRecordBackListener();
|
|
|
|
const durationMs = Date.now() - this.recordingStartTime;
|
|
const hadSpeech = this.speechDetected;
|
|
|
|
// Sprach-Gate: Wenn keine Sprache erkannt → Aufnahme verwerfen
|
|
if (!hadSpeech) {
|
|
RNFS.unlink(this.recordingPath).catch(() => {});
|
|
this.setState('idle');
|
|
console.log('[Audio] Aufnahme verworfen — keine Sprache erkannt (nur Umgebungsgeraeusche)');
|
|
return null;
|
|
}
|
|
|
|
// Audio-Datei als Base64 lesen
|
|
const base64Data = await RNFS.readFile(this.recordingPath, 'base64');
|
|
|
|
// Temp-Datei aufraeumen
|
|
RNFS.unlink(this.recordingPath).catch(() => {});
|
|
|
|
this.setState('idle');
|
|
console.log(`[Audio] Aufnahme beendet (${durationMs}ms, ${Math.round(base64Data.length / 1024)}KB, Sprache erkannt)`);
|
|
|
|
return {
|
|
base64: base64Data,
|
|
durationMs,
|
|
mimeType: 'audio/mp4', // AAC in MP4 Container
|
|
};
|
|
} catch (err) {
|
|
console.error('[Audio] Fehler beim Stoppen der Aufnahme:', err);
|
|
this.setState('idle');
|
|
return null;
|
|
}
|
|
}
|
|
|
|
// --- Wiedergabe ---
|
|
|
|
/** Base64-kodiertes Audio in die Queue stellen und abspielen */
|
|
async playAudio(base64Data: string): Promise<void> {
|
|
if (!base64Data) return;
|
|
|
|
this.audioQueue.push(base64Data);
|
|
if (!this.isPlaying) {
|
|
this._playNext();
|
|
}
|
|
}
|
|
|
|
// Callback wenn alle Audio-Teile abgespielt sind
|
|
private playbackFinishedListeners: (() => void)[] = [];
|
|
|
|
onPlaybackFinished(callback: () => void): () => void {
|
|
this.playbackFinishedListeners.push(callback);
|
|
return () => {
|
|
this.playbackFinishedListeners = this.playbackFinishedListeners.filter(cb => cb !== callback);
|
|
};
|
|
}
|
|
|
|
/** Naechstes Audio aus der Queue abspielen */
|
|
private async _playNext(): Promise<void> {
|
|
if (this.audioQueue.length === 0) {
|
|
this.isPlaying = false;
|
|
// Alle Audio-Teile abgespielt → Listener benachrichtigen
|
|
this.playbackFinishedListeners.forEach(cb => cb());
|
|
return;
|
|
}
|
|
|
|
this.isPlaying = true;
|
|
|
|
// Preloaded Sound verwenden wenn verfuegbar, sonst neu laden
|
|
let sound: Sound;
|
|
let soundPath: string;
|
|
|
|
if (this.preloadedSound) {
|
|
sound = this.preloadedSound;
|
|
soundPath = this.preloadedPath;
|
|
this.preloadedSound = null;
|
|
this.preloadedPath = '';
|
|
// Daten aus Queue entfernen (wurde schon preloaded)
|
|
this.audioQueue.shift();
|
|
} else {
|
|
const base64Data = this.audioQueue.shift()!;
|
|
try {
|
|
soundPath = `${RNFS.CachesDirectoryPath}/aria_tts_${Date.now()}.wav`;
|
|
await RNFS.writeFile(soundPath, base64Data, 'base64');
|
|
sound = await new Promise<Sound>((resolve, reject) => {
|
|
const s = new Sound(soundPath, '', (err) => err ? reject(err) : resolve(s));
|
|
});
|
|
} catch (err) {
|
|
console.error('[Audio] Laden fehlgeschlagen:', err);
|
|
this._playNext();
|
|
return;
|
|
}
|
|
}
|
|
|
|
this.currentSound = sound;
|
|
|
|
// Naechstes Audio schon vorbereiten waehrend dieses abspielt
|
|
this._preloadNext();
|
|
|
|
sound.play((success) => {
|
|
if (!success) console.warn('[Audio] Wiedergabe fehlgeschlagen');
|
|
sound.release();
|
|
this.currentSound = null;
|
|
RNFS.unlink(soundPath).catch(() => {});
|
|
this._playNext();
|
|
});
|
|
}
|
|
|
|
/** Naechstes Audio im Hintergrund vorladen (verhindert Stottern) */
|
|
private async _preloadNext(): Promise<void> {
|
|
if (this.audioQueue.length === 0 || this.preloadedSound) return;
|
|
|
|
const base64Data = this.audioQueue[0]; // Nicht shift — bleibt in Queue
|
|
try {
|
|
const tmpPath = `${RNFS.CachesDirectoryPath}/aria_tts_pre_${Date.now()}.wav`;
|
|
await RNFS.writeFile(tmpPath, base64Data, 'base64');
|
|
this.preloadedSound = await new Promise<Sound>((resolve, reject) => {
|
|
const s = new Sound(tmpPath, '', (err) => err ? reject(err) : resolve(s));
|
|
});
|
|
this.preloadedPath = tmpPath;
|
|
} catch {
|
|
this.preloadedSound = null;
|
|
this.preloadedPath = '';
|
|
}
|
|
}
|
|
|
|
/** Laufende Wiedergabe stoppen + Queue leeren */
|
|
stopPlayback(): void {
|
|
this.audioQueue = [];
|
|
this.isPlaying = false;
|
|
if (this.currentSound) {
|
|
this.currentSound.stop();
|
|
this.currentSound.release();
|
|
this.currentSound = null;
|
|
}
|
|
if (this.preloadedSound) {
|
|
this.preloadedSound.release();
|
|
this.preloadedSound = null;
|
|
if (this.preloadedPath) RNFS.unlink(this.preloadedPath).catch(() => {});
|
|
this.preloadedPath = '';
|
|
}
|
|
}
|
|
|
|
// --- Status & Callbacks ---
|
|
|
|
getRecordingState(): RecordingState {
|
|
return this.recordingState;
|
|
}
|
|
|
|
/** Callback fuer Aufnahmestatus-Aenderungen */
|
|
onStateChange(callback: RecordingStateCallback): () => void {
|
|
this.stateListeners.push(callback);
|
|
return () => {
|
|
this.stateListeners = this.stateListeners.filter(cb => cb !== callback);
|
|
};
|
|
}
|
|
|
|
/** Callback fuer Metering-Updates (dB Werte waehrend Aufnahme) */
|
|
onMeterUpdate(callback: MeterCallback): () => void {
|
|
this.meterListeners.push(callback);
|
|
return () => {
|
|
this.meterListeners = this.meterListeners.filter(cb => cb !== callback);
|
|
};
|
|
}
|
|
|
|
/** Callback wenn VAD Stille erkennt (Auto-Stop) */
|
|
onSilenceDetected(callback: SilenceCallback): () => void {
|
|
this.silenceListeners.push(callback);
|
|
return () => {
|
|
this.silenceListeners = this.silenceListeners.filter(cb => cb !== callback);
|
|
};
|
|
}
|
|
|
|
private setState(state: RecordingState): void {
|
|
if (this.recordingState !== state) {
|
|
this.recordingState = state;
|
|
this.stateListeners.forEach(cb => cb(state));
|
|
}
|
|
}
|
|
}
|
|
|
|
// Singleton
|
|
const audioService = new AudioService();
|
|
export default audioService;
|