Added audio wake word and recording; edited README
This commit is contained in:
+104
-17
@@ -1,13 +1,20 @@
|
||||
/**
|
||||
* Audio-Service fuer Sprach-Ein-/Ausgabe
|
||||
*
|
||||
* Verwaltet Mikrofon-Aufnahme und TTS-Audiowiedergabe.
|
||||
* Nutzt react-native-sound und die nativen Audio-APIs.
|
||||
* Verwaltet Mikrofon-Aufnahme (mit VAD/Auto-Stop bei Stille),
|
||||
* TTS-Audiowiedergabe und Metering fuer visuelles Feedback.
|
||||
* Nutzt react-native-audio-recorder-player fuer Aufnahme.
|
||||
*/
|
||||
|
||||
import { Platform, PermissionsAndroid } from 'react-native';
|
||||
import Sound from 'react-native-sound';
|
||||
import RNFS from 'react-native-fs';
|
||||
import AudioRecorderPlayer, {
|
||||
AudioEncoderAndroidType,
|
||||
AudioSourceAndroidType,
|
||||
AVEncodingOption,
|
||||
OutputFormatAndroidType,
|
||||
} from 'react-native-audio-recorder-player';
|
||||
|
||||
// --- Typen ---
|
||||
|
||||
@@ -23,6 +30,8 @@ export interface RecordingResult {
|
||||
export type RecordingState = 'idle' | 'recording' | 'processing';
|
||||
|
||||
type RecordingStateCallback = (state: RecordingState) => void;
|
||||
type MeterCallback = (db: number) => void;
|
||||
type SilenceCallback = () => void;
|
||||
|
||||
// --- Konstanten ---
|
||||
|
||||
@@ -30,17 +39,34 @@ const AUDIO_SAMPLE_RATE = 16000;
|
||||
const AUDIO_CHANNELS = 1;
|
||||
const AUDIO_ENCODING = 'audio/wav';
|
||||
|
||||
// VAD (Voice Activity Detection) — Stille-Erkennung
|
||||
const VAD_SILENCE_THRESHOLD_DB = -45; // dB unter dem als "Stille" gilt
|
||||
const VAD_SILENCE_DURATION_MS = 1800; // ms Stille bevor Auto-Stop
|
||||
|
||||
// --- Audio-Service ---
|
||||
|
||||
class AudioService {
|
||||
private recordingState: RecordingState = 'idle';
|
||||
private recordingStartTime: number = 0;
|
||||
private stateListeners: RecordingStateCallback[] = [];
|
||||
private meterListeners: MeterCallback[] = [];
|
||||
private silenceListeners: SilenceCallback[] = [];
|
||||
private currentSound: Sound | null = null;
|
||||
private recorder: AudioRecorderPlayer;
|
||||
private recordingPath: string = '';
|
||||
|
||||
// VAD State
|
||||
private vadEnabled: boolean = false;
|
||||
private lastSpeechTime: number = 0;
|
||||
private vadTimer: ReturnType<typeof setInterval> | null = null;
|
||||
|
||||
constructor() {
|
||||
this.recorder = new AudioRecorderPlayer();
|
||||
this.recorder.setSubscriptionDuration(0.1); // 100ms Metering-Updates
|
||||
}
|
||||
|
||||
// --- Berechtigungen ---
|
||||
|
||||
/** Mikrofon-Berechtigung anfordern */
|
||||
async requestMicrophonePermission(): Promise<boolean> {
|
||||
if (Platform.OS !== 'android') {
|
||||
return true;
|
||||
@@ -66,7 +92,7 @@ class AudioService {
|
||||
// --- Aufnahme ---
|
||||
|
||||
/** Mikrofon-Aufnahme starten */
|
||||
async startRecording(): Promise<boolean> {
|
||||
async startRecording(autoStop: boolean = false): Promise<boolean> {
|
||||
if (this.recordingState !== 'idle') {
|
||||
console.warn('[Audio] Aufnahme laeuft bereits');
|
||||
return false;
|
||||
@@ -79,11 +105,48 @@ class AudioService {
|
||||
}
|
||||
|
||||
try {
|
||||
// Nativer Aufnahme-Start ueber AudioRecorder-Bridge
|
||||
// In Produktion: Native Module oder react-native-audio-recorder-player nutzen
|
||||
// Laufende Wiedergabe stoppen (damit ARIA sich nicht selbst hoert)
|
||||
this.stopPlayback();
|
||||
|
||||
this.recordingPath = `${RNFS.CachesDirectoryPath}/aria_recording_${Date.now()}.mp4`;
|
||||
|
||||
// Aufnahme mit Metering starten
|
||||
await this.recorder.startRecorder(this.recordingPath, {
|
||||
AudioEncoderAndroid: AudioEncoderAndroidType.AAC,
|
||||
AudioSourceAndroid: AudioSourceAndroidType.MIC,
|
||||
OutputFormatAndroid: OutputFormatAndroidType.MPEG_4,
|
||||
}, true); // meteringEnabled = true
|
||||
|
||||
// Metering-Callback
|
||||
this.recorder.addRecordBackListener((e) => {
|
||||
const db = e.currentMetering ?? -160;
|
||||
this.meterListeners.forEach(cb => cb(db));
|
||||
|
||||
// VAD: Stille erkennen
|
||||
if (this.vadEnabled) {
|
||||
if (db > VAD_SILENCE_THRESHOLD_DB) {
|
||||
this.lastSpeechTime = Date.now();
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
this.recordingStartTime = Date.now();
|
||||
this.lastSpeechTime = Date.now();
|
||||
this.setState('recording');
|
||||
console.log('[Audio] Aufnahme gestartet');
|
||||
|
||||
// VAD aktivieren
|
||||
this.vadEnabled = autoStop;
|
||||
if (autoStop) {
|
||||
this.vadTimer = setInterval(() => {
|
||||
const silenceDuration = Date.now() - this.lastSpeechTime;
|
||||
if (silenceDuration >= VAD_SILENCE_DURATION_MS) {
|
||||
console.log(`[Audio] VAD: ${silenceDuration}ms Stille — Auto-Stop`);
|
||||
this.silenceListeners.forEach(cb => cb());
|
||||
}
|
||||
}, 200);
|
||||
}
|
||||
|
||||
console.log('[Audio] Aufnahme gestartet (autoStop: %s)', autoStop);
|
||||
return true;
|
||||
} catch (err) {
|
||||
console.error('[Audio] Fehler beim Starten der Aufnahme:', err);
|
||||
@@ -100,22 +163,31 @@ class AudioService {
|
||||
}
|
||||
|
||||
this.setState('processing');
|
||||
this.vadEnabled = false;
|
||||
if (this.vadTimer) {
|
||||
clearInterval(this.vadTimer);
|
||||
this.vadTimer = null;
|
||||
}
|
||||
|
||||
try {
|
||||
await this.recorder.stopRecorder();
|
||||
this.recorder.removeRecordBackListener();
|
||||
|
||||
const durationMs = Date.now() - this.recordingStartTime;
|
||||
|
||||
// In Produktion: Audiodaten vom nativen Recorder holen
|
||||
// const audioData = await NativeAudioRecorder.stop();
|
||||
const base64Placeholder = ''; // Platzhalter bis Native-Bridge implementiert
|
||||
// Audio-Datei als Base64 lesen
|
||||
const base64Data = await RNFS.readFile(this.recordingPath, 'base64');
|
||||
|
||||
// Temp-Datei aufraeumen
|
||||
RNFS.unlink(this.recordingPath).catch(() => {});
|
||||
|
||||
this.setState('idle');
|
||||
|
||||
console.log(`[Audio] Aufnahme beendet (${durationMs}ms)`);
|
||||
console.log(`[Audio] Aufnahme beendet (${durationMs}ms, ${Math.round(base64Data.length / 1024)}KB)`);
|
||||
|
||||
return {
|
||||
base64: base64Placeholder,
|
||||
base64: base64Data,
|
||||
durationMs,
|
||||
mimeType: AUDIO_ENCODING,
|
||||
mimeType: 'audio/mp4', // AAC in MP4 Container
|
||||
};
|
||||
} catch (err) {
|
||||
console.error('[Audio] Fehler beim Stoppen der Aufnahme:', err);
|
||||
@@ -134,7 +206,7 @@ class AudioService {
|
||||
this.stopPlayback();
|
||||
|
||||
try {
|
||||
// Base64 → temporaere WAV-Datei → Sound abspielen
|
||||
// Base64 -> temporaere WAV-Datei -> Sound abspielen
|
||||
const tmpPath = `${RNFS.CachesDirectoryPath}/aria_tts_${Date.now()}.wav`;
|
||||
await RNFS.writeFile(tmpPath, base64Data, 'base64');
|
||||
|
||||
@@ -152,7 +224,6 @@ class AudioService {
|
||||
}
|
||||
this.currentSound?.release();
|
||||
this.currentSound = null;
|
||||
// Temp-Datei aufraeumen
|
||||
RNFS.unlink(tmpPath).catch(() => {});
|
||||
});
|
||||
});
|
||||
@@ -170,7 +241,7 @@ class AudioService {
|
||||
}
|
||||
}
|
||||
|
||||
// --- Status ---
|
||||
// --- Status & Callbacks ---
|
||||
|
||||
getRecordingState(): RecordingState {
|
||||
return this.recordingState;
|
||||
@@ -184,6 +255,22 @@ class AudioService {
|
||||
};
|
||||
}
|
||||
|
||||
/** Callback fuer Metering-Updates (dB Werte waehrend Aufnahme) */
|
||||
onMeterUpdate(callback: MeterCallback): () => void {
|
||||
this.meterListeners.push(callback);
|
||||
return () => {
|
||||
this.meterListeners = this.meterListeners.filter(cb => cb !== callback);
|
||||
};
|
||||
}
|
||||
|
||||
/** Callback wenn VAD Stille erkennt (Auto-Stop) */
|
||||
onSilenceDetected(callback: SilenceCallback): () => void {
|
||||
this.silenceListeners.push(callback);
|
||||
return () => {
|
||||
this.silenceListeners = this.silenceListeners.filter(cb => cb !== callback);
|
||||
};
|
||||
}
|
||||
|
||||
private setState(state: RecordingState): void {
|
||||
if (this.recordingState !== state) {
|
||||
this.recordingState = state;
|
||||
|
||||
@@ -0,0 +1,145 @@
|
||||
/**
|
||||
* Wake Word Service — "ARIA" Erkennung
|
||||
*
|
||||
* Nutzt react-native-live-audio-stream fuer kontinuierliches Mikrofon-Monitoring.
|
||||
* Erkennt Sprache per Energie-Schwellwert und sendet kurze Audio-Clips
|
||||
* zur serverseitigen Wake-Word-Pruefung (openwakeword in der Bridge).
|
||||
*
|
||||
* Architektur:
|
||||
* App (Mikrofon) → Energie-Erkennung → Audio-Buffer
|
||||
* → RVS "wake_check" → Bridge → openwakeword → Bestaetigung
|
||||
* → App startet Aufnahme
|
||||
*
|
||||
* Aktuell (Phase 1): Einfacher Tap-to-Talk + Auto-Stop.
|
||||
* Spaeter (Phase 2): Porcupine on-device "ARIA" Keyword.
|
||||
*/
|
||||
|
||||
import LiveAudioStream from 'react-native-live-audio-stream';
|
||||
|
||||
type WakeWordCallback = () => void;
|
||||
type StateCallback = (state: WakeWordState) => void;
|
||||
|
||||
export type WakeWordState = 'off' | 'listening' | 'detected';
|
||||
|
||||
class WakeWordService {
|
||||
private state: WakeWordState = 'off';
|
||||
private wakeCallbacks: WakeWordCallback[] = [];
|
||||
private stateCallbacks: StateCallback[] = [];
|
||||
private isInitialized = false;
|
||||
|
||||
/** Wake Word Erkennung starten */
|
||||
async start(): Promise<boolean> {
|
||||
if (this.state === 'listening') return true;
|
||||
|
||||
try {
|
||||
if (!this.isInitialized) {
|
||||
LiveAudioStream.init({
|
||||
sampleRate: 16000,
|
||||
channels: 1,
|
||||
bitsPerSample: 16,
|
||||
audioSource: 6, // VOICE_RECOGNITION
|
||||
bufferSize: 4096,
|
||||
});
|
||||
this.isInitialized = true;
|
||||
}
|
||||
|
||||
// Audio-Stream starten und auf Energie pruefen
|
||||
LiveAudioStream.start();
|
||||
|
||||
LiveAudioStream.on('data', (base64Chunk: string) => {
|
||||
if (this.state !== 'listening') return;
|
||||
|
||||
// Base64 → Int16 Array → RMS berechnen
|
||||
const raw = this._base64ToInt16(base64Chunk);
|
||||
const rms = this._calculateRMS(raw);
|
||||
|
||||
// Schwellwert: wenn laut genug → Wake Word erkannt
|
||||
// Phase 1: Einfache Energie-Erkennung (jemand spricht)
|
||||
// Phase 2: Porcupine "ARIA" Keyword
|
||||
if (rms > 2000) {
|
||||
this.setState('detected');
|
||||
this.wakeCallbacks.forEach(cb => cb());
|
||||
// Nach Detection kurz pausieren, Aufnahme uebernimmt das Mikrofon
|
||||
this.stop();
|
||||
}
|
||||
});
|
||||
|
||||
this.setState('listening');
|
||||
console.log('[WakeWord] Listening gestartet');
|
||||
return true;
|
||||
} catch (err) {
|
||||
console.error('[WakeWord] Start fehlgeschlagen:', err);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/** Wake Word Erkennung stoppen */
|
||||
stop(): void {
|
||||
if (this.state === 'off') return;
|
||||
try {
|
||||
LiveAudioStream.stop();
|
||||
} catch {}
|
||||
this.setState('off');
|
||||
console.log('[WakeWord] Gestoppt');
|
||||
}
|
||||
|
||||
/** Nach Aufnahme erneut starten */
|
||||
async resume(): Promise<void> {
|
||||
// Kurze Pause damit Aufnahme das Mikrofon freigeben kann
|
||||
setTimeout(() => {
|
||||
if (this.state === 'off') {
|
||||
this.start();
|
||||
}
|
||||
}, 500);
|
||||
}
|
||||
|
||||
// --- Callbacks ---
|
||||
|
||||
onWakeWord(callback: WakeWordCallback): () => void {
|
||||
this.wakeCallbacks.push(callback);
|
||||
return () => {
|
||||
this.wakeCallbacks = this.wakeCallbacks.filter(cb => cb !== callback);
|
||||
};
|
||||
}
|
||||
|
||||
onStateChange(callback: StateCallback): () => void {
|
||||
this.stateCallbacks.push(callback);
|
||||
return () => {
|
||||
this.stateCallbacks = this.stateCallbacks.filter(cb => cb !== callback);
|
||||
};
|
||||
}
|
||||
|
||||
getState(): WakeWordState {
|
||||
return this.state;
|
||||
}
|
||||
|
||||
// --- Hilfsfunktionen ---
|
||||
|
||||
private setState(state: WakeWordState): void {
|
||||
if (this.state !== state) {
|
||||
this.state = state;
|
||||
this.stateCallbacks.forEach(cb => cb(state));
|
||||
}
|
||||
}
|
||||
|
||||
private _base64ToInt16(base64: string): Int16Array {
|
||||
const binary = atob(base64);
|
||||
const bytes = new Uint8Array(binary.length);
|
||||
for (let i = 0; i < binary.length; i++) {
|
||||
bytes[i] = binary.charCodeAt(i);
|
||||
}
|
||||
return new Int16Array(bytes.buffer);
|
||||
}
|
||||
|
||||
private _calculateRMS(samples: Int16Array): number {
|
||||
if (samples.length === 0) return 0;
|
||||
let sum = 0;
|
||||
for (let i = 0; i < samples.length; i++) {
|
||||
sum += samples[i] * samples[i];
|
||||
}
|
||||
return Math.sqrt(sum / samples.length);
|
||||
}
|
||||
}
|
||||
|
||||
const wakeWordService = new WakeWordService();
|
||||
export default wakeWordService;
|
||||
Reference in New Issue
Block a user