ARIA-AGENT/android/src/services/wakeword.ts

146 lines
4.0 KiB
TypeScript

/**
* Wake Word Service — "ARIA" detection
*
* Uses react-native-live-audio-stream for continuous microphone monitoring.
* Target design: detect speech with an energy threshold and send short audio
* clips to the server for the actual wake-word check (openwakeword in the Bridge).
*
* Architecture (target design):
* App (microphone) → energy detection → audio buffer
* → RVS "wake_check" → Bridge → openwakeword → confirmation
* → App starts recording
*
* Current state (phase 1): simple tap-to-talk + auto-stop. This file itself only
* performs the energy-threshold step and treats any sufficiently loud chunk as a
* wake event; nothing is sent to the server from here.
* Later (phase 2): Porcupine on-device "ARIA" keyword.
*/
import LiveAudioStream from 'react-native-live-audio-stream';
/** Lifecycle of the detector: idle, monitoring the microphone, or just triggered. */
export type WakeWordState = 'off' | 'listening' | 'detected';

/** Listener invoked (without arguments) when a wake event is detected. */
type WakeWordCallback = () => void;

/** Listener invoked with the new state whenever the detector's state changes. */
type StateCallback = (state: WakeWordState) => void;
/**
 * Energy-based microphone monitor.
 *
 * While in the `'listening'` state, every audio chunk delivered by
 * `LiveAudioStream` is decoded and its RMS level is compared against a fixed
 * threshold. The first chunk above the threshold switches the state to
 * `'detected'`, notifies all wake listeners, and then stops the stream
 * (state `'off'`) so that another component can use the microphone.
 *
 * Listener exceptions are logged and isolated: one failing listener neither
 * prevents the remaining listeners from running nor leaves the stream running.
 */
class WakeWordService {
  /** RMS level (in 16-bit sample units) a chunk must exceed to count as a wake event. */
  private static readonly RMS_THRESHOLD = 2000;
  /** Delay used by `resume()` so the recorder can release the microphone first. */
  private static readonly RESUME_DELAY_MS = 500;

  private state: WakeWordState = 'off';
  private wakeCallbacks: WakeWordCallback[] = [];
  private stateCallbacks: StateCallback[] = [];
  private isInitialized = false;
  /** Handle of the restart scheduled by `resume()`, or null when none is pending. */
  private resumeTimer: ReturnType<typeof setTimeout> | null = null;

  /**
   * Handler for the stream's `'data'` event. Declared as an arrow property so
   * it can be passed to the stream without losing `this`.
   */
  private readonly handleChunk = (base64Chunk: string): void => {
    if (this.state !== 'listening') return;

    // Base64 → 16-bit samples → RMS
    const rms = this._calculateRMS(this._base64ToInt16(base64Chunk));

    // Phase 1: plain energy detection (somebody is speaking).
    // Phase 2: Porcupine "ARIA" keyword.
    if (rms <= WakeWordService.RMS_THRESHOLD) return;

    this.setState('detected');
    this.notifyWake();
    // Release the microphone for the recorder. This deliberately does not go
    // through the public stop(): a wake listener may already have called
    // resume(), and that scheduled restart must survive.
    this.haltStream();
  };

  /**
   * Starts monitoring the microphone.
   *
   * The stream is configured once (16 kHz, mono, 16-bit, audio source 6);
   * subsequent calls only restart it. The data listener is registered on every
   * start.
   *
   * @returns `true` when listening is (or already was) active, `false` when
   *          the stream could not be started; the error is logged.
   */
  async start(): Promise<boolean> {
    if (this.state === 'listening') return true;
    try {
      if (!this.isInitialized) {
        LiveAudioStream.init({
          sampleRate: 16000,
          channels: 1,
          bitsPerSample: 16,
          audioSource: 6, // VOICE_RECOGNITION
          bufferSize: 4096,
        });
        this.isInitialized = true;
      }
      // Start the audio stream and check each chunk's energy.
      LiveAudioStream.start();
      LiveAudioStream.on('data', this.handleChunk);
      this.setState('listening');
      console.log('[WakeWord] Listening gestartet');
      return true;
    } catch (err) {
      console.error('[WakeWord] Start fehlgeschlagen:', err);
      return false;
    }
  }

  /**
   * Stops monitoring and cancels a restart scheduled by `resume()`, so an
   * explicit stop is never undone by a pending timer.
   */
  stop(): void {
    this.cancelPendingResume();
    this.haltStream();
  }

  /**
   * Schedules a restart after a short delay, giving the recorder time to
   * release the microphone. The restart only happens if the service is still
   * `'off'` when the delay elapses. Calling `resume()` again replaces the
   * previously scheduled restart.
   *
   * The returned promise resolves immediately, not when listening resumes.
   */
  async resume(): Promise<void> {
    this.cancelPendingResume();
    this.resumeTimer = setTimeout(() => {
      this.resumeTimer = null;
      if (this.state === 'off') {
        // start() handles its own errors and reports them via its result.
        void this.start();
      }
    }, WakeWordService.RESUME_DELAY_MS);
  }

  // --- Callbacks ---

  /**
   * Registers a listener for wake events.
   * @returns a function that removes the listener again.
   */
  onWakeWord(callback: WakeWordCallback): () => void {
    this.wakeCallbacks.push(callback);
    return () => {
      this.wakeCallbacks = this.wakeCallbacks.filter(cb => cb !== callback);
    };
  }

  /**
   * Registers a listener for state changes.
   * @returns a function that removes the listener again.
   */
  onStateChange(callback: StateCallback): () => void {
    this.stateCallbacks.push(callback);
    return () => {
      this.stateCallbacks = this.stateCallbacks.filter(cb => cb !== callback);
    };
  }

  /** Returns the current state. */
  getState(): WakeWordState {
    return this.state;
  }

  // --- Helpers ---

  /** Stops the stream and switches to `'off'`; no-op when already off. */
  private haltStream(): void {
    if (this.state === 'off') return;
    try {
      LiveAudioStream.stop();
    } catch (err) {
      // Best effort: the state still moves to 'off', but the failure is visible.
      console.warn('[WakeWord] Stop fehlgeschlagen:', err);
    }
    this.setState('off');
    console.log('[WakeWord] Gestoppt');
  }

  /** Clears the restart timer set by `resume()`, if any. */
  private cancelPendingResume(): void {
    if (this.resumeTimer !== null) {
      clearTimeout(this.resumeTimer);
      this.resumeTimer = null;
    }
  }

  /** Invokes every wake listener; a throwing listener is logged and skipped. */
  private notifyWake(): void {
    // Iterate over a snapshot so listeners may subscribe/unsubscribe while running.
    for (const cb of [...this.wakeCallbacks]) {
      try {
        cb();
      } catch (err) {
        console.error('[WakeWord] Callback fehlgeschlagen:', err);
      }
    }
  }

  /** Updates the state and notifies state listeners, only on an actual change. */
  private setState(state: WakeWordState): void {
    if (this.state === state) return;
    this.state = state;
    for (const cb of [...this.stateCallbacks]) {
      try {
        cb(state);
      } catch (err) {
        console.error('[WakeWord] Callback fehlgeschlagen:', err);
      }
    }
  }

  /**
   * Decodes a base64 chunk into signed 16-bit samples.
   *
   * Byte pairs are read explicitly as little-endian (low byte first), which is
   * what a typed-array view over the bytes yields on little-endian hosts. A
   * trailing unpaired byte is dropped instead of raising a RangeError.
   */
  private _base64ToInt16(base64: string): Int16Array {
    const binary = atob(base64);
    const sampleCount = binary.length >> 1;
    const samples = new Int16Array(sampleCount);
    for (let i = 0; i < sampleCount; i++) {
      const low = binary.charCodeAt(2 * i);
      const high = binary.charCodeAt(2 * i + 1);
      // Storing into an Int16Array wraps 0x8000..0xFFFF to negative values.
      samples[i] = (high << 8) | low;
    }
    return samples;
  }

  /** Root mean square of the samples; 0 for an empty buffer. */
  private _calculateRMS(samples: Int16Array): number {
    if (samples.length === 0) return 0;
    let sumOfSquares = 0;
    for (const sample of samples) {
      sumOfSquares += sample * sample;
    }
    return Math.sqrt(sumOfSquares / samples.length);
  }
}
/** Single shared instance of the service; this is the module's default export. */
const wakeWordService: WakeWordService = new WakeWordService();

export { wakeWordService as default };