added audio wakeword and recording, edited readme

2026-03-29 11:29:15 +02:00
parent b687f790ba
commit dbd97d3cf4
15 changed files with 912 additions and 798 deletions
+104 -17
@@ -1,13 +1,20 @@
/**
* Audio service for voice input/output
*
* Manages microphone recording and TTS audio playback.
* Uses react-native-sound and the native audio APIs.
* Manages microphone recording (with VAD/auto-stop on silence),
* TTS audio playback, and metering for visual feedback.
* Uses react-native-audio-recorder-player for recording.
*/
import { Platform, PermissionsAndroid } from 'react-native';
import Sound from 'react-native-sound';
import RNFS from 'react-native-fs';
import AudioRecorderPlayer, {
AudioEncoderAndroidType,
AudioSourceAndroidType,
AVEncodingOption,
OutputFormatAndroidType,
} from 'react-native-audio-recorder-player';
// --- Types ---
@@ -23,6 +30,8 @@ export interface RecordingResult {
export type RecordingState = 'idle' | 'recording' | 'processing';
type RecordingStateCallback = (state: RecordingState) => void;
type MeterCallback = (db: number) => void;
type SilenceCallback = () => void;
// --- Constants ---
@@ -30,17 +39,34 @@ const AUDIO_SAMPLE_RATE = 16000;
const AUDIO_CHANNELS = 1;
const AUDIO_ENCODING = 'audio/wav';
// VAD (Voice Activity Detection): silence detection
const VAD_SILENCE_THRESHOLD_DB = -45; // dB below which the signal counts as silence
const VAD_SILENCE_DURATION_MS = 1800; // ms of silence before auto-stop
// --- Audio service ---
class AudioService {
private recordingState: RecordingState = 'idle';
private recordingStartTime: number = 0;
private stateListeners: RecordingStateCallback[] = [];
private meterListeners: MeterCallback[] = [];
private silenceListeners: SilenceCallback[] = [];
private currentSound: Sound | null = null;
private recorder: AudioRecorderPlayer;
private recordingPath: string = '';
// VAD State
private vadEnabled: boolean = false;
private lastSpeechTime: number = 0;
private vadTimer: ReturnType<typeof setInterval> | null = null;
constructor() {
this.recorder = new AudioRecorderPlayer();
this.recorder.setSubscriptionDuration(0.1); // metering updates every 100 ms
}
// --- Permissions ---
/** Request microphone permission */
async requestMicrophonePermission(): Promise<boolean> {
if (Platform.OS !== 'android') {
return true;
@@ -66,7 +92,7 @@ class AudioService {
// --- Recording ---
/** Start microphone recording */
async startRecording(): Promise<boolean> {
async startRecording(autoStop: boolean = false): Promise<boolean> {
if (this.recordingState !== 'idle') {
console.warn('[Audio] Recording already in progress');
return false;
@@ -79,11 +105,48 @@ class AudioService {
}
try {
// Native recording start via the AudioRecorder bridge
// In production: use a native module or react-native-audio-recorder-player
// Stop any running playback (so ARIA does not hear itself)
this.stopPlayback();
this.recordingPath = `${RNFS.CachesDirectoryPath}/aria_recording_${Date.now()}.mp4`;
// Start recording with metering enabled
await this.recorder.startRecorder(this.recordingPath, {
AudioEncoderAndroid: AudioEncoderAndroidType.AAC,
AudioSourceAndroid: AudioSourceAndroidType.MIC,
OutputFormatAndroid: OutputFormatAndroidType.MPEG_4,
}, true); // meteringEnabled = true
// Metering callback
this.recorder.addRecordBackListener((e) => {
const db = e.currentMetering ?? -160;
this.meterListeners.forEach(cb => cb(db));
// VAD: levels above the threshold count as speech
if (this.vadEnabled) {
if (db > VAD_SILENCE_THRESHOLD_DB) {
this.lastSpeechTime = Date.now();
}
}
});
this.recordingStartTime = Date.now();
this.lastSpeechTime = Date.now();
this.setState('recording');
console.log('[Audio] Recording started');
// Enable VAD
this.vadEnabled = autoStop;
if (autoStop) {
this.vadTimer = setInterval(() => {
const silenceDuration = Date.now() - this.lastSpeechTime;
if (silenceDuration >= VAD_SILENCE_DURATION_MS) {
console.log(`[Audio] VAD: ${silenceDuration}ms of silence, auto-stopping`);
this.silenceListeners.forEach(cb => cb());
}
}, 200);
}
console.log('[Audio] Recording started (autoStop: %s)', autoStop);
return true;
} catch (err) {
console.error('[Audio] Failed to start recording:', err);
@@ -100,22 +163,31 @@ class AudioService {
}
this.setState('processing');
this.vadEnabled = false;
if (this.vadTimer) {
clearInterval(this.vadTimer);
this.vadTimer = null;
}
try {
await this.recorder.stopRecorder();
this.recorder.removeRecordBackListener();
const durationMs = Date.now() - this.recordingStartTime;
// In production: fetch the audio data from the native recorder
// const audioData = await NativeAudioRecorder.stop();
const base64Placeholder = ''; // placeholder until the native bridge is implemented
// Read the audio file as Base64
const base64Data = await RNFS.readFile(this.recordingPath, 'base64');
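// Note: Base64 encodes 3 bytes as 4 chars, so base64Data.length is ~4/3 of
// the raw audio size; the KB value logged below includes that overhead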
// Clean up the temp file
RNFS.unlink(this.recordingPath).catch(() => {});
this.setState('idle');
console.log(`[Audio] Recording stopped (${durationMs}ms)`);
console.log(`[Audio] Recording stopped (${durationMs}ms, ${Math.round(base64Data.length / 1024)}KB)`);
return {
base64: base64Placeholder,
base64: base64Data,
durationMs,
mimeType: AUDIO_ENCODING,
mimeType: 'audio/mp4', // AAC in an MP4 container
};
} catch (err) {
console.error('[Audio] Failed to stop recording:', err);
@@ -134,7 +206,7 @@ class AudioService {
this.stopPlayback();
try {
// Base64 temporary WAV file play via Sound
// Base64 -> temporary WAV file -> play via Sound
const tmpPath = `${RNFS.CachesDirectoryPath}/aria_tts_${Date.now()}.wav`;
await RNFS.writeFile(tmpPath, base64Data, 'base64');
@@ -152,7 +224,6 @@ class AudioService {
}
this.currentSound?.release();
this.currentSound = null;
// Clean up the temp file
RNFS.unlink(tmpPath).catch(() => {});
});
});
@@ -170,7 +241,7 @@ class AudioService {
}
}
// --- Status ---
// --- Status & Callbacks ---
getRecordingState(): RecordingState {
return this.recordingState;
@@ -184,6 +255,22 @@ class AudioService {
};
}
/** Callback for metering updates (dB values during recording) */
onMeterUpdate(callback: MeterCallback): () => void {
this.meterListeners.push(callback);
return () => {
this.meterListeners = this.meterListeners.filter(cb => cb !== callback);
};
}
/** Callback when VAD detects silence (auto-stop) */
onSilenceDetected(callback: SilenceCallback): () => void {
this.silenceListeners.push(callback);
return () => {
this.silenceListeners = this.silenceListeners.filter(cb => cb !== callback);
};
}
private setState(state: RecordingState): void {
if (this.recordingState !== state) {
this.recordingState = state;
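For context, here is a minimal caller-side sketch of the new auto-stop flow. The import path and the stopRecording() name are assumptions (the stop method's name is not visible in the hunks above); the rest follows the API added in this diff:

import audioService from './services/AudioService'; // assumed path

async function recordUtterance(): Promise<void> {
  // Subscribe before starting; both subscriptions return an unsubscribe function
  const offMeter = audioService.onMeterUpdate((db) => {
    console.log(`level: ${db} dB`); // e.g. drive a level indicator in the UI
  });
  const offSilence = audioService.onSilenceDetected(async () => {
    // VAD saw >= 1800 ms below -45 dB; the service only notifies,
    // so the caller has to stop the recording itself
    const clip = await audioService.stopRecording(); // assumed method name
    offMeter();
    offSilence();
    if (clip) {
      console.log(`${clip.durationMs}ms of ${clip.mimeType} recorded`);
      // clip.base64 is ready for the STT backend
    }
  });
  if (!(await audioService.startRecording(true))) { // true = enable VAD/auto-stop
    offMeter();
    offSilence();
  }
}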
+145
@@ -0,0 +1,145 @@
/**
* Wake Word Service: "ARIA" detection
*
* Uses react-native-live-audio-stream for continuous microphone monitoring.
* Detects speech via an energy threshold and sends short audio clips
* to the server-side wake-word check (openwakeword in the bridge).
*
* Architecture:
* App (microphone) → energy detection → audio buffer
* → RVS "wake_check" → bridge → openwakeword → confirmation
* → app starts recording
*
* Current (phase 1): simple tap-to-talk + auto-stop.
* Later (phase 2): Porcupine on-device "ARIA" keyword.
*/
import LiveAudioStream from 'react-native-live-audio-stream';
type WakeWordCallback = () => void;
type StateCallback = (state: WakeWordState) => void;
export type WakeWordState = 'off' | 'listening' | 'detected';
class WakeWordService {
private state: WakeWordState = 'off';
private wakeCallbacks: WakeWordCallback[] = [];
private stateCallbacks: StateCallback[] = [];
private isInitialized = false;
/** Start wake word detection */
async start(): Promise<boolean> {
if (this.state === 'listening') return true;
try {
if (!this.isInitialized) {
LiveAudioStream.init({
sampleRate: 16000,
channels: 1,
bitsPerSample: 16,
audioSource: 6, // VOICE_RECOGNITION
bufferSize: 4096,
});
// Register the data listener once; re-registering on every start()
// would stack duplicate handlers (the library exposes no off())
LiveAudioStream.on('data', (base64Chunk: string) => {
if (this.state !== 'listening') return;
// Base64 → Int16 array → compute RMS
const raw = this._base64ToInt16(base64Chunk);
const rms = this._calculateRMS(raw);
// Threshold: loud enough → wake word detected
// Phase 1: simple energy detection (someone is speaking)
// Phase 2: Porcupine "ARIA" keyword
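// For reference: RMS 2000 on 16-bit PCM (full scale 32768) is about -24 dBFS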
if (rms > 2000) {
this.setState('detected');
this.wakeCallbacks.forEach(cb => cb());
// Pause briefly after detection; the recording takes over the microphone
this.stop();
}
});
this.isInitialized = true;
}
// Start the audio stream and monitor energy
LiveAudioStream.start();
this.setState('listening');
console.log('[WakeWord] Listening started');
return true;
} catch (err) {
console.error('[WakeWord] Failed to start:', err);
return false;
}
}
/** Stop wake word detection */
stop(): void {
if (this.state === 'off') return;
try {
LiveAudioStream.stop();
} catch {}
this.setState('off');
console.log('[WakeWord] Stopped');
}
/** Restart listening after a recording */
async resume(): Promise<void> {
// Short pause so the recorder can release the microphone first;
// awaiting here makes the returned promise resolve only after the restart
await new Promise<void>((resolve) => setTimeout(resolve, 500));
if (this.state === 'off') {
await this.start();
}
}
// --- Callbacks ---
onWakeWord(callback: WakeWordCallback): () => void {
this.wakeCallbacks.push(callback);
return () => {
this.wakeCallbacks = this.wakeCallbacks.filter(cb => cb !== callback);
};
}
onStateChange(callback: StateCallback): () => void {
this.stateCallbacks.push(callback);
return () => {
this.stateCallbacks = this.stateCallbacks.filter(cb => cb !== callback);
};
}
getState(): WakeWordState {
return this.state;
}
// --- Helpers ---
private setState(state: WakeWordState): void {
if (this.state !== state) {
this.state = state;
this.stateCallbacks.forEach(cb => cb(state));
}
}
private _base64ToInt16(base64: string): Int16Array {
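// Assumes atob() exists in the JS runtime (recent Hermes provides it;
// older runtimes need a Base64 polyfill)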
const binary = atob(base64);
const bytes = new Uint8Array(binary.length);
for (let i = 0; i < binary.length; i++) {
bytes[i] = binary.charCodeAt(i);
}
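// Reinterpret the raw bytes as 16-bit PCM samples; assumes little-endian
// byte order, which matches Android's PCM output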
return new Int16Array(bytes.buffer);
}
private _calculateRMS(samples: Int16Array): number {
if (samples.length === 0) return 0;
let sum = 0;
for (let i = 0; i < samples.length; i++) {
sum += samples[i] * samples[i];
}
return Math.sqrt(sum / samples.length);
}
}
const wakeWordService = new WakeWordService();
export default wakeWordService;
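The intended handover between the two services, per the wake-word header comment, looks roughly like this; a sketch under the same assumptions as above (default exports, a stopRecording() method):

import wakeWordService from './services/WakeWordService'; // assumed paths
import audioService from './services/AudioService';

// Wake word fired: hand the microphone to the recorder with auto-stop enabled
wakeWordService.onWakeWord(async () => {
  const started = await audioService.startRecording(true);
  if (!started) {
    wakeWordService.resume(); // recording failed, go back to listening
  }
});

// VAD reported silence: finish the clip, then resume wake-word listening
audioService.onSilenceDetected(async () => {
  const clip = await audioService.stopRecording(); // assumed method name
  await wakeWordService.resume(); // waits ~500 ms so the microphone is released
  if (clip) {
    // clip.base64 would go to the bridge for STT / wake confirmation
  }
});

wakeWordService.start();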