added audio wakeword and recording, edited readme

2026-03-29 11:29:15 +02:00
parent b687f790ba
commit dbd97d3cf4
15 changed files with 912 additions and 798 deletions
+104 -17
@@ -1,13 +1,20 @@
/**
* Audio service for voice input/output
*
* Manages microphone recording and TTS audio playback.
* Uses react-native-sound and the native audio APIs.
* Manages microphone recording (with VAD/auto-stop on silence),
* TTS audio playback, and metering for visual feedback.
* Uses react-native-audio-recorder-player for recording.
*/
import { Platform, PermissionsAndroid } from 'react-native';
import Sound from 'react-native-sound';
import RNFS from 'react-native-fs';
import AudioRecorderPlayer, {
AudioEncoderAndroidType,
AudioSourceAndroidType,
AVEncodingOption,
OutputFormatAndroidType,
} from 'react-native-audio-recorder-player';
// --- Types ---
@@ -23,6 +30,8 @@ export interface RecordingResult {
export type RecordingState = 'idle' | 'recording' | 'processing';
type RecordingStateCallback = (state: RecordingState) => void;
type MeterCallback = (db: number) => void;
type SilenceCallback = () => void;
// --- Constants ---
@@ -30,17 +39,34 @@ const AUDIO_SAMPLE_RATE = 16000;
const AUDIO_CHANNELS = 1;
const AUDIO_ENCODING = 'audio/wav';
// VAD (Voice Activity Detection): silence detection
const VAD_SILENCE_THRESHOLD_DB = -45; // dB below which the signal counts as silence
const VAD_SILENCE_DURATION_MS = 1800; // ms of silence before auto-stop
// --- Audio service ---
class AudioService {
private recordingState: RecordingState = 'idle';
private recordingStartTime: number = 0;
private stateListeners: RecordingStateCallback[] = [];
private meterListeners: MeterCallback[] = [];
private silenceListeners: SilenceCallback[] = [];
private currentSound: Sound | null = null;
private recorder: AudioRecorderPlayer;
private recordingPath: string = '';
// VAD State
private vadEnabled: boolean = false;
private lastSpeechTime: number = 0;
private vadTimer: ReturnType<typeof setInterval> | null = null;
constructor() {
this.recorder = new AudioRecorderPlayer();
this.recorder.setSubscriptionDuration(0.1); // metering updates every 100 ms
}
// --- Permissions ---
/** Request microphone permission */
async requestMicrophonePermission(): Promise<boolean> {
if (Platform.OS !== 'android') {
return true;
@@ -66,7 +92,7 @@ class AudioService {
// --- Recording ---
/** Start microphone recording */
async startRecording(): Promise<boolean> {
async startRecording(autoStop: boolean = false): Promise<boolean> {
if (this.recordingState !== 'idle') {
console.warn('[Audio] Recording already in progress');
return false;
@@ -79,11 +105,48 @@ class AudioService {
}
try {
// Native recording start via the AudioRecorder bridge
// In production: use a native module or react-native-audio-recorder-player
// Stop any running playback (so ARIA does not hear itself)
this.stopPlayback();
this.recordingPath = `${RNFS.CachesDirectoryPath}/aria_recording_${Date.now()}.mp4`;
// Start recording with metering enabled
await this.recorder.startRecorder(this.recordingPath, {
AudioEncoderAndroid: AudioEncoderAndroidType.AAC,
AudioSourceAndroid: AudioSourceAndroidType.MIC,
OutputFormatAndroid: OutputFormatAndroidType.MPEG_4,
}, true); // meteringEnabled = true
// Metering callback
this.recorder.addRecordBackListener((e) => {
const db = e.currentMetering ?? -160;
this.meterListeners.forEach(cb => cb(db));
// VAD: levels above the threshold count as speech
if (this.vadEnabled) {
if (db > VAD_SILENCE_THRESHOLD_DB) {
this.lastSpeechTime = Date.now();
}
}
});
this.recordingStartTime = Date.now();
this.lastSpeechTime = Date.now();
this.setState('recording');
console.log('[Audio] Recording started');
// Enable VAD
this.vadEnabled = autoStop;
if (autoStop) {
this.vadTimer = setInterval(() => {
const silenceDuration = Date.now() - this.lastSpeechTime;
if (silenceDuration >= VAD_SILENCE_DURATION_MS) {
console.log(`[Audio] VAD: ${silenceDuration}ms of silence, auto-stopping`);
this.silenceListeners.forEach(cb => cb());
}
}, 200);
}
console.log('[Audio] Recording started (autoStop: %s)', autoStop);
return true;
} catch (err) {
console.error('[Audio] Failed to start recording:', err);
@@ -100,22 +163,31 @@ class AudioService {
}
this.setState('processing');
this.vadEnabled = false;
if (this.vadTimer) {
clearInterval(this.vadTimer);
this.vadTimer = null;
}
try {
await this.recorder.stopRecorder();
this.recorder.removeRecordBackListener();
const durationMs = Date.now() - this.recordingStartTime;
// In production: fetch the audio data from the native recorder
// const audioData = await NativeAudioRecorder.stop();
const base64Placeholder = ''; // placeholder until the native bridge is implemented
// Read the audio file as Base64
const base64Data = await RNFS.readFile(this.recordingPath, 'base64');
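// Note: Base64 encodes 3 bytes as 4 chars, so base64Data.length is ~4/3 of
// the raw audio size; the KB value logged below includes that overhead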
// Clean up the temp file
RNFS.unlink(this.recordingPath).catch(() => {});
this.setState('idle');
console.log(`[Audio] Recording stopped (${durationMs}ms)`);
console.log(`[Audio] Recording stopped (${durationMs}ms, ${Math.round(base64Data.length / 1024)}KB)`);
return {
base64: base64Placeholder,
base64: base64Data,
durationMs,
mimeType: AUDIO_ENCODING,
mimeType: 'audio/mp4', // AAC in an MP4 container
};
} catch (err) {
console.error('[Audio] Failed to stop recording:', err);
@@ -134,7 +206,7 @@ class AudioService {
this.stopPlayback();
try {
// Base64 temporary WAV file play via Sound
// Base64 -> temporary WAV file -> play via Sound
const tmpPath = `${RNFS.CachesDirectoryPath}/aria_tts_${Date.now()}.wav`;
await RNFS.writeFile(tmpPath, base64Data, 'base64');
@@ -152,7 +224,6 @@ class AudioService {
}
this.currentSound?.release();
this.currentSound = null;
// Clean up the temp file
RNFS.unlink(tmpPath).catch(() => {});
});
});
@@ -170,7 +241,7 @@ class AudioService {
}
}
// --- Status ---
// --- Status & Callbacks ---
getRecordingState(): RecordingState {
return this.recordingState;
@@ -184,6 +255,22 @@ class AudioService {
};
}
/** Callback for metering updates (dB values during recording) */
onMeterUpdate(callback: MeterCallback): () => void {
this.meterListeners.push(callback);
return () => {
this.meterListeners = this.meterListeners.filter(cb => cb !== callback);
};
}
/** Callback when VAD detects silence (auto-stop) */
onSilenceDetected(callback: SilenceCallback): () => void {
this.silenceListeners.push(callback);
return () => {
this.silenceListeners = this.silenceListeners.filter(cb => cb !== callback);
};
}
private setState(state: RecordingState): void {
if (this.recordingState !== state) {
this.recordingState = state;
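For context, here is a minimal caller-side sketch of the new auto-stop flow. The import path and the stopRecording() name are assumptions (the stop method's name is not visible in the hunks above); the rest follows the API added in this diff:

import audioService from './services/AudioService'; // assumed path

async function recordUtterance(): Promise<void> {
  // Subscribe before starting; both subscriptions return an unsubscribe function
  const offMeter = audioService.onMeterUpdate((db) => {
    console.log(`level: ${db} dB`); // e.g. drive a level indicator in the UI
  });
  const offSilence = audioService.onSilenceDetected(async () => {
    // VAD saw >= 1800 ms below -45 dB; the service only notifies,
    // so the caller has to stop the recording itself
    const clip = await audioService.stopRecording(); // assumed method name
    offMeter();
    offSilence();
    if (clip) {
      console.log(`${clip.durationMs}ms of ${clip.mimeType} recorded`);
      // clip.base64 is ready for the STT backend
    }
  });
  if (!(await audioService.startRecording(true))) { // true = enable VAD/auto-stop
    offMeter();
    offSilence();
  }
}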
+145
@@ -0,0 +1,145 @@
/**
* Wake Word Service: "ARIA" detection
*
* Uses react-native-live-audio-stream for continuous microphone monitoring.
* Detects speech via an energy threshold and sends short audio clips
* to the server-side wake-word check (openwakeword in the bridge).
*
* Architecture:
* App (microphone) → energy detection → audio buffer
* → RVS "wake_check" → bridge → openwakeword → confirmation
* → app starts recording
*
* Current (phase 1): simple tap-to-talk + auto-stop.
* Later (phase 2): Porcupine on-device "ARIA" keyword.
*/
import LiveAudioStream from 'react-native-live-audio-stream';
type WakeWordCallback = () => void;
type StateCallback = (state: WakeWordState) => void;
export type WakeWordState = 'off' | 'listening' | 'detected';
class WakeWordService {
private state: WakeWordState = 'off';
private wakeCallbacks: WakeWordCallback[] = [];
private stateCallbacks: StateCallback[] = [];
private isInitialized = false;
/** Start wake word detection */
async start(): Promise<boolean> {
if (this.state === 'listening') return true;
try {
if (!this.isInitialized) {
LiveAudioStream.init({
sampleRate: 16000,
channels: 1,
bitsPerSample: 16,
audioSource: 6, // VOICE_RECOGNITION
bufferSize: 4096,
});
// Register the data listener once; re-registering on every start()
// would stack duplicate handlers (the library exposes no off())
LiveAudioStream.on('data', (base64Chunk: string) => {
if (this.state !== 'listening') return;
// Base64 → Int16 array → compute RMS
const raw = this._base64ToInt16(base64Chunk);
const rms = this._calculateRMS(raw);
// Threshold: loud enough → wake word detected
// Phase 1: simple energy detection (someone is speaking)
// Phase 2: Porcupine "ARIA" keyword
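// For reference: RMS 2000 on 16-bit PCM (full scale 32768) is about -24 dBFS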
if (rms > 2000) {
this.setState('detected');
this.wakeCallbacks.forEach(cb => cb());
// Pause briefly after detection; the recording takes over the microphone
this.stop();
}
});
this.isInitialized = true;
}
// Start the audio stream and monitor energy
LiveAudioStream.start();
this.setState('listening');
console.log('[WakeWord] Listening started');
return true;
} catch (err) {
console.error('[WakeWord] Failed to start:', err);
return false;
}
}
/** Stop wake word detection */
stop(): void {
if (this.state === 'off') return;
try {
LiveAudioStream.stop();
} catch {}
this.setState('off');
console.log('[WakeWord] Stopped');
}
/** Restart listening after a recording */
async resume(): Promise<void> {
// Short pause so the recorder can release the microphone first;
// awaiting here makes the returned promise resolve only after the restart
await new Promise<void>((resolve) => setTimeout(resolve, 500));
if (this.state === 'off') {
await this.start();
}
}
// --- Callbacks ---
onWakeWord(callback: WakeWordCallback): () => void {
this.wakeCallbacks.push(callback);
return () => {
this.wakeCallbacks = this.wakeCallbacks.filter(cb => cb !== callback);
};
}
onStateChange(callback: StateCallback): () => void {
this.stateCallbacks.push(callback);
return () => {
this.stateCallbacks = this.stateCallbacks.filter(cb => cb !== callback);
};
}
getState(): WakeWordState {
return this.state;
}
// --- Helpers ---
private setState(state: WakeWordState): void {
if (this.state !== state) {
this.state = state;
this.stateCallbacks.forEach(cb => cb(state));
}
}
private _base64ToInt16(base64: string): Int16Array {
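// Assumes atob() exists in the JS runtime (recent Hermes provides it;
// older runtimes need a Base64 polyfill)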
const binary = atob(base64);
const bytes = new Uint8Array(binary.length);
for (let i = 0; i < binary.length; i++) {
bytes[i] = binary.charCodeAt(i);
}
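// Reinterpret the raw bytes as 16-bit PCM samples; assumes little-endian
// byte order, which matches Android's PCM output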
return new Int16Array(bytes.buffer);
}
private _calculateRMS(samples: Int16Array): number {
if (samples.length === 0) return 0;
let sum = 0;
for (let i = 0; i < samples.length; i++) {
sum += samples[i] * samples[i];
}
return Math.sqrt(sum / samples.length);
}
}
const wakeWordService = new WakeWordService();
export default wakeWordService;
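The intended handover between the two services, per the wake-word header comment, looks roughly like this; a sketch under the same assumptions as above (default exports, a stopRecording() method):

import wakeWordService from './services/WakeWordService'; // assumed paths
import audioService from './services/AudioService';

// Wake word fired: hand the microphone to the recorder with auto-stop enabled
wakeWordService.onWakeWord(async () => {
  const started = await audioService.startRecording(true);
  if (!started) {
    wakeWordService.resume(); // recording failed, go back to listening
  }
});

// VAD reported silence: finish the clip, then resume wake-word listening
audioService.onSilenceDetected(async () => {
  const clip = await audioService.stopRecording(); // assumed method name
  await wakeWordService.resume(); // waits ~500 ms so the microphone is released
  if (clip) {
    // clip.base64 would go to the bridge for STT / wake confirmation
  }
});

wakeWordService.start();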