added audio wake word and recording, edited readme

duffyduck 2026-03-29 11:29:15 +02:00
parent b687f790ba
commit dbd97d3cf4
15 changed files with 912 additions and 798 deletions

README.md: 1139 changes (file diff suppressed because it is too large)

View File: AndroidManifest.xml

@@ -2,6 +2,7 @@
 <uses-permission android:name="android.permission.INTERNET" />
 <uses-permission android:name="android.permission.CAMERA" />
+<uses-permission android:name="android.permission.RECORD_AUDIO" />
 <application
     android:name=".MainApplication"
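Declaring RECORD_AUDIO covers only the install-time side; on Android 6+ the permission must also be granted at runtime, which the audio service further down handles via PermissionsAndroid. As a minimal self-contained sketch of that runtime check (the helper name ensureMicPermission is illustrative, not part of this commit):

import { PermissionsAndroid, Platform } from 'react-native';

// Request the microphone permission at runtime (Android 6+).
// iOS prompts on first recorder use, so we just return true there.
export async function ensureMicPermission(): Promise<boolean> {
  if (Platform.OS !== 'android') return true;
  const result = await PermissionsAndroid.request(
    PermissionsAndroid.PERMISSIONS.RECORD_AUDIO,
    {
      title: 'Microphone access',
      message: 'ARIA needs the microphone for voice input.',
      buttonPositive: 'OK',
    },
  );
  return result === PermissionsAndroid.RESULTS.GRANTED;
}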

View File: package.json

@@ -23,7 +23,9 @@
     "react-native-permissions": "^4.1.4",
     "react-native-camera-kit": "^13.0.0",
     "@react-native-async-storage/async-storage": "^1.21.0",
-    "react-native-fs": "^2.20.0"
+    "react-native-fs": "^2.20.0",
+    "react-native-audio-recorder-player": "^3.6.7",
+    "react-native-live-audio-stream": "^1.3.5"
   },
   "devDependencies": {
     "typescript": "^5.3.3",

View File: components/VoiceButton.tsx

@@ -1,18 +1,23 @@
 /**
- * VoiceButton - push-to-talk recording button
+ * VoiceButton - push-to-talk + auto-stop recording button
+ *
+ * Two modes:
+ * 1. Push-to-Talk: hold to record, release to send
+ * 2. Tap-to-Talk: a single tap starts recording, VAD auto-stops on silence
+ *    (also used for wake-word-triggered recording)
  *
- * Large round button: hold to record, release to send.
  * Visual feedback via a pulsing animation while recording.
  */
-import React, { useState, useRef, useEffect } from 'react';
+import React, { useState, useRef, useEffect, useCallback } from 'react';
 import {
   View,
   Text,
   Animated,
   StyleSheet,
-  GestureResponderEvent,
   Easing,
-  Pressable,
+  TouchableOpacity,
 } from 'react-native';
 import audioService, { RecordingResult } from '../services/audio';
@@ -23,15 +28,23 @@ interface VoiceButtonProps {
   onRecordingComplete: (result: RecordingResult) => void;
   /** Disable the button */
   disabled?: boolean;
+  /** Wake-word mode active (shows an indicator) */
+  wakeWordActive?: boolean;
 }

 // --- Component ---

-const VoiceButton: React.FC<VoiceButtonProps> = ({ onRecordingComplete, disabled = false }) => {
+const VoiceButton = React.forwardRef<VoiceButtonHandle, VoiceButtonProps>(({
+  onRecordingComplete,
+  disabled = false,
+  wakeWordActive = false,
+}, ref) => {
   const [isRecording, setIsRecording] = useState(false);
   const [durationMs, setDurationMs] = useState(0);
+  const [meterDb, setMeterDb] = useState(-160);
   const pulseAnim = useRef(new Animated.Value(1)).current;
   const durationTimer = useRef<ReturnType<typeof setInterval> | null>(null);
+  const isLongPress = useRef(false);

   // Start/stop the pulse animation
   useEffect(() => {
@@ -59,53 +72,111 @@ const VoiceButton: React.FC<VoiceButtonProps> = ({ onRecordingComplete, disabled
     }
   }, [isRecording, pulseAnim]);

-  // Count the recording duration
+  // Count the recording duration + metering
   useEffect(() => {
     if (isRecording) {
       setDurationMs(0);
       durationTimer.current = setInterval(() => {
         setDurationMs(prev => prev + 100);
       }, 100);
+      const unsubMeter = audioService.onMeterUpdate(setMeterDb);
+      return () => {
+        unsubMeter();
+        if (durationTimer.current) clearInterval(durationTimer.current);
+      };
     } else {
       if (durationTimer.current) {
         clearInterval(durationTimer.current);
         durationTimer.current = null;
       }
     }
-    return () => {
-      if (durationTimer.current) {
-        clearInterval(durationTimer.current);
-      }
-    };
   }, [isRecording]);

-  const handlePressIn = async (_event: GestureResponderEvent) => {
-    if (disabled) return;
-    const started = await audioService.startRecording();
+  // VAD silence callback: auto-stop
+  useEffect(() => {
+    const unsubSilence = audioService.onSilenceDetected(async () => {
+      if (!isRecording) return;
+      setIsRecording(false);
+      const result = await audioService.stopRecording();
+      if (result && result.durationMs > 500) {
+        onRecordingComplete(result);
+      }
+    });
+    return unsubSilence;
+  }, [isRecording, onRecordingComplete]);
+
+  // Auto-start for wake word (triggered externally)
+  const startAutoRecording = useCallback(async () => {
+    if (disabled || isRecording) return;
+    const started = await audioService.startRecording(true); // autoStop = true
+    if (started) {
+      isLongPress.current = false;
+      setIsRecording(true);
+    }
+  }, [disabled, isRecording]);
+
+  // Push-to-Talk: long press
+  const handlePressIn = async () => {
+    if (disabled || isRecording) return;
+    isLongPress.current = true;
+    const started = await audioService.startRecording(false); // no autoStop
     if (started) {
       setIsRecording(true);
     }
   };

-  const handlePressOut = async (_event: GestureResponderEvent) => {
-    if (!isRecording) return;
+  const handlePressOut = async () => {
+    if (!isRecording || !isLongPress.current) return;
+    isLongPress.current = false;
     setIsRecording(false);
     const result = await audioService.stopRecording();
     if (result && result.durationMs > 300) {
+      // Only send recordings longer than 300 ms (avoids accidental taps)
       onRecordingComplete(result);
     }
   };

+  // Tap-to-Talk: a single tap starts recording with auto-stop
+  const handleTap = async () => {
+    if (disabled) return;
+    if (isRecording) {
+      // Stop the recording manually
+      setIsRecording(false);
+      const result = await audioService.stopRecording();
+      if (result && result.durationMs > 300) {
+        onRecordingComplete(result);
+      }
+    } else {
+      // Start recording with auto-stop
+      const started = await audioService.startRecording(true);
+      if (started) {
+        isLongPress.current = false;
+        setIsRecording(true);
+      }
+    }
+  };
+
+  // Expose startAutoRecording on the forwarded ref (wake word)
+  React.useImperativeHandle(
+    ref,
+    () => ({ startAutoRecording }),
+    [startAutoRecording],
+  );
+
   const formatDuration = (ms: number): string => {
     const seconds = Math.floor(ms / 1000);
     const tenths = Math.floor((ms % 1000) / 100);
     return `${seconds}.${tenths}s`;
   };

+  // Meter visualization (0-1 scale)
+  const meterLevel = Math.max(0, Math.min(1, (meterDb + 60) / 60));
+
   return (
     <View style={styles.container}>
+      {wakeWordActive && !isRecording && (
+        <View style={styles.wakeWordDot} />
+      )}
       <Animated.View
         style={[
           styles.buttonOuter,
@@ -117,17 +188,28 @@ const VoiceButton: React.FC<VoiceButtonProps> = ({ onRecordingComplete, disabled
         onResponderRelease={handlePressOut}
         onResponderTerminate={handlePressOut}
       >
-        <View style={[styles.buttonInner, isRecording && styles.buttonInnerRecording]}>
+        <TouchableOpacity
+          activeOpacity={0.8}
+          onPress={handleTap}
+          disabled={disabled}
+          style={[styles.buttonInner, isRecording && styles.buttonInnerRecording]}
+        >
           <Text style={styles.buttonIcon}>{isRecording ? '⏹' : '🎙'}</Text>
-        </View>
+        </TouchableOpacity>
       </Animated.View>
       {isRecording && (
-        <Text style={styles.durationText}>{formatDuration(durationMs)}</Text>
+        <View style={styles.infoRow}>
+          <View style={[styles.meterBar, { width: `${meterLevel * 100}%` }]} />
+          <Text style={styles.durationText}>{formatDuration(durationMs)}</Text>
+        </View>
       )}
     </View>
   );
-};
+});
+
+// The handle exposed to external callers (wake word)
+export type VoiceButtonHandle = { startAutoRecording: () => Promise<void> };

 // --- Styles ---

 const styles = StyleSheet.create({
@@ -135,6 +217,16 @@
     alignItems: 'center',
     justifyContent: 'center',
   },
+  wakeWordDot: {
+    position: 'absolute',
+    top: -4,
+    right: -4,
+    width: 10,
+    height: 10,
+    borderRadius: 5,
+    backgroundColor: '#34C759',
+    zIndex: 10,
+  },
   buttonOuter: {
     width: 64,
     height: 64,
@@ -165,10 +257,20 @@
   buttonIcon: {
     fontSize: 24,
   },
+  infoRow: {
+    alignItems: 'center',
+    marginTop: 4,
+    width: 80,
+  },
+  meterBar: {
+    height: 3,
+    backgroundColor: '#FF3B30',
+    borderRadius: 2,
+    marginBottom: 2,
+  },
   durationText: {
     color: '#FF3B30',
     fontSize: 12,
-    marginTop: 4,
     fontVariant: ['tabular-nums'],
   },
 });
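With the component wrapped in forwardRef, the exported VoiceButtonHandle gives external code (e.g. a wake word listener) a way to trigger startAutoRecording. A hedged usage sketch; HostScreen and sendVoice are illustrative names, not part of this commit:

import React, { useRef } from 'react';
import VoiceButton, { VoiceButtonHandle } from '../components/VoiceButton';
import { RecordingResult } from '../services/audio';

const HostScreen: React.FC = () => {
  const voiceRef = useRef<VoiceButtonHandle>(null);

  const sendVoice = (result: RecordingResult) => {
    console.log(`got ${result.durationMs} ms of ${result.mimeType}`);
  };

  // A wake word handler could start a hands-free recording like this:
  // voiceRef.current?.startAutoRecording();

  return (
    <VoiceButton
      ref={voiceRef}
      onRecordingComplete={sendVoice}
      wakeWordActive
    />
  );
};

export default HostScreen;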

View File: ChatScreen.tsx

@@ -20,6 +20,7 @@ import {
 import AsyncStorage from '@react-native-async-storage/async-storage';
 import rvs, { RVSMessage, ConnectionState } from '../services/rvs';
 import audioService from '../services/audio';
+import wakeWordService from '../services/wakeword';
 import VoiceButton from '../components/VoiceButton';
 import FileUpload, { FileData } from '../components/FileUpload';
 import CameraUpload, { PhotoData } from '../components/CameraUpload';
@@ -56,6 +57,7 @@
   const [showFileUpload, setShowFileUpload] = useState(false);
   const [showCameraUpload, setShowCameraUpload] = useState(false);
   const [gpsEnabled, setGpsEnabled] = useState(false);
+  const [wakeWordActive, setWakeWordActive] = useState(false);
   const flatListRef = useRef<FlatList>(null);
   const messageIdCounter = useRef(0);
@@ -134,6 +136,62 @@
     };
   }, []);

+  // Wake word: "ARIA" detected -> start auto recording
+  useEffect(() => {
+    const unsubWake = wakeWordService.onWakeWord(async () => {
+      console.log('[Chat] Wake word detected, starting auto recording');
+      // Stop TTS so ARIA does not hear itself
+      audioService.stopPlayback();
+      // Start recording with auto-stop (VAD)
+      const started = await audioService.startRecording(true);
+      if (!started) {
+        // Microphone unavailable, re-enable the wake word
+        wakeWordService.resume();
+      }
+    });
+
+    // Auto-stop callback: silence detected -> send the recording + restart the wake word
+    const unsubSilence = audioService.onSilenceDetected(async () => {
+      const result = await audioService.stopRecording();
+      if (result && result.durationMs > 500) {
+        // Send the voice message (same logic as handleVoiceRecording)
+        const location = await getCurrentLocation();
+        const userMsg: ChatMessage = {
+          id: nextId(),
+          sender: 'user',
+          text: '[Voice message]',
+          timestamp: Date.now(),
+          attachments: [{ type: 'audio', name: 'Voice recording' }],
+        };
+        setMessages(prev => [...prev, userMsg]);
+        rvs.send('audio', {
+          base64: result.base64,
+          durationMs: result.durationMs,
+          mimeType: result.mimeType,
+          ...(location && { location }),
+        });
+      }
+      // Re-enable the wake word
+      if (wakeWordActive) wakeWordService.resume();
+    });
+
+    return () => {
+      unsubWake();
+      unsubSilence();
+    };
+  }, [wakeWordActive]);
+
+  // Wake word toggle handler
+  const toggleWakeWord = useCallback(async () => {
+    if (wakeWordActive) {
+      wakeWordService.stop();
+      setWakeWordActive(false);
+    } else {
+      const started = await wakeWordService.start();
+      setWakeWordActive(started);
+    }
+  }, [wakeWordActive]);
+
   // Persist the chat history to AsyncStorage (last N messages)
   useEffect(() => {
     if (messages.length === 0) return;
@@ -366,7 +424,14 @@
         <VoiceButton
           onRecordingComplete={handleVoiceRecording}
           disabled={connectionState !== 'connected'}
+          wakeWordActive={wakeWordActive}
         />
+        <TouchableOpacity
+          style={[styles.wakeWordBtn, wakeWordActive && styles.wakeWordBtnActive]}
+          onPress={toggleWakeWord}
+        >
+          <Text style={styles.wakeWordIcon}>{wakeWordActive ? '👂' : '🔇'}</Text>
+        </TouchableOpacity>
       )}
     </View>
@@ -530,6 +595,21 @@
   sendIcon: {
     fontSize: 18,
   },
+  wakeWordBtn: {
+    width: 32,
+    height: 32,
+    borderRadius: 16,
+    backgroundColor: 'rgba(255,255,255,0.1)',
+    alignItems: 'center',
+    justifyContent: 'center',
+    marginLeft: 4,
+  },
+  wakeWordBtnActive: {
+    backgroundColor: 'rgba(52, 199, 89, 0.3)',
+  },
+  wakeWordIcon: {
+    fontSize: 16,
+  },
   modalOverlay: {
     flex: 1,
     backgroundColor: 'rgba(0,0,0,0.6)',
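Both send paths (handleVoiceRecording and the wake word silence handler above) emit the same "audio" message over RVS, which the bridge's _process_app_audio later consumes. A sketch of that payload as a TypeScript interface; the field names are taken from this diff, while the interface itself and the exact location shape are illustrative assumptions:

// Illustrative shape of the RVS "audio" payload; not a type that exists
// in the codebase. Field names match the rvs.send('audio', ...) calls.
interface AudioMessagePayload {
  base64: string;      // AAC/MP4 recording, Base64-encoded
  durationMs: number;  // client-side duration (min-length checks use this)
  mimeType: string;    // 'audio/mp4' from the recorder
  location?: {         // assumed GPS shape, attached only when available
    latitude: number;
    longitude: number;
  };
}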

View File: services/audio.ts

@@ -1,13 +1,20 @@
 /**
  * Audio service for voice input/output
  *
- * Manages microphone recording and TTS audio playback.
- * Uses react-native-sound and the native audio APIs.
+ * Manages microphone recording (with VAD/auto-stop on silence),
+ * TTS audio playback, and metering for visual feedback.
+ * Uses react-native-audio-recorder-player for recording.
  */
 import { Platform, PermissionsAndroid } from 'react-native';
 import Sound from 'react-native-sound';
 import RNFS from 'react-native-fs';
+import AudioRecorderPlayer, {
+  AudioEncoderAndroidType,
+  AudioSourceAndroidType,
+  AVEncodingOption,
+  OutputFormatAndroidType,
+} from 'react-native-audio-recorder-player';

 // --- Types ---
@@ -23,6 +30,8 @@ export interface RecordingResult {
 export type RecordingState = 'idle' | 'recording' | 'processing';
 type RecordingStateCallback = (state: RecordingState) => void;
+type MeterCallback = (db: number) => void;
+type SilenceCallback = () => void;

 // --- Constants ---
@@ -30,17 +39,34 @@ const AUDIO_SAMPLE_RATE = 16000;
 const AUDIO_CHANNELS = 1;
 const AUDIO_ENCODING = 'audio/wav';

+// VAD (Voice Activity Detection): silence detection
+const VAD_SILENCE_THRESHOLD_DB = -45; // below this dB counts as "silence"
+const VAD_SILENCE_DURATION_MS = 1800; // ms of silence before auto-stop
+
 // --- Audio service ---

 class AudioService {
   private recordingState: RecordingState = 'idle';
   private recordingStartTime: number = 0;
   private stateListeners: RecordingStateCallback[] = [];
+  private meterListeners: MeterCallback[] = [];
+  private silenceListeners: SilenceCallback[] = [];
   private currentSound: Sound | null = null;
+  private recorder: AudioRecorderPlayer;
+  private recordingPath: string = '';
+
+  // VAD state
+  private vadEnabled: boolean = false;
+  private lastSpeechTime: number = 0;
+  private vadTimer: ReturnType<typeof setInterval> | null = null;
+
+  constructor() {
+    this.recorder = new AudioRecorderPlayer();
+    this.recorder.setSubscriptionDuration(0.1); // metering updates every 100 ms
+  }

   // --- Permissions ---

+  /** Request the microphone permission */
   async requestMicrophonePermission(): Promise<boolean> {
     if (Platform.OS !== 'android') {
       return true;
@@ -66,7 +92,7 @@
   // --- Recording ---

   /** Start a microphone recording */
-  async startRecording(): Promise<boolean> {
+  async startRecording(autoStop: boolean = false): Promise<boolean> {
     if (this.recordingState !== 'idle') {
       console.warn('[Audio] Recording already in progress');
       return false;
@@ -79,11 +105,48 @@
     }

     try {
-      // Native recording start via an AudioRecorder bridge
-      // In production: use a native module or react-native-audio-recorder-player
+      // Stop any running playback (so ARIA does not hear itself)
+      this.stopPlayback();
+
+      this.recordingPath = `${RNFS.CachesDirectoryPath}/aria_recording_${Date.now()}.mp4`;
+
+      // Start recording with metering
+      await this.recorder.startRecorder(this.recordingPath, {
+        AudioEncoderAndroid: AudioEncoderAndroidType.AAC,
+        AudioSourceAndroid: AudioSourceAndroidType.MIC,
+        OutputFormatAndroid: OutputFormatAndroidType.MPEG_4,
+      }, true); // meteringEnabled = true
+
+      // Metering callback
+      this.recorder.addRecordBackListener((e) => {
+        const db = e.currentMetering ?? -160;
+        this.meterListeners.forEach(cb => cb(db));
+
+        // VAD: detect silence
+        if (this.vadEnabled) {
+          if (db > VAD_SILENCE_THRESHOLD_DB) {
+            this.lastSpeechTime = Date.now();
+          }
+        }
+      });
+
       this.recordingStartTime = Date.now();
+      this.lastSpeechTime = Date.now();
       this.setState('recording');
-      console.log('[Audio] Recording started');
+
+      // Enable VAD
+      this.vadEnabled = autoStop;
+      if (autoStop) {
+        this.vadTimer = setInterval(() => {
+          const silenceDuration = Date.now() - this.lastSpeechTime;
+          if (silenceDuration >= VAD_SILENCE_DURATION_MS) {
+            console.log(`[Audio] VAD: ${silenceDuration}ms of silence, auto-stop`);
+            // Reset so the callback does not re-fire while stopRecording is in flight
+            this.lastSpeechTime = Date.now();
+            this.silenceListeners.forEach(cb => cb());
+          }
+        }, 200);
+      }
+
+      console.log('[Audio] Recording started (autoStop: %s)', autoStop);
       return true;
     } catch (err) {
       console.error('[Audio] Failed to start recording:', err);
@@ -100,22 +163,31 @@
     }

     this.setState('processing');
+    this.vadEnabled = false;
+    if (this.vadTimer) {
+      clearInterval(this.vadTimer);
+      this.vadTimer = null;
+    }

     try {
+      await this.recorder.stopRecorder();
+      this.recorder.removeRecordBackListener();
       const durationMs = Date.now() - this.recordingStartTime;

-      // In production: fetch the audio data from the native recorder
-      // const audioData = await NativeAudioRecorder.stop();
-      const base64Placeholder = ''; // placeholder until the native bridge exists
+      // Read the audio file as Base64
+      const base64Data = await RNFS.readFile(this.recordingPath, 'base64');
+
+      // Clean up the temp file
+      RNFS.unlink(this.recordingPath).catch(() => {});

       this.setState('idle');
-      console.log(`[Audio] Recording stopped (${durationMs}ms)`);
+      console.log(`[Audio] Recording stopped (${durationMs}ms, ${Math.round(base64Data.length / 1024)}KB)`);

       return {
-        base64: base64Placeholder,
+        base64: base64Data,
         durationMs,
-        mimeType: AUDIO_ENCODING,
+        mimeType: 'audio/mp4', // AAC in an MP4 container
       };
     } catch (err) {
       console.error('[Audio] Failed to stop recording:', err);
@@ -134,7 +206,7 @@
     this.stopPlayback();

     try {
-      // Base64 → temporary WAV file → play as Sound
+      // Base64 -> temporary WAV file -> play as Sound
       const tmpPath = `${RNFS.CachesDirectoryPath}/aria_tts_${Date.now()}.wav`;
       await RNFS.writeFile(tmpPath, base64Data, 'base64');
@@ -152,7 +224,6 @@
         }
         this.currentSound?.release();
         this.currentSound = null;
-        // Clean up the temp file
         RNFS.unlink(tmpPath).catch(() => {});
       });
     });
@@ -170,7 +241,7 @@
     }
   }

-  // --- Status ---
+  // --- Status & callbacks ---

   getRecordingState(): RecordingState {
     return this.recordingState;
@@ -184,6 +255,22 @@
     };
   }

+  /** Callback for metering updates (dB values while recording) */
+  onMeterUpdate(callback: MeterCallback): () => void {
+    this.meterListeners.push(callback);
+    return () => {
+      this.meterListeners = this.meterListeners.filter(cb => cb !== callback);
+    };
+  }
+
+  /** Callback fired when VAD detects silence (auto-stop) */
+  onSilenceDetected(callback: SilenceCallback): () => void {
+    this.silenceListeners.push(callback);
+    return () => {
+      this.silenceListeners = this.silenceListeners.filter(cb => cb !== callback);
+    };
+  }
+
   private setState(state: RecordingState): void {
     if (this.recordingState !== state) {
       this.recordingState = state;
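For orientation, a minimal consumer sketch of the new callback API, using only methods defined in this diff (recordUntilSilence is an illustrative name; VoiceButton and ChatScreen do the equivalent in this commit):

import audioService from './services/audio';

// Start a recording that stops itself on silence, logging the level meanwhile.
async function recordUntilSilence(): Promise<void> {
  const offMeter = audioService.onMeterUpdate(db => {
    console.log(`level: ${db.toFixed(1)} dB`); // -160 = silence, ~0 = clipping
  });

  const offSilence = audioService.onSilenceDetected(async () => {
    const result = await audioService.stopRecording();
    offMeter();
    offSilence();
    if (result) console.log(`captured ${result.durationMs} ms of ${result.mimeType}`);
  });

  await audioService.startRecording(true); // autoStop = true enables the VAD timer
}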

View File: services/wakeword.ts (new file)

@@ -0,0 +1,145 @@
+/**
+ * Wake word service: "ARIA" detection
+ *
+ * Uses react-native-live-audio-stream for continuous microphone monitoring.
+ * Detects speech via an energy threshold and sends short audio clips
+ * for server-side wake word verification (openwakeword in the bridge).
+ *
+ * Architecture:
+ *   app (microphone) -> energy detection -> audio buffer
+ *   -> RVS "wake_check" -> bridge -> openwakeword -> confirmation
+ *   -> app starts recording
+ *
+ * Current (phase 1): simple tap-to-talk + auto-stop.
+ * Later (phase 2): Porcupine on-device "ARIA" keyword.
+ */
+import LiveAudioStream from 'react-native-live-audio-stream';
+
+type WakeWordCallback = () => void;
+type StateCallback = (state: WakeWordState) => void;
+
+export type WakeWordState = 'off' | 'listening' | 'detected';
+
+class WakeWordService {
+  private state: WakeWordState = 'off';
+  private wakeCallbacks: WakeWordCallback[] = [];
+  private stateCallbacks: StateCallback[] = [];
+  private isInitialized = false;
+
+  /** Start wake word detection */
+  async start(): Promise<boolean> {
+    if (this.state === 'listening') return true;
+
+    try {
+      if (!this.isInitialized) {
+        LiveAudioStream.init({
+          sampleRate: 16000,
+          channels: 1,
+          bitsPerSample: 16,
+          audioSource: 6, // VOICE_RECOGNITION
+          bufferSize: 4096,
+        });
+
+        // Register the chunk handler once; re-registering on every start()
+        // would stack duplicate listeners
+        LiveAudioStream.on('data', (base64Chunk: string) => {
+          if (this.state !== 'listening') return;
+
+          // Base64 -> Int16 array -> compute RMS
+          const raw = this._base64ToInt16(base64Chunk);
+          const rms = this._calculateRMS(raw);
+
+          // Threshold: loud enough counts as a wake event
+          // Phase 1: simple energy detection (someone is speaking)
+          // Phase 2: Porcupine "ARIA" keyword
+          if (rms > 2000) {
+            this.setState('detected');
+            this.wakeCallbacks.forEach(cb => cb());
+            // Pause after detection; the recording takes over the microphone
+            this.stop();
+          }
+        });
+
+        this.isInitialized = true;
+      }
+
+      // Start the audio stream; each chunk is checked for energy
+      LiveAudioStream.start();
+
+      this.setState('listening');
+      console.log('[WakeWord] Listening started');
+      return true;
+    } catch (err) {
+      console.error('[WakeWord] Start failed:', err);
+      return false;
+    }
+  }
+
+  /** Stop wake word detection */
+  stop(): void {
+    if (this.state === 'off') return;
+    try {
+      LiveAudioStream.stop();
+    } catch {}
+    this.setState('off');
+    console.log('[WakeWord] Stopped');
+  }
+
+  /** Restart after a recording has finished */
+  async resume(): Promise<void> {
+    // Short delay so the recorder can release the microphone
+    setTimeout(() => {
+      if (this.state === 'off') {
+        this.start();
+      }
+    }, 500);
+  }
+
+  // --- Callbacks ---
+
+  onWakeWord(callback: WakeWordCallback): () => void {
+    this.wakeCallbacks.push(callback);
+    return () => {
+      this.wakeCallbacks = this.wakeCallbacks.filter(cb => cb !== callback);
+    };
+  }
+
+  onStateChange(callback: StateCallback): () => void {
+    this.stateCallbacks.push(callback);
+    return () => {
+      this.stateCallbacks = this.stateCallbacks.filter(cb => cb !== callback);
+    };
+  }
+
+  getState(): WakeWordState {
+    return this.state;
+  }
+
+  // --- Helpers ---
+
+  private setState(state: WakeWordState): void {
+    if (this.state !== state) {
+      this.state = state;
+      this.stateCallbacks.forEach(cb => cb(state));
+    }
+  }
+
+  private _base64ToInt16(base64: string): Int16Array {
+    // atob is available on Hermes; older runtimes need a Base64 polyfill
+    const binary = atob(base64);
+    const bytes = new Uint8Array(binary.length);
+    for (let i = 0; i < binary.length; i++) {
+      bytes[i] = binary.charCodeAt(i);
+    }
+    return new Int16Array(bytes.buffer);
+  }
+
+  private _calculateRMS(samples: Int16Array): number {
+    if (samples.length === 0) return 0;
+    let sum = 0;
+    for (let i = 0; i < samples.length; i++) {
+      sum += samples[i] * samples[i];
+    }
+    return Math.sqrt(sum / samples.length);
+  }
+}
+
+const wakeWordService = new WakeWordService();
+export default wakeWordService;
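To put the rms > 2000 threshold in perspective: samples are signed 16-bit (-32768..32767) and a sine wave's RMS is amplitude / sqrt(2), so the threshold corresponds to a tone of amplitude ~2828, roughly 9% of full scale. A standalone sketch of the same arithmetic, illustrative only:

// Compute the RMS of a synthetic 440 Hz sine chunk at 16 kHz, mirroring
// _calculateRMS above, to see which amplitudes cross the wake threshold.
function rmsOfSine(amplitude: number, samples = 4096): number {
  const buf = new Int16Array(samples);
  for (let i = 0; i < samples; i++) {
    buf[i] = Math.round(amplitude * Math.sin((2 * Math.PI * 440 * i) / 16000));
  }
  let sum = 0;
  for (const s of buf) sum += s * s;
  return Math.sqrt(sum / samples);
}

console.log(rmsOfSine(1000)); // ~707,  below 2000: ignored
console.log(rmsOfSine(8000)); // ~5657, above 2000: treated as speech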

View File

@@ -30,6 +30,7 @@ import wave
 from pathlib import Path
 from typing import Optional
+import subprocess
 import urllib.request
 import numpy as np
 import sounddevice as sd
@@ -959,13 +960,78 @@ class ARIABridge:
             await self.ws_core.send(raw_message)

         elif msg_type == "audio":
-            # Audio from the app -> STT -> to aria-core
-            logger.info("[rvs] Audio received, TODO: STT")
-            # Later: decode the audio, run it through Whisper, send the result to core
+            # Audio from the app -> decode -> STT -> to aria-core
+            audio_b64 = payload.get("base64", "")
+            mime_type = payload.get("mimeType", "audio/mp4")
+            duration_ms = payload.get("durationMs", 0)
+
+            if not audio_b64:
+                logger.warning("[rvs] Received audio message without data")
+                return
+
+            # ~1365 Base64 chars per KB of raw audio (4/3 encoding overhead)
+            logger.info("[rvs] Audio received: %s, %dms, %dKB",
+                        mime_type, duration_ms, len(audio_b64) // 1365)
+            asyncio.create_task(self._process_app_audio(audio_b64, mime_type))
+
         else:
             logger.debug("[rvs] Unknown type: %s", msg_type)

+    async def _process_app_audio(self, audio_b64: str, mime_type: str) -> None:
+        """Decodes app audio (Base64 AAC/MP4), converts to 16 kHz PCM, runs STT, sends to core."""
+        loop = asyncio.get_event_loop()
+        tmp_in = None
+        tmp_out = None
+        try:
+            # Base64 -> temp file
+            ext = ".mp4" if "mp4" in mime_type else ".wav" if "wav" in mime_type else ".ogg"
+            tmp_in = tempfile.NamedTemporaryFile(suffix=ext, delete=False)
+            tmp_in.write(base64.b64decode(audio_b64))
+            tmp_in.close()
+
+            # FFmpeg: any input format -> 16 kHz mono PCM (raw float32)
+            tmp_out = tempfile.NamedTemporaryFile(suffix=".raw", delete=False)
+            tmp_out.close()
+            cmd = [
+                "ffmpeg", "-y", "-i", tmp_in.name,
+                "-ar", "16000", "-ac", "1", "-f", "f32le",
+                tmp_out.name,
+            ]
+            result = await loop.run_in_executor(
+                None,
+                lambda: subprocess.run(cmd, capture_output=True, timeout=30),
+            )
+            if result.returncode != 0:
+                logger.error("[rvs] FFmpeg error: %s", result.stderr.decode()[:200])
+                return
+
+            # Read PCM -> numpy float32
+            audio_data = np.fromfile(tmp_out.name, dtype=np.float32)
+            if len(audio_data) == 0:
+                logger.warning("[rvs] Empty audio data after conversion")
+                return
+
+            duration_s = len(audio_data) / 16000.0
+            logger.info("[rvs] Audio converted: %.1fs, %d samples", duration_s, len(audio_data))
+
+            # STT
+            text = await loop.run_in_executor(None, self.stt_engine.transcribe, audio_data)
+            if text.strip():
+                logger.info("[rvs] STT result: '%s'", text[:80])
+                await self.send_to_core(text, source="app-voice")
+            else:
+                logger.info("[rvs] No speech recognized, ignoring")
+
+        except Exception:
+            logger.exception("[rvs] Audio processing failed")
+        finally:
+            # Clean up the temp files
+            for f in [tmp_in, tmp_out]:
+                if f:
+                    try:
+                        os.unlink(f.name)
+                    except OSError:
+                        pass
+
     async def _send_to_rvs(self, message: dict) -> None:
         """Sends a message to the app (via RVS)."""
         if self.ws_rvs is None or not self.ws_rvs.open:
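On the len(audio_b64) // 1365 above: Base64 encodes 3 raw bytes as 4 characters, so one kilobyte of audio is 1024 * 4 / 3 ≈ 1365 characters, and the integer division recovers approximate raw KB from the string length. The same conversion as a sketch (the helper name is illustrative):

// Approximate raw kilobytes represented by a Base64 string of a given length.
function base64CharsToKB(chars: number): number {
  const rawBytes = (chars * 3) / 4; // undo the 4/3 Base64 expansion
  return Math.floor(rawBytes / 1024);
}

console.log(base64CharsToKB(136_534)); // 100 KB of recorded audio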

View File: docker-compose.yml

@@ -87,7 +87,7 @@ services:
       - RVS_TOKEN=${RVS_TOKEN:-}
     restart: unless-stopped

-  # ─── Diagnostic (self-check UI) ──────────────────────
+  # ─── Diagnostic (self-check UI and settings) ─────────
   diagnostic:
     build: ./diagnostic
     container_name: aria-diagnostic