Added audio wake word and recording, edited README
This commit is contained in:
parent
b687f790ba
commit
dbd97d3cf4
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
|
@ -2,6 +2,7 @@
|
|||
|
||||
<uses-permission android:name="android.permission.INTERNET" />
|
||||
<uses-permission android:name="android.permission.CAMERA" />
|
||||
<uses-permission android:name="android.permission.RECORD_AUDIO" />
|
||||
|
||||
<application
|
||||
android:name=".MainApplication"
|
||||
|
|
|
|||
|
|
@ -23,7 +23,9 @@
|
|||
"react-native-permissions": "^4.1.4",
|
||||
"react-native-camera-kit": "^13.0.0",
|
||||
"@react-native-async-storage/async-storage": "^1.21.0",
|
||||
"react-native-fs": "^2.20.0"
|
||||
"react-native-fs": "^2.20.0",
|
||||
"react-native-audio-recorder-player": "^3.6.7",
|
||||
"react-native-live-audio-stream": "^1.3.5"
|
||||
},
|
||||
"devDependencies": {
|
||||
"typescript": "^5.3.3",
|
||||
|
|
|
|||
|
|
@ -1,18 +1,23 @@
|
|||
/**
|
||||
* VoiceButton - Push-to-Talk Aufnahmeknopf
|
||||
* VoiceButton - Push-to-Talk + Auto-Stop Aufnahmeknopf
|
||||
*
|
||||
* Zwei Modi:
|
||||
* 1. Push-to-Talk: gedrueckt halten zum Aufnehmen, loslassen zum Senden
|
||||
* 2. Tap-to-Talk: einmal tippen startet Aufnahme, VAD stoppt automatisch bei Stille
|
||||
* (auch genutzt fuer Wake-Word-getriggerte Aufnahme)
|
||||
*
|
||||
* Grosser runder Button: gedrueckt halten zum Aufnehmen, loslassen zum Senden.
|
||||
* Visuelles Feedback durch pulsierende Animation waehrend der Aufnahme.
|
||||
*/
|
||||
|
||||
import React, { useState, useRef, useEffect } from 'react';
|
||||
import React, { useState, useRef, useEffect, useCallback } from 'react';
|
||||
import {
|
||||
View,
|
||||
Text,
|
||||
Animated,
|
||||
StyleSheet,
|
||||
GestureResponderEvent,
|
||||
Easing,
|
||||
TouchableOpacity,
|
||||
Pressable,
|
||||
} from 'react-native';
|
||||
import audioService, { RecordingResult } from '../services/audio';
|
||||
|
||||
|
|
@ -23,15 +28,23 @@ interface VoiceButtonProps {
|
|||
onRecordingComplete: (result: RecordingResult) => void;
|
||||
/** Button deaktivieren */
|
||||
disabled?: boolean;
|
||||
/** Wake-Word-Modus aktiv (zeigt Indikator) */
|
||||
wakeWordActive?: boolean;
|
||||
}
|
||||
|
||||
// --- Komponente ---
|
||||
|
||||
const VoiceButton: React.FC<VoiceButtonProps> = ({ onRecordingComplete, disabled = false }) => {
|
||||
const VoiceButton: React.FC<VoiceButtonProps> = ({
|
||||
onRecordingComplete,
|
||||
disabled = false,
|
||||
wakeWordActive = false,
|
||||
}) => {
|
||||
const [isRecording, setIsRecording] = useState(false);
|
||||
const [durationMs, setDurationMs] = useState(0);
|
||||
const [meterDb, setMeterDb] = useState(-160);
|
||||
const pulseAnim = useRef(new Animated.Value(1)).current;
|
||||
const durationTimer = useRef<ReturnType<typeof setInterval> | null>(null);
|
||||
const isLongPress = useRef(false);
|
||||
|
||||
// Puls-Animation starten/stoppen
|
||||
useEffect(() => {
|
||||
|
|
@ -59,53 +72,111 @@ const VoiceButton: React.FC<VoiceButtonProps> = ({ onRecordingComplete, disabled
|
|||
}
|
||||
}, [isRecording, pulseAnim]);
|
||||
|
||||
// Aufnahmedauer zaehlen
|
||||
// Aufnahmedauer zaehlen + Metering
|
||||
useEffect(() => {
|
||||
if (isRecording) {
|
||||
setDurationMs(0);
|
||||
durationTimer.current = setInterval(() => {
|
||||
setDurationMs(prev => prev + 100);
|
||||
}, 100);
|
||||
|
||||
const unsubMeter = audioService.onMeterUpdate(setMeterDb);
|
||||
return () => {
|
||||
unsubMeter();
|
||||
if (durationTimer.current) clearInterval(durationTimer.current);
|
||||
};
|
||||
} else {
|
||||
if (durationTimer.current) {
|
||||
clearInterval(durationTimer.current);
|
||||
durationTimer.current = null;
|
||||
}
|
||||
}
|
||||
return () => {
|
||||
if (durationTimer.current) {
|
||||
clearInterval(durationTimer.current);
|
||||
}
|
||||
};
|
||||
}, [isRecording]);
|
||||
|
||||
const handlePressIn = async (_event: GestureResponderEvent) => {
|
||||
if (disabled) return;
|
||||
const started = await audioService.startRecording();
|
||||
// VAD Silence Callback — Auto-Stop
|
||||
useEffect(() => {
|
||||
const unsubSilence = audioService.onSilenceDetected(async () => {
|
||||
if (!isRecording) return;
|
||||
setIsRecording(false);
|
||||
const result = await audioService.stopRecording();
|
||||
if (result && result.durationMs > 500) {
|
||||
onRecordingComplete(result);
|
||||
}
|
||||
});
|
||||
return unsubSilence;
|
||||
}, [isRecording, onRecordingComplete]);
|
||||
|
||||
// Auto-Start fuer Wake Word (extern getriggert)
|
||||
const startAutoRecording = useCallback(async () => {
|
||||
if (disabled || isRecording) return;
|
||||
const started = await audioService.startRecording(true); // autoStop = true
|
||||
if (started) {
|
||||
isLongPress.current = false;
|
||||
setIsRecording(true);
|
||||
}
|
||||
}, [disabled, isRecording]);
|
||||
|
||||
// Push-to-Talk: Lang druecken
|
||||
const handlePressIn = async () => {
|
||||
if (disabled || isRecording) return;
|
||||
isLongPress.current = true;
|
||||
const started = await audioService.startRecording(false); // kein autoStop
|
||||
if (started) {
|
||||
setIsRecording(true);
|
||||
}
|
||||
};
|
||||
|
||||
const handlePressOut = async (_event: GestureResponderEvent) => {
|
||||
if (!isRecording) return;
|
||||
const handlePressOut = async () => {
|
||||
if (!isRecording || !isLongPress.current) return;
|
||||
isLongPress.current = false;
|
||||
setIsRecording(false);
|
||||
|
||||
const result = await audioService.stopRecording();
|
||||
if (result && result.durationMs > 300) {
|
||||
// Nur senden wenn laenger als 300ms (versehentliches Tippen vermeiden)
|
||||
onRecordingComplete(result);
|
||||
}
|
||||
};
|
||||
|
||||
// Tap-to-Talk: Einmal tippen startet mit Auto-Stop
|
||||
const handleTap = async () => {
|
||||
if (disabled) return;
|
||||
if (isRecording) {
|
||||
// Aufnahme manuell stoppen
|
||||
setIsRecording(false);
|
||||
const result = await audioService.stopRecording();
|
||||
if (result && result.durationMs > 300) {
|
||||
onRecordingComplete(result);
|
||||
}
|
||||
} else {
|
||||
// Aufnahme mit Auto-Stop starten
|
||||
const started = await audioService.startRecording(true);
|
||||
if (started) {
|
||||
isLongPress.current = false;
|
||||
setIsRecording(true);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Expose startAutoRecording via ref fuer Wake Word
|
||||
React.useImperativeHandle(
|
||||
React.createRef(),
|
||||
() => ({ startAutoRecording }),
|
||||
[startAutoRecording],
|
||||
);
|
||||
|
||||
const formatDuration = (ms: number): string => {
|
||||
const seconds = Math.floor(ms / 1000);
|
||||
const tenths = Math.floor((ms % 1000) / 100);
|
||||
return `${seconds}.${tenths}s`;
|
||||
};
|
||||
|
||||
// Meter-Visualisierung (0-1 Skala)
|
||||
const meterLevel = Math.max(0, Math.min(1, (meterDb + 60) / 60));
|
||||
|
||||
return (
|
||||
<View style={styles.container}>
|
||||
{wakeWordActive && !isRecording && (
|
||||
<View style={styles.wakeWordDot} />
|
||||
)}
|
||||
<Animated.View
|
||||
style={[
|
||||
styles.buttonOuter,
|
||||
|
|
@ -117,17 +188,28 @@ const VoiceButton: React.FC<VoiceButtonProps> = ({ onRecordingComplete, disabled
|
|||
onResponderRelease={handlePressOut}
|
||||
onResponderTerminate={handlePressOut}
|
||||
>
|
||||
<View style={[styles.buttonInner, isRecording && styles.buttonInnerRecording]}>
|
||||
<TouchableOpacity
|
||||
activeOpacity={0.8}
|
||||
onPress={handleTap}
|
||||
disabled={disabled}
|
||||
style={[styles.buttonInner, isRecording && styles.buttonInnerRecording]}
|
||||
>
|
||||
<Text style={styles.buttonIcon}>{isRecording ? '⏹' : '🎙'}</Text>
|
||||
</View>
|
||||
</TouchableOpacity>
|
||||
</Animated.View>
|
||||
{isRecording && (
|
||||
<Text style={styles.durationText}>{formatDuration(durationMs)}</Text>
|
||||
<View style={styles.infoRow}>
|
||||
<View style={[styles.meterBar, { width: `${meterLevel * 100}%` }]} />
|
||||
<Text style={styles.durationText}>{formatDuration(durationMs)}</Text>
|
||||
</View>
|
||||
)}
|
||||
</View>
|
||||
);
|
||||
};
|
||||
|
||||
// Expose startAutoRecording fuer externe Aufrufe (Wake Word)
|
||||
export type VoiceButtonHandle = { startAutoRecording: () => Promise<void> };
|
||||
|
||||
// --- Styles ---
|
||||
|
||||
const styles = StyleSheet.create({
|
||||
|
|
@ -135,6 +217,16 @@ const styles = StyleSheet.create({
|
|||
alignItems: 'center',
|
||||
justifyContent: 'center',
|
||||
},
|
||||
wakeWordDot: {
|
||||
position: 'absolute',
|
||||
top: -4,
|
||||
right: -4,
|
||||
width: 10,
|
||||
height: 10,
|
||||
borderRadius: 5,
|
||||
backgroundColor: '#34C759',
|
||||
zIndex: 10,
|
||||
},
|
||||
buttonOuter: {
|
||||
width: 64,
|
||||
height: 64,
|
||||
|
|
@ -165,10 +257,20 @@ const styles = StyleSheet.create({
|
|||
buttonIcon: {
|
||||
fontSize: 24,
|
||||
},
|
||||
infoRow: {
|
||||
alignItems: 'center',
|
||||
marginTop: 4,
|
||||
width: 80,
|
||||
},
|
||||
meterBar: {
|
||||
height: 3,
|
||||
backgroundColor: '#FF3B30',
|
||||
borderRadius: 2,
|
||||
marginBottom: 2,
|
||||
},
|
||||
durationText: {
|
||||
color: '#FF3B30',
|
||||
fontSize: 12,
|
||||
marginTop: 4,
|
||||
fontVariant: ['tabular-nums'],
|
||||
},
|
||||
});
|
||||
|
|
|
|||
|
|
@ -20,6 +20,7 @@ import {
|
|||
import AsyncStorage from '@react-native-async-storage/async-storage';
|
||||
import rvs, { RVSMessage, ConnectionState } from '../services/rvs';
|
||||
import audioService from '../services/audio';
|
||||
import wakeWordService from '../services/wakeword';
|
||||
import VoiceButton from '../components/VoiceButton';
|
||||
import FileUpload, { FileData } from '../components/FileUpload';
|
||||
import CameraUpload, { PhotoData } from '../components/CameraUpload';
|
||||
|
|
@ -56,6 +57,7 @@ const ChatScreen: React.FC = () => {
|
|||
const [showFileUpload, setShowFileUpload] = useState(false);
|
||||
const [showCameraUpload, setShowCameraUpload] = useState(false);
|
||||
const [gpsEnabled, setGpsEnabled] = useState(false);
|
||||
const [wakeWordActive, setWakeWordActive] = useState(false);
|
||||
|
||||
const flatListRef = useRef<FlatList>(null);
|
||||
const messageIdCounter = useRef(0);
|
||||
|
|
@ -134,6 +136,62 @@ const ChatScreen: React.FC = () => {
|
|||
};
|
||||
}, []);
|
||||
|
||||
// Wake Word: "ARIA" Erkennung → Auto-Aufnahme starten
|
||||
useEffect(() => {
|
||||
const unsubWake = wakeWordService.onWakeWord(async () => {
|
||||
console.log('[Chat] Wake Word erkannt — starte Auto-Aufnahme');
|
||||
// TTS stoppen damit ARIA sich nicht selbst hoert
|
||||
audioService.stopPlayback();
|
||||
// Aufnahme mit Auto-Stop (VAD) starten
|
||||
const started = await audioService.startRecording(true);
|
||||
if (!started) {
|
||||
// Mikrofon nicht verfuegbar, Wake Word wieder aktivieren
|
||||
wakeWordService.resume();
|
||||
}
|
||||
});
|
||||
|
||||
// Auto-Stop Callback: wenn Stille erkannt → Aufnahme senden + Wake Word wieder starten
|
||||
const unsubSilence = audioService.onSilenceDetected(async () => {
|
||||
const result = await audioService.stopRecording();
|
||||
if (result && result.durationMs > 500) {
|
||||
// Sprachnachricht senden (gleiche Logik wie handleVoiceRecording)
|
||||
const location = await getCurrentLocation();
|
||||
const userMsg: ChatMessage = {
|
||||
id: nextId(),
|
||||
sender: 'user',
|
||||
text: '[Sprachnachricht]',
|
||||
timestamp: Date.now(),
|
||||
attachments: [{ type: 'audio', name: 'Sprachaufnahme' }],
|
||||
};
|
||||
setMessages(prev => [...prev, userMsg]);
|
||||
rvs.send('audio', {
|
||||
base64: result.base64,
|
||||
durationMs: result.durationMs,
|
||||
mimeType: result.mimeType,
|
||||
...(location && { location }),
|
||||
});
|
||||
}
|
||||
// Wake Word wieder aktivieren
|
||||
if (wakeWordActive) wakeWordService.resume();
|
||||
});
|
||||
|
||||
return () => {
|
||||
unsubWake();
|
||||
unsubSilence();
|
||||
};
|
||||
}, [wakeWordActive]);
|
||||
|
||||
// Wake Word Toggle Handler
|
||||
const toggleWakeWord = useCallback(async () => {
|
||||
if (wakeWordActive) {
|
||||
wakeWordService.stop();
|
||||
setWakeWordActive(false);
|
||||
} else {
|
||||
const started = await wakeWordService.start();
|
||||
setWakeWordActive(started);
|
||||
}
|
||||
}, [wakeWordActive]);
|
||||
|
||||
// Chat-Verlauf in AsyncStorage speichern (letzte N Nachrichten)
|
||||
useEffect(() => {
|
||||
if (messages.length === 0) return;
|
||||
|
|
@ -366,7 +424,14 @@ const ChatScreen: React.FC = () => {
|
|||
<VoiceButton
|
||||
onRecordingComplete={handleVoiceRecording}
|
||||
disabled={connectionState !== 'connected'}
|
||||
wakeWordActive={wakeWordActive}
|
||||
/>
|
||||
<TouchableOpacity
|
||||
style={[styles.wakeWordBtn, wakeWordActive && styles.wakeWordBtnActive]}
|
||||
onPress={toggleWakeWord}
|
||||
>
|
||||
<Text style={styles.wakeWordIcon}>{wakeWordActive ? '👂' : '🔇'}</Text>
|
||||
</TouchableOpacity>
|
||||
)}
|
||||
</View>
|
||||
|
||||
|
|
@ -530,6 +595,21 @@ const styles = StyleSheet.create({
|
|||
sendIcon: {
|
||||
fontSize: 18,
|
||||
},
|
||||
wakeWordBtn: {
|
||||
width: 32,
|
||||
height: 32,
|
||||
borderRadius: 16,
|
||||
backgroundColor: 'rgba(255,255,255,0.1)',
|
||||
alignItems: 'center',
|
||||
justifyContent: 'center',
|
||||
marginLeft: 4,
|
||||
},
|
||||
wakeWordBtnActive: {
|
||||
backgroundColor: 'rgba(52, 199, 89, 0.3)',
|
||||
},
|
||||
wakeWordIcon: {
|
||||
fontSize: 16,
|
||||
},
|
||||
modalOverlay: {
|
||||
flex: 1,
|
||||
backgroundColor: 'rgba(0,0,0,0.6)',
|
||||
|
|
|
|||
|
|
@ -1,13 +1,20 @@
|
|||
/**
|
||||
* Audio-Service fuer Sprach-Ein-/Ausgabe
|
||||
*
|
||||
* Verwaltet Mikrofon-Aufnahme und TTS-Audiowiedergabe.
|
||||
* Nutzt react-native-sound und die nativen Audio-APIs.
|
||||
* Verwaltet Mikrofon-Aufnahme (mit VAD/Auto-Stop bei Stille),
|
||||
* TTS-Audiowiedergabe und Metering fuer visuelle Feedback.
|
||||
* Nutzt react-native-audio-recorder-player fuer Aufnahme.
|
||||
*/
|
||||
|
||||
import { Platform, PermissionsAndroid } from 'react-native';
|
||||
import Sound from 'react-native-sound';
|
||||
import RNFS from 'react-native-fs';
|
||||
import AudioRecorderPlayer, {
|
||||
AudioEncoderAndroidType,
|
||||
AudioSourceAndroidType,
|
||||
AVEncodingOption,
|
||||
OutputFormatAndroidType,
|
||||
} from 'react-native-audio-recorder-player';
|
||||
|
||||
// --- Typen ---
|
||||
|
||||
|
|
@ -23,6 +30,8 @@ export interface RecordingResult {
|
|||
export type RecordingState = 'idle' | 'recording' | 'processing';
|
||||
|
||||
type RecordingStateCallback = (state: RecordingState) => void;
|
||||
type MeterCallback = (db: number) => void;
|
||||
type SilenceCallback = () => void;
|
||||
|
||||
// --- Konstanten ---
|
||||
|
||||
|
|
@ -30,17 +39,34 @@ const AUDIO_SAMPLE_RATE = 16000;
|
|||
const AUDIO_CHANNELS = 1;
|
||||
const AUDIO_ENCODING = 'audio/wav';
|
||||
|
||||
// VAD (Voice Activity Detection) — Stille-Erkennung
|
||||
const VAD_SILENCE_THRESHOLD_DB = -45; // dB unter dem als "Stille" gilt
|
||||
const VAD_SILENCE_DURATION_MS = 1800; // ms Stille bevor Auto-Stop
|
||||
|
||||
// --- Audio-Service ---
|
||||
|
||||
class AudioService {
|
||||
private recordingState: RecordingState = 'idle';
|
||||
private recordingStartTime: number = 0;
|
||||
private stateListeners: RecordingStateCallback[] = [];
|
||||
private meterListeners: MeterCallback[] = [];
|
||||
private silenceListeners: SilenceCallback[] = [];
|
||||
private currentSound: Sound | null = null;
|
||||
private recorder: AudioRecorderPlayer;
|
||||
private recordingPath: string = '';
|
||||
|
||||
// VAD State
|
||||
private vadEnabled: boolean = false;
|
||||
private lastSpeechTime: number = 0;
|
||||
private vadTimer: ReturnType<typeof setInterval> | null = null;
|
||||
|
||||
constructor() {
|
||||
this.recorder = new AudioRecorderPlayer();
|
||||
this.recorder.setSubscriptionDuration(0.1); // 100ms Metering-Updates
|
||||
}
|
||||
|
||||
// --- Berechtigungen ---
|
||||
|
||||
/** Mikrofon-Berechtigung anfordern */
|
||||
async requestMicrophonePermission(): Promise<boolean> {
|
||||
if (Platform.OS !== 'android') {
|
||||
return true;
|
||||
|
|
@ -66,7 +92,7 @@ class AudioService {
|
|||
// --- Aufnahme ---
|
||||
|
||||
/** Mikrofon-Aufnahme starten */
|
||||
async startRecording(): Promise<boolean> {
|
||||
async startRecording(autoStop: boolean = false): Promise<boolean> {
|
||||
if (this.recordingState !== 'idle') {
|
||||
console.warn('[Audio] Aufnahme laeuft bereits');
|
||||
return false;
|
||||
|
|
@ -79,11 +105,48 @@ class AudioService {
|
|||
}
|
||||
|
||||
try {
|
||||
// Nativer Aufnahme-Start ueber AudioRecorder-Bridge
|
||||
// In Produktion: Native Module oder react-native-audio-recorder-player nutzen
|
||||
// Laufende Wiedergabe stoppen (damit ARIA sich nicht selbst hoert)
|
||||
this.stopPlayback();
|
||||
|
||||
this.recordingPath = `${RNFS.CachesDirectoryPath}/aria_recording_${Date.now()}.mp4`;
|
||||
|
||||
// Aufnahme mit Metering starten
|
||||
await this.recorder.startRecorder(this.recordingPath, {
|
||||
AudioEncoderAndroid: AudioEncoderAndroidType.AAC,
|
||||
AudioSourceAndroid: AudioSourceAndroidType.MIC,
|
||||
OutputFormatAndroid: OutputFormatAndroidType.MPEG_4,
|
||||
}, true); // meteringEnabled = true
|
||||
|
||||
// Metering-Callback
|
||||
this.recorder.addRecordBackListener((e) => {
|
||||
const db = e.currentMetering ?? -160;
|
||||
this.meterListeners.forEach(cb => cb(db));
|
||||
|
||||
// VAD: Stille erkennen
|
||||
if (this.vadEnabled) {
|
||||
if (db > VAD_SILENCE_THRESHOLD_DB) {
|
||||
this.lastSpeechTime = Date.now();
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
this.recordingStartTime = Date.now();
|
||||
this.lastSpeechTime = Date.now();
|
||||
this.setState('recording');
|
||||
console.log('[Audio] Aufnahme gestartet');
|
||||
|
||||
// VAD aktivieren
|
||||
this.vadEnabled = autoStop;
|
||||
if (autoStop) {
|
||||
this.vadTimer = setInterval(() => {
|
||||
const silenceDuration = Date.now() - this.lastSpeechTime;
|
||||
if (silenceDuration >= VAD_SILENCE_DURATION_MS) {
|
||||
console.log(`[Audio] VAD: ${silenceDuration}ms Stille — Auto-Stop`);
|
||||
this.silenceListeners.forEach(cb => cb());
|
||||
}
|
||||
}, 200);
|
||||
}
|
||||
|
||||
console.log('[Audio] Aufnahme gestartet (autoStop: %s)', autoStop);
|
||||
return true;
|
||||
} catch (err) {
|
||||
console.error('[Audio] Fehler beim Starten der Aufnahme:', err);
|
||||
|
|
@ -100,22 +163,31 @@ class AudioService {
|
|||
}
|
||||
|
||||
this.setState('processing');
|
||||
this.vadEnabled = false;
|
||||
if (this.vadTimer) {
|
||||
clearInterval(this.vadTimer);
|
||||
this.vadTimer = null;
|
||||
}
|
||||
|
||||
try {
|
||||
await this.recorder.stopRecorder();
|
||||
this.recorder.removeRecordBackListener();
|
||||
|
||||
const durationMs = Date.now() - this.recordingStartTime;
|
||||
|
||||
// In Produktion: Audiodaten vom nativen Recorder holen
|
||||
// const audioData = await NativeAudioRecorder.stop();
|
||||
const base64Placeholder = ''; // Platzhalter bis Native-Bridge implementiert
|
||||
// Audio-Datei als Base64 lesen
|
||||
const base64Data = await RNFS.readFile(this.recordingPath, 'base64');
|
||||
|
||||
// Temp-Datei aufraeumen
|
||||
RNFS.unlink(this.recordingPath).catch(() => {});
|
||||
|
||||
this.setState('idle');
|
||||
|
||||
console.log(`[Audio] Aufnahme beendet (${durationMs}ms)`);
|
||||
console.log(`[Audio] Aufnahme beendet (${durationMs}ms, ${Math.round(base64Data.length / 1024)}KB)`);
|
||||
|
||||
return {
|
||||
base64: base64Placeholder,
|
||||
base64: base64Data,
|
||||
durationMs,
|
||||
mimeType: AUDIO_ENCODING,
|
||||
mimeType: 'audio/mp4', // AAC in MP4 Container
|
||||
};
|
||||
} catch (err) {
|
||||
console.error('[Audio] Fehler beim Stoppen der Aufnahme:', err);
|
||||
|
|
@ -134,7 +206,7 @@ class AudioService {
|
|||
this.stopPlayback();
|
||||
|
||||
try {
|
||||
// Base64 → temporaere WAV-Datei → Sound abspielen
|
||||
// Base64 -> temporaere WAV-Datei -> Sound abspielen
|
||||
const tmpPath = `${RNFS.CachesDirectoryPath}/aria_tts_${Date.now()}.wav`;
|
||||
await RNFS.writeFile(tmpPath, base64Data, 'base64');
|
||||
|
||||
|
|
@ -152,7 +224,6 @@ class AudioService {
|
|||
}
|
||||
this.currentSound?.release();
|
||||
this.currentSound = null;
|
||||
// Temp-Datei aufraeumen
|
||||
RNFS.unlink(tmpPath).catch(() => {});
|
||||
});
|
||||
});
|
||||
|
|
@ -170,7 +241,7 @@ class AudioService {
|
|||
}
|
||||
}
|
||||
|
||||
// --- Status ---
|
||||
// --- Status & Callbacks ---
|
||||
|
||||
getRecordingState(): RecordingState {
|
||||
return this.recordingState;
|
||||
|
|
@ -184,6 +255,22 @@ class AudioService {
|
|||
};
|
||||
}
|
||||
|
||||
/**
 * Registers a listener for level-meter updates emitted while recording.
 *
 * @param callback Receives the current meter value in dB.
 * @returns A function that removes this listener again.
 */
onMeterUpdate(callback: MeterCallback): () => void {
  this.meterListeners.push(callback);

  const unsubscribe = (): void => {
    this.meterListeners = this.meterListeners.filter(listener => listener !== callback);
  };
  return unsubscribe;
}
|
||||
|
||||
/**
 * Registers a listener that is invoked when the VAD reports silence
 * (the auto-stop trigger).
 *
 * @param callback Invoked without arguments on each silence notification.
 * @returns A function that removes this listener again.
 */
onSilenceDetected(callback: SilenceCallback): () => void {
  this.silenceListeners.push(callback);

  const unsubscribe = (): void => {
    this.silenceListeners = this.silenceListeners.filter(listener => listener !== callback);
  };
  return unsubscribe;
}
|
||||
|
||||
private setState(state: RecordingState): void {
|
||||
if (this.recordingState !== state) {
|
||||
this.recordingState = state;
|
||||
|
|
|
|||
|
|
@ -0,0 +1,145 @@
|
|||
/**
|
||||
* Wake Word Service — "ARIA" Erkennung
|
||||
*
|
||||
* Nutzt react-native-live-audio-stream fuer kontinuierliches Mikrofon-Monitoring.
|
||||
* Erkennt Sprache per Energie-Schwellwert und sendet kurze Audio-Clips
|
||||
* zur serverseitigen Wake-Word-Pruefung (openwakeword in der Bridge).
|
||||
*
|
||||
* Architektur:
|
||||
* App (Mikrofon) → Energie-Erkennung → Audio-Buffer
|
||||
* → RVS "wake_check" → Bridge → openwakeword → Bestaetigung
|
||||
* → App startet Aufnahme
|
||||
*
|
||||
* Aktuell (Phase 1): Einfacher Tap-to-Talk + Auto-Stop.
|
||||
* Spaeter (Phase 2): Porcupine on-device "ARIA" Keyword.
|
||||
*/
|
||||
|
||||
import LiveAudioStream from 'react-native-live-audio-stream';
|
||||
|
||||
type WakeWordCallback = () => void;
|
||||
type StateCallback = (state: WakeWordState) => void;
|
||||
|
||||
export type WakeWordState = 'off' | 'listening' | 'detected';
|
||||
|
||||
/**
 * Energy-based wake trigger on top of a live microphone stream.
 *
 * While in state `listening`, every PCM chunk delivered by `LiveAudioStream`
 * is decoded and its RMS amplitude compared against a fixed threshold. The
 * first chunk above the threshold moves the service to `detected`, notifies
 * all wake listeners and then stops the stream (state `off`).
 */
class WakeWordService {
  /** RMS amplitude (16-bit sample scale) above which a chunk counts as a trigger. */
  private static readonly RMS_THRESHOLD = 2000;

  private state: WakeWordState = 'off';
  private wakeCallbacks: WakeWordCallback[] = [];
  private stateCallbacks: StateCallback[] = [];
  private isInitialized = false;
  /** Handle of a scheduled `resume()` restart, so `stop()` can cancel it. */
  private resumeTimer: ReturnType<typeof setTimeout> | null = null;

  /**
   * Starts listening for the wake trigger.
   *
   * @returns `true` when the stream is (already) listening, `false` when
   *          starting the stream threw.
   */
  async start(): Promise<boolean> {
    if (this.state === 'listening') return true;

    try {
      if (!this.isInitialized) {
        LiveAudioStream.init({
          sampleRate: 16000,
          channels: 1,
          bitsPerSample: 16,
          audioSource: 6, // VOICE_RECOGNITION
          bufferSize: 4096,
        });
        this.isInitialized = true;
      }

      LiveAudioStream.start();
      LiveAudioStream.on('data', (base64Chunk: string) => this.handleChunk(base64Chunk));

      this.setState('listening');
      console.log('[WakeWord] Listening gestartet');
      return true;
    } catch (err) {
      console.error('[WakeWord] Start fehlgeschlagen:', err);
      return false;
    }
  }

  /**
   * Stops listening and cancels a restart scheduled by `resume()`.
   *
   * Cancelling matters when the user switches the feature off while a
   * resume is still pending: otherwise the timer would silently restart
   * the stream afterwards.
   */
  stop(): void {
    this.cancelPendingResume();
    this.stopStream();
  }

  /**
   * Schedules a restart after a recording has finished.
   *
   * The returned promise resolves immediately; the restart itself happens
   * after `delayMs` and only if the service is still `off` at that time.
   *
   * @param delayMs Pause before restarting, giving the recorder time to
   *                release the microphone. Defaults to 500 ms.
   */
  async resume(delayMs: number = 500): Promise<void> {
    this.cancelPendingResume();
    this.resumeTimer = setTimeout(() => {
      this.resumeTimer = null;
      if (this.state === 'off') {
        // start() catches its own errors and resolves to a boolean.
        void this.start();
      }
    }, delayMs);
  }

  // --- Callbacks ---

  /** Registers a wake listener; the returned function unregisters it. */
  onWakeWord(callback: WakeWordCallback): () => void {
    this.wakeCallbacks.push(callback);
    return () => {
      this.wakeCallbacks = this.wakeCallbacks.filter(cb => cb !== callback);
    };
  }

  /** Registers a state listener; the returned function unregisters it. */
  onStateChange(callback: StateCallback): () => void {
    this.stateCallbacks.push(callback);
    return () => {
      this.stateCallbacks = this.stateCallbacks.filter(cb => cb !== callback);
    };
  }

  /** Current service state. */
  getState(): WakeWordState {
    return this.state;
  }

  // --- Helpers ---

  /**
   * Evaluates one audio chunk from the stream.
   *
   * Chunks are ignored unless the service is listening. Undecodable chunks
   * are dropped with a warning instead of throwing out of the native event
   * listener.
   */
  private handleChunk(base64Chunk: string): void {
    if (this.state !== 'listening') return;

    let rms: number;
    try {
      rms = this._calculateRMS(this._base64ToInt16(base64Chunk));
    } catch (err) {
      console.warn('[WakeWord] Audio-Chunk konnte nicht dekodiert werden:', err);
      return;
    }
    if (rms <= WakeWordService.RMS_THRESHOLD) return;

    this.setState('detected');
    // Isolate listeners from each other: a throwing listener must neither
    // block the remaining ones nor prevent the stream from being stopped.
    const listeners = this.wakeCallbacks;
    for (const listener of listeners) {
      try {
        listener();
      } catch (err) {
        console.error('[WakeWord] Wake-Word-Callback fehlgeschlagen:', err);
      }
    }
    // Internal stop only: a resume() requested from inside a listener
    // must stay scheduled.
    this.stopStream();
  }

  /** Stops the native stream and moves to `off`; no-op when already off. */
  private stopStream(): void {
    if (this.state === 'off') return;
    try {
      LiveAudioStream.stop();
    } catch (err) {
      // Best effort: the state is reset even if the native stop fails.
      console.warn('[WakeWord] Stream-Stop fehlgeschlagen:', err);
    }
    this.setState('off');
    console.log('[WakeWord] Gestoppt');
  }

  /** Clears a restart timer set by `resume()`, if any. */
  private cancelPendingResume(): void {
    if (this.resumeTimer !== null) {
      clearTimeout(this.resumeTimer);
      this.resumeTimer = null;
    }
  }

  private setState(state: WakeWordState): void {
    if (this.state !== state) {
      this.state = state;
      this.stateCallbacks.forEach(cb => cb(state));
    }
  }

  /**
   * Decodes a base64 chunk into signed 16-bit samples (little-endian).
   *
   * A trailing odd byte is dropped; building an `Int16Array` view over an
   * odd-length buffer would throw a `RangeError`.
   * NOTE(review): relies on a global `atob` — confirm the target React
   * Native runtime provides it.
   */
  private _base64ToInt16(base64: string): Int16Array {
    const binary = atob(base64);
    const sampleCount = binary.length >> 1;
    const samples = new Int16Array(sampleCount);
    for (let i = 0; i < sampleCount; i++) {
      const low = binary.charCodeAt(2 * i);
      const high = binary.charCodeAt(2 * i + 1);
      // The typed-array store wraps 0..65535 into the signed 16-bit range.
      samples[i] = (high << 8) | low;
    }
    return samples;
  }

  /** Root mean square of the samples; 0 for an empty chunk. */
  private _calculateRMS(samples: Int16Array): number {
    if (samples.length === 0) return 0;
    let sum = 0;
    for (let i = 0; i < samples.length; i++) {
      sum += samples[i] * samples[i];
    }
    return Math.sqrt(sum / samples.length);
  }
}
|
||||
|
||||
const wakeWordService = new WakeWordService();
|
||||
export default wakeWordService;
|
||||
|
|
@ -30,6 +30,7 @@ import wave
|
|||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
import subprocess
|
||||
import urllib.request
|
||||
import numpy as np
|
||||
import sounddevice as sd
|
||||
|
|
@ -959,13 +960,78 @@ class ARIABridge:
|
|||
await self.ws_core.send(raw_message)
|
||||
|
||||
elif msg_type == "audio":
|
||||
# Audio von der App → STT → an aria-core
|
||||
logger.info("[rvs] Audio empfangen — TODO: STT")
|
||||
# Spaeter: Audio decodieren, durch Whisper jagen, Ergebnis an core
|
||||
# Audio von der App → decodieren → STT → an aria-core
|
||||
audio_b64 = payload.get("base64", "")
|
||||
mime_type = payload.get("mimeType", "audio/mp4")
|
||||
duration_ms = payload.get("durationMs", 0)
|
||||
if not audio_b64:
|
||||
logger.warning("[rvs] Audio ohne Daten empfangen")
|
||||
return
|
||||
logger.info("[rvs] Audio empfangen: %s, %dms, %dKB",
|
||||
mime_type, duration_ms, len(audio_b64) // 1365)
|
||||
asyncio.create_task(self._process_app_audio(audio_b64, mime_type))
|
||||
|
||||
else:
|
||||
logger.debug("[rvs] Unbekannter Typ: %s", msg_type)
|
||||
|
||||
async def _process_app_audio(self, audio_b64: str, mime_type: str) -> None:
    """Transcribe a voice message sent by the app and forward the text to aria-core.

    The Base64 payload is written to a temporary file, converted by FFmpeg
    to 16 kHz mono raw float32 PCM, loaded as a numpy array and passed to
    ``self.stt_engine.transcribe``. A non-empty transcript is sent on via
    ``self.send_to_core`` with ``source="app-voice"``.

    All failures are logged and swallowed, so this coroutine can run as a
    fire-and-forget task. Temporary files are always removed.

    Args:
        audio_b64: Base64-encoded audio file content.
        mime_type: MIME type reported by the app; only used to choose the
            temp-file suffix (``.mp4``, ``.wav``, otherwise ``.ogg``).
    """
    loop = asyncio.get_running_loop()
    tmp_paths = []
    try:
        # Decode before creating any file so a malformed payload leaves
        # no open handle behind.
        raw_bytes = base64.b64decode(audio_b64)

        mime = (mime_type or "").lower()
        ext = ".mp4" if "mp4" in mime else ".wav" if "wav" in mime else ".ogg"
        with tempfile.NamedTemporaryFile(suffix=ext, delete=False) as tmp_in:
            tmp_paths.append(tmp_in.name)
            tmp_in.write(raw_bytes)
        in_path = tmp_paths[0]

        # Reserve an output path; FFmpeg overwrites the empty file (-y).
        with tempfile.NamedTemporaryFile(suffix=".raw", delete=False) as tmp_out:
            tmp_paths.append(tmp_out.name)
        out_path = tmp_paths[1]

        # FFmpeg: any input format -> 16 kHz mono PCM (raw float32).
        # -nostdin keeps FFmpeg from reading the inherited stdin;
        # -hide_banner/-loglevel error limit stderr to actual errors.
        cmd = [
            "ffmpeg", "-nostdin", "-hide_banner", "-loglevel", "error",
            "-y", "-i", in_path,
            "-ar", "16000", "-ac", "1", "-f", "f32le",
            out_path,
        ]
        result = await loop.run_in_executor(
            None,
            lambda: subprocess.run(cmd, capture_output=True, timeout=30),
        )
        if result.returncode != 0:
            # The relevant message is at the end of stderr; decode
            # leniently so odd bytes cannot raise while logging.
            stderr_text = result.stderr.decode(errors="replace").strip()
            logger.error("[rvs] FFmpeg Fehler: %s", stderr_text[-500:])
            return

        # Read PCM -> numpy float32
        audio_data = np.fromfile(out_path, dtype=np.float32)
        if len(audio_data) == 0:
            logger.warning("[rvs] Leere Audio-Daten nach Konvertierung")
            return

        duration_s = len(audio_data) / 16000.0
        logger.info("[rvs] Audio konvertiert: %.1fs, %d samples", duration_s, len(audio_data))

        # Speech-to-text runs in the executor to keep the event loop free.
        text = await loop.run_in_executor(None, self.stt_engine.transcribe, audio_data)

        if text and text.strip():
            logger.info("[rvs] STT Ergebnis: '%s'", text[:80])
            await self.send_to_core(text, source="app-voice")
        else:
            logger.info("[rvs] Keine Sprache erkannt — ignoriert")

    except Exception:
        logger.exception("[rvs] Audio-Verarbeitung fehlgeschlagen")
    finally:
        # Best-effort removal of the temp files.
        for path in tmp_paths:
            try:
                os.unlink(path)
            except OSError:
                pass
|
||||
|
||||
async def _send_to_rvs(self, message: dict) -> None:
|
||||
"""Sendet eine Nachricht an die App (via RVS)."""
|
||||
if self.ws_rvs is None or not self.ws_rvs.open:
|
||||
|
|
|
|||
|
|
@ -87,7 +87,7 @@ services:
|
|||
- RVS_TOKEN=${RVS_TOKEN:-}
|
||||
restart: unless-stopped
|
||||
|
||||
# ─── Diagnostic (Selbstcheck-UI) ──────────────────────
|
||||
# ─── Diagnostic (Selbstcheck-UI und Einstellungen) ────
|
||||
diagnostic:
|
||||
build: ./diagnostic
|
||||
container_name: aria-diagnostic
|
||||
|
|
|
|||
Loading…
Reference in New Issue