Compare commits

...

2 Commits

Author SHA1 Message Date
duffyduck e7bb6c37cb feat: Sprechgeschwindigkeit-Range auf 0.1-5.0 erweitert
TTS_SPEED_MIN 0.5 → 0.1, TTS_SPEED_MAX 2.0 → 5.0.
Bridge-seitige Validierungen (aria_bridge.py + f5tts/bridge.py) von
bisher 0.25-4.0 auf den gleichen Bereich mitgezogen.

Hinweis: Extremwerte (unter 0.5 oder ueber 2.0) koennen bei F5-TTS
verzerrte Ausgaben produzieren — Stefan bekommt die Freiheit zum
Experimentieren.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-25 00:49:05 +02:00
duffyduck d146ca92c4 fix: Aufnahme-Crashes/Double-Tap durch VAD-Multi-Fire + stale closure
Drei zusammenhaengende Bugs:

1. Der VAD-Timer feuerte im 200ms-setInterval WEITER, nachdem die
   Stille-Schwelle erreicht war — die Listener wurden pro Aufnahme bis
   zu 5x getriggert. Die parallel laufenden stopRecording()-Calls
   fuehrten im nativen Layer von audio-recorder-player zu OOM / Crash.

   Fix: silenceFired-Latch + Timer-Clear SOFORT beim ersten Feuer
   (fireSilenceOnce-Helper). Gleiche Logik fuer Max-Dauer + Conv-Window.

2. Der Silence-Listener im VoiceButton wurde bei jedem isRecording-
   Flip neu registriert (deps [isRecording, onRecordingComplete]). Der
   Closure-State war stale, und bei schnellen Flips gab es
   register/unregister-Races.

   Fix: empty deps, state direkt vom audioService via getRecordingState()
   lesen. onRecordingComplete via Ref (damit der Callback aktuell bleibt
   ohne re-register).

3. handleTap las den Button-State aus React (isRecording), der bei
   schnellen Taps stale sein konnte — "erst zweiter Tap geht" Symptom.

   Fix: audioService.getRecordingState() als Source-of-Truth, plus
   tapBusy-Ref als Anti-Doppel-Tap-Guard waehrend des asynchronen
   Start/Stop. Im 'processing'-State wird der Tap ignoriert.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-25 00:47:53 +02:00
4 changed files with 68 additions and 35 deletions

View File

@ -93,18 +93,24 @@ const VoiceButton: React.FC<VoiceButtonProps> = ({
} }
}, [isRecording]); }, [isRecording]);
// VAD Silence Callback — Auto-Stop // VAD Silence Callback — Auto-Stop.
// WICHTIG: NICHT auf isRecording prüfen (Closure ist stale) — stattdessen
// audioService selber fragen. Empty deps → Listener wird EINMAL registriert.
// audioService garantiert jetzt dass der Callback pro Aufnahme nur einmal
// feuert (silenceFired-Latch).
const onCompleteRef = useRef(onRecordingComplete);
useEffect(() => { onCompleteRef.current = onRecordingComplete; }, [onRecordingComplete]);
useEffect(() => { useEffect(() => {
const unsubSilence = audioService.onSilenceDetected(async () => { const unsubSilence = audioService.onSilenceDetected(async () => {
if (!isRecording) return; if (audioService.getRecordingState() !== 'recording') return;
setIsRecording(false);
const result = await audioService.stopRecording(); const result = await audioService.stopRecording();
setIsRecording(false);
if (result && result.durationMs > 500) { if (result && result.durationMs > 500) {
onRecordingComplete(result); onCompleteRef.current(result);
} }
}); });
return unsubSilence; return unsubSilence;
}, [isRecording, onRecordingComplete]); }, []);
// Auto-Start fuer Wake Word (extern getriggert) // Auto-Start fuer Wake Word (extern getriggert)
const startAutoRecording = useCallback(async () => { const startAutoRecording = useCallback(async () => {
@ -136,23 +142,35 @@ const VoiceButton: React.FC<VoiceButtonProps> = ({
} }
}; };
// Tap-to-Talk: Einmal tippen startet mit Auto-Stop // Tap-to-Talk: Einmal tippen startet mit Auto-Stop.
// Guard gegen Doppel-Tap während asyncer Start/Stop.
const tapBusy = useRef(false);
const handleTap = async () => { const handleTap = async () => {
if (disabled) return; if (disabled || tapBusy.current) return;
if (isRecording) { tapBusy.current = true;
// Aufnahme manuell stoppen try {
setIsRecording(false); // Fragen WIR den Service, nicht den React-State (Closure kann stale sein)
const result = await audioService.stopRecording(); const svcState = audioService.getRecordingState();
if (result && result.durationMs > 300) { if (svcState === 'recording') {
onRecordingComplete(result); // Aufnahme manuell stoppen
} const result = await audioService.stopRecording();
} else { setIsRecording(false);
// Aufnahme mit Auto-Stop starten if (result && result.durationMs > 300) {
const started = await audioService.startRecording(true); onRecordingComplete(result);
if (started) { }
isLongPress.current = false; } else if (svcState === 'idle') {
setIsRecording(true); // Aufnahme mit Auto-Stop starten
const started = await audioService.startRecording(true);
if (started) {
isLongPress.current = false;
setIsRecording(true);
}
} }
// svcState === 'processing': Stopp in progress — nichts tun, User
// muss nochmal tippen wenn fertig. Aber wir blockieren mit tapBusy
// kurz damit der User's UI-Feedback synchron bleibt.
} finally {
tapBusy.current = false;
} }
}; };

View File

@ -95,8 +95,8 @@ export const CONV_WINDOW_STORAGE_KEY = 'aria_conv_window_sec';
// TTS-Wiedergabegeschwindigkeit — wird pro Geraet gespeichert und an die // TTS-Wiedergabegeschwindigkeit — wird pro Geraet gespeichert und an die
// Bridge mitgegeben (speed-Param im F5-TTS infer()). 1.0 = normal. // Bridge mitgegeben (speed-Param im F5-TTS infer()). 1.0 = normal.
export const TTS_SPEED_DEFAULT = 1.0; export const TTS_SPEED_DEFAULT = 1.0;
export const TTS_SPEED_MIN = 0.5; export const TTS_SPEED_MIN = 0.1;
export const TTS_SPEED_MAX = 2.0; export const TTS_SPEED_MAX = 5.0;
export const TTS_SPEED_STORAGE_KEY = 'aria_tts_speed'; export const TTS_SPEED_STORAGE_KEY = 'aria_tts_speed';
export async function loadTtsSpeed(): Promise<number> { export async function loadTtsSpeed(): Promise<number> {
@ -196,6 +196,8 @@ class AudioService {
private lastSpeechTime: number = 0; private lastSpeechTime: number = 0;
private vadTimer: ReturnType<typeof setInterval> | null = null; private vadTimer: ReturnType<typeof setInterval> | null = null;
private maxDurationTimer: ReturnType<typeof setTimeout> | null = null; private maxDurationTimer: ReturnType<typeof setTimeout> | null = null;
// Latch damit der Silence-Callback pro Aufnahme genau einmal feuert
private silenceFired: boolean = false;
private noSpeechTimer: ReturnType<typeof setTimeout> | null = null; private noSpeechTimer: ReturnType<typeof setTimeout> | null = null;
constructor() { constructor() {
@ -305,33 +307,46 @@ class AudioService {
// Andere Apps waehrend der Aufnahme pausieren (Musik, Videos etc.) // Andere Apps waehrend der Aufnahme pausieren (Musik, Videos etc.)
AudioFocus?.requestExclusive().catch(() => {}); AudioFocus?.requestExclusive().catch(() => {});
// VAD aktivieren — Stille-Dauer aus AsyncStorage (Settings-konfigurierbar) // VAD aktivieren — Stille-Dauer aus AsyncStorage (Settings-konfigurierbar).
// WICHTIG: jeder Trigger (VAD-Stille / Max-Dauer / No-Speech-Window)
// disable SOFORT den VAD-Flag und clear den Timer, BEVOR die Listener
// gefeuert werden. Sonst feuert das setInterval weiter alle 200ms und
// ruft stopRecording parallel auf → audio-recorder-player crasht.
this.vadEnabled = autoStop; this.vadEnabled = autoStop;
this.silenceFired = false;
const fireSilenceOnce = (reason: string) => {
if (this.silenceFired) return;
this.silenceFired = true;
this.vadEnabled = false;
if (this.vadTimer) { clearInterval(this.vadTimer); this.vadTimer = null; }
if (this.maxDurationTimer) { clearTimeout(this.maxDurationTimer); this.maxDurationTimer = null; }
if (this.noSpeechTimer) { clearTimeout(this.noSpeechTimer); this.noSpeechTimer = null; }
console.log('[Audio] Silence-Fire: %s', reason);
this.silenceListeners.forEach(cb => {
try { cb(); } catch (e) { console.warn('[Audio] silence listener err:', e); }
});
};
if (autoStop) { if (autoStop) {
const vadSilenceMs = await loadVadSilenceMs(); const vadSilenceMs = await loadVadSilenceMs();
console.log('[Audio] VAD-Stille:', vadSilenceMs, 'ms'); console.log('[Audio] VAD-Stille:', vadSilenceMs, 'ms');
this.vadTimer = setInterval(() => { this.vadTimer = setInterval(() => {
const silenceDuration = Date.now() - this.lastSpeechTime; const silenceDuration = Date.now() - this.lastSpeechTime;
if (silenceDuration >= vadSilenceMs) { if (silenceDuration >= vadSilenceMs) {
console.log(`[Audio] VAD: ${silenceDuration}ms Stille — Auto-Stop`); fireSilenceOnce(`VAD ${silenceDuration}ms Stille`);
this.silenceListeners.forEach(cb => cb());
} }
}, 200); }, 200);
// Notbremse: Nach MAX_RECORDING_MS zwangsweise stoppen // Notbremse: Nach MAX_RECORDING_MS zwangsweise stoppen
this.maxDurationTimer = setTimeout(() => { this.maxDurationTimer = setTimeout(() => {
console.warn(`[Audio] Max-Dauer ${MAX_RECORDING_MS}ms erreicht — Zwangs-Stop`); fireSilenceOnce(`Max-Dauer ${MAX_RECORDING_MS}ms`);
this.silenceListeners.forEach(cb => cb());
}, MAX_RECORDING_MS); }, MAX_RECORDING_MS);
} }
// Conversation-Window: Wenn der User innerhalb noSpeechTimeoutMs nicht // Conversation-Window: Wenn der User innerhalb noSpeechTimeoutMs nicht
// anfaengt zu sprechen → Aufnahme abbrechen (Speech-Gate verwirft sie), // anfaengt zu sprechen → Aufnahme abbrechen (Speech-Gate verwirft sie).
// ChatScreen erkennt das und beendet die Konversation.
if (noSpeechTimeoutMs > 0) { if (noSpeechTimeoutMs > 0) {
this.noSpeechTimer = setTimeout(() => { this.noSpeechTimer = setTimeout(() => {
if (!this.speechDetected && this.recordingState === 'recording') { if (!this.speechDetected && this.recordingState === 'recording') {
console.log(`[Audio] Conversation-Window ${noSpeechTimeoutMs}ms ohne Sprache — Stop`); fireSilenceOnce(`Conversation-Window ${noSpeechTimeoutMs}ms ohne Sprache`);
this.silenceListeners.forEach(cb => cb());
} }
}, noSpeechTimeoutMs); }, noSpeechTimeoutMs);
} }

View File

@ -1176,7 +1176,7 @@ class ARIABridge:
# Speed-Override (TTS-Wiedergabegeschwindigkeit, pro Geraet) # Speed-Override (TTS-Wiedergabegeschwindigkeit, pro Geraet)
try: try:
speed = float(payload.get("speed", 0) or 0) speed = float(payload.get("speed", 0) or 0)
if 0.25 <= speed <= 4.0: if 0.1 <= speed <= 5.0:
self._next_speed_override = speed self._next_speed_override = speed
except (TypeError, ValueError): except (TypeError, ValueError):
pass pass
@ -1236,7 +1236,7 @@ class ARIABridge:
xtts_voice = payload.get("voice", "") or getattr(self, 'xtts_voice', '') xtts_voice = payload.get("voice", "") or getattr(self, 'xtts_voice', '')
try: try:
xtts_speed = float(payload.get("speed", 0) or 0) xtts_speed = float(payload.get("speed", 0) or 0)
if not (0.25 <= xtts_speed <= 4.0): if not (0.1 <= xtts_speed <= 5.0):
xtts_speed = 1.0 xtts_speed = 1.0
except (TypeError, ValueError): except (TypeError, ValueError):
xtts_speed = 1.0 xtts_speed = 1.0
@ -1450,7 +1450,7 @@ class ARIABridge:
logger.info("[rvs] Voice-Override (via Audio): %s", voice_override) logger.info("[rvs] Voice-Override (via Audio): %s", voice_override)
try: try:
speed = float(payload.get("speed", 0) or 0) speed = float(payload.get("speed", 0) or 0)
if 0.25 <= speed <= 4.0: if 0.1 <= speed <= 5.0:
self._next_speed_override = speed self._next_speed_override = speed
except (TypeError, ValueError): except (TypeError, ValueError):
pass pass

View File

@ -762,7 +762,7 @@ async def run_loop(runner: F5Runner) -> None:
speed = float(payload.get("speed") or 1.0) speed = float(payload.get("speed") or 1.0)
except (TypeError, ValueError): except (TypeError, ValueError):
speed = 1.0 speed = 1.0
if not (0.25 <= speed <= 4.0): if not (0.1 <= speed <= 5.0):
speed = 1.0 speed = 1.0
await _tts_queue.put(( await _tts_queue.put((
payload.get("text", ""), payload.get("text", ""),