feat: Pre-Roll-Buffer fuer TTS einstellbar in App-Settings
- Kotlin start() nimmt jetzt prerollSeconds als dritten Parameter (1.0-6.0s geclampt, Fallback 3.5s bei ungueltigem Wert) - audio.ts liest Wert aus AsyncStorage vor jedem Stream-Start, exportiert Default/Min/Max/Key als Konstanten - SettingsScreen: +/- Buttons direkt unter dem TTS-Toggle, Default auf 3.5s (von 2.5s) angehoben Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
6c8ba5fe2d
commit
ddd30b3059
|
|
@ -30,9 +30,10 @@ import java.util.concurrent.LinkedBlockingQueue
|
||||||
class PcmStreamPlayerModule(reactContext: ReactApplicationContext) : ReactContextBaseJavaModule(reactContext) {
|
class PcmStreamPlayerModule(reactContext: ReactApplicationContext) : ReactContextBaseJavaModule(reactContext) {
|
||||||
companion object {
|
companion object {
|
||||||
private const val TAG = "PcmStreamPlayer"
|
private const val TAG = "PcmStreamPlayer"
|
||||||
// Sekunden Audio die VOR play()-Start gepuffert sein muessen.
|
// Fallback wenn JS keinen Wert uebergibt.
|
||||||
// 2.5s Vorrat = genug um XTTS-Render-Pausen zwischen Chunks zu puffern.
|
private const val DEFAULT_PREROLL_SECONDS = 3.5
|
||||||
private const val PREROLL_SECONDS = 2.5
|
private const val MIN_PREROLL_SECONDS = 0.5
|
||||||
|
private const val MAX_PREROLL_SECONDS = 10.0
|
||||||
// Stille am Stream-Anfang, damit AudioTrack sauber anfaehrt und die
|
// Stille am Stream-Anfang, damit AudioTrack sauber anfaehrt und die
|
||||||
// ersten Samples nicht abgeschnitten werden (XTTS-Warmup + play()-Latenz).
|
// ersten Samples nicht abgeschnitten werden (XTTS-Warmup + play()-Latenz).
|
||||||
private const val LEADING_SILENCE_SECONDS = 0.2
|
private const val LEADING_SILENCE_SECONDS = 0.2
|
||||||
|
|
@ -53,17 +54,21 @@ class PcmStreamPlayerModule(reactContext: ReactApplicationContext) : ReactContex
|
||||||
// ── Lifecycle ──
|
// ── Lifecycle ──
|
||||||
|
|
||||||
@ReactMethod
|
@ReactMethod
|
||||||
fun start(sampleRate: Int, channels: Int, promise: Promise) {
|
fun start(sampleRate: Int, channels: Int, prerollSeconds: Double, promise: Promise) {
|
||||||
try {
|
try {
|
||||||
// Alte Session beenden falls vorhanden
|
// Alte Session beenden falls vorhanden
|
||||||
stopInternal()
|
stopInternal()
|
||||||
|
|
||||||
|
val prerollSec = prerollSeconds
|
||||||
|
.coerceIn(MIN_PREROLL_SECONDS, MAX_PREROLL_SECONDS)
|
||||||
|
.let { if (it.isFinite() && it > 0) it else DEFAULT_PREROLL_SECONDS }
|
||||||
|
|
||||||
val channelConfig = if (channels == 2) AudioFormat.CHANNEL_OUT_STEREO else AudioFormat.CHANNEL_OUT_MONO
|
val channelConfig = if (channels == 2) AudioFormat.CHANNEL_OUT_STEREO else AudioFormat.CHANNEL_OUT_MONO
|
||||||
val encoding = AudioFormat.ENCODING_PCM_16BIT
|
val encoding = AudioFormat.ENCODING_PCM_16BIT
|
||||||
val minBuf = AudioTrack.getMinBufferSize(sampleRate, channelConfig, encoding)
|
val minBuf = AudioTrack.getMinBufferSize(sampleRate, channelConfig, encoding)
|
||||||
val bytesPerSecond = sampleRate * channels * 2 // 16-bit = 2 bytes
|
val bytesPerSecond = sampleRate * channels * 2 // 16-bit = 2 bytes
|
||||||
// Buffer muss mindestens PREROLL + etwas Spielraum fassen.
|
// Buffer muss mindestens PREROLL + etwas Spielraum fassen.
|
||||||
val prerollTarget = (bytesPerSecond * PREROLL_SECONDS).toInt()
|
val prerollTarget = (bytesPerSecond * prerollSec).toInt()
|
||||||
val bufferSize = (minBuf * 32).coerceAtLeast(prerollTarget * 2)
|
val bufferSize = (minBuf * 32).coerceAtLeast(prerollTarget * 2)
|
||||||
prerollBytes = prerollTarget
|
prerollBytes = prerollTarget
|
||||||
bytesBuffered = 0
|
bytesBuffered = 0
|
||||||
|
|
@ -173,7 +178,7 @@ class PcmStreamPlayerModule(reactContext: ReactApplicationContext) : ReactContex
|
||||||
}
|
}
|
||||||
}, "PcmStreamWriter").apply { start() }
|
}, "PcmStreamWriter").apply { start() }
|
||||||
|
|
||||||
Log.i(TAG, "Stream gestartet: ${sampleRate}Hz ch=$channels buf=${bufferSize}B preroll=${prerollBytes}B")
|
Log.i(TAG, "Stream gestartet: ${sampleRate}Hz ch=$channels buf=${bufferSize}B preroll=${prerollBytes}B (${prerollSec}s)")
|
||||||
promise.resolve(true)
|
promise.resolve(true)
|
||||||
} catch (e: Exception) {
|
} catch (e: Exception) {
|
||||||
Log.e(TAG, "start fehlgeschlagen", e)
|
Log.e(TAG, "start fehlgeschlagen", e)
|
||||||
|
|
|
||||||
|
|
@ -20,6 +20,12 @@ import AsyncStorage from '@react-native-async-storage/async-storage';
|
||||||
import RNFS from 'react-native-fs';
|
import RNFS from 'react-native-fs';
|
||||||
import DocumentPicker from 'react-native-document-picker';
|
import DocumentPicker from 'react-native-document-picker';
|
||||||
import rvs, { ConnectionState, RVSMessage, ConnectionConfig, ConnectionLogEntry } from '../services/rvs';
|
import rvs, { ConnectionState, RVSMessage, ConnectionConfig, ConnectionLogEntry } from '../services/rvs';
|
||||||
|
import {
|
||||||
|
TTS_PREROLL_DEFAULT_SEC,
|
||||||
|
TTS_PREROLL_MIN_SEC,
|
||||||
|
TTS_PREROLL_MAX_SEC,
|
||||||
|
TTS_PREROLL_STORAGE_KEY,
|
||||||
|
} from '../services/audio';
|
||||||
import ModeSelector from '../components/ModeSelector';
|
import ModeSelector from '../components/ModeSelector';
|
||||||
import QRScanner from '../components/QRScanner';
|
import QRScanner from '../components/QRScanner';
|
||||||
import VoiceCloneModal from '../components/VoiceCloneModal';
|
import VoiceCloneModal from '../components/VoiceCloneModal';
|
||||||
|
|
@ -73,6 +79,7 @@ const SettingsScreen: React.FC = () => {
|
||||||
const [autoDownload, setAutoDownload] = useState(true);
|
const [autoDownload, setAutoDownload] = useState(true);
|
||||||
const [storageSize, setStorageSize] = useState('...');
|
const [storageSize, setStorageSize] = useState('...');
|
||||||
const [ttsEnabled, setTtsEnabled] = useState(true);
|
const [ttsEnabled, setTtsEnabled] = useState(true);
|
||||||
|
const [ttsPrerollSec, setTtsPrerollSec] = useState<number>(TTS_PREROLL_DEFAULT_SEC);
|
||||||
const [editingPath, setEditingPath] = useState(false);
|
const [editingPath, setEditingPath] = useState(false);
|
||||||
const [xttsVoice, setXttsVoice] = useState('');
|
const [xttsVoice, setXttsVoice] = useState('');
|
||||||
const [availableVoices, setAvailableVoices] = useState<Array<{name: string, size: number}>>([]);
|
const [availableVoices, setAvailableVoices] = useState<Array<{name: string, size: number}>>([]);
|
||||||
|
|
@ -99,6 +106,14 @@ const SettingsScreen: React.FC = () => {
|
||||||
AsyncStorage.getItem('aria_tts_enabled').then(saved => {
|
AsyncStorage.getItem('aria_tts_enabled').then(saved => {
|
||||||
if (saved !== null) setTtsEnabled(saved === 'true');
|
if (saved !== null) setTtsEnabled(saved === 'true');
|
||||||
});
|
});
|
||||||
|
AsyncStorage.getItem(TTS_PREROLL_STORAGE_KEY).then(saved => {
|
||||||
|
if (saved != null) {
|
||||||
|
const n = parseFloat(saved);
|
||||||
|
if (isFinite(n) && n >= TTS_PREROLL_MIN_SEC && n <= TTS_PREROLL_MAX_SEC) {
|
||||||
|
setTtsPrerollSec(n);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
AsyncStorage.getItem('aria_xtts_voice').then(saved => {
|
AsyncStorage.getItem('aria_xtts_voice').then(saved => {
|
||||||
if (saved) setXttsVoice(saved);
|
if (saved) setXttsVoice(saved);
|
||||||
});
|
});
|
||||||
|
|
@ -527,6 +542,42 @@ const SettingsScreen: React.FC = () => {
|
||||||
/>
|
/>
|
||||||
</View>
|
</View>
|
||||||
|
|
||||||
|
{ttsEnabled && (
|
||||||
|
<View style={{marginTop: 20}}>
|
||||||
|
<Text style={styles.toggleLabel}>Puffer vor Wiedergabestart</Text>
|
||||||
|
<Text style={styles.toggleHint}>
|
||||||
|
Wie viel Audio gesammelt wird bevor die Wiedergabe startet.
|
||||||
|
Hoeher = robuster gegen Render-Pausen, aber mehr Startverzoegerung.
|
||||||
|
Default: {TTS_PREROLL_DEFAULT_SEC.toFixed(1)}s.
|
||||||
|
</Text>
|
||||||
|
<View style={styles.prerollRow}>
|
||||||
|
<TouchableOpacity
|
||||||
|
style={styles.prerollButton}
|
||||||
|
onPress={() => {
|
||||||
|
const next = Math.max(TTS_PREROLL_MIN_SEC, Math.round((ttsPrerollSec - 0.5) * 10) / 10);
|
||||||
|
setTtsPrerollSec(next);
|
||||||
|
AsyncStorage.setItem(TTS_PREROLL_STORAGE_KEY, String(next));
|
||||||
|
}}
|
||||||
|
disabled={ttsPrerollSec <= TTS_PREROLL_MIN_SEC}
|
||||||
|
>
|
||||||
|
<Text style={styles.prerollButtonText}>−0.5</Text>
|
||||||
|
</TouchableOpacity>
|
||||||
|
<Text style={styles.prerollValue}>{ttsPrerollSec.toFixed(1)} s</Text>
|
||||||
|
<TouchableOpacity
|
||||||
|
style={styles.prerollButton}
|
||||||
|
onPress={() => {
|
||||||
|
const next = Math.min(TTS_PREROLL_MAX_SEC, Math.round((ttsPrerollSec + 0.5) * 10) / 10);
|
||||||
|
setTtsPrerollSec(next);
|
||||||
|
AsyncStorage.setItem(TTS_PREROLL_STORAGE_KEY, String(next));
|
||||||
|
}}
|
||||||
|
disabled={ttsPrerollSec >= TTS_PREROLL_MAX_SEC}
|
||||||
|
>
|
||||||
|
<Text style={styles.prerollButtonText}>+0.5</Text>
|
||||||
|
</TouchableOpacity>
|
||||||
|
</View>
|
||||||
|
</View>
|
||||||
|
)}
|
||||||
|
|
||||||
{ttsEnabled && (
|
{ttsEnabled && (
|
||||||
<View style={{marginTop: 20}}>
|
<View style={{marginTop: 20}}>
|
||||||
<Text style={styles.toggleLabel}>Stimme (geraetelokal)</Text>
|
<Text style={styles.toggleLabel}>Stimme (geraetelokal)</Text>
|
||||||
|
|
@ -1118,6 +1169,34 @@ const styles = StyleSheet.create({
|
||||||
bottomSpacer: {
|
bottomSpacer: {
|
||||||
height: 40,
|
height: 40,
|
||||||
},
|
},
|
||||||
|
|
||||||
|
prerollRow: {
|
||||||
|
flexDirection: 'row',
|
||||||
|
alignItems: 'center',
|
||||||
|
justifyContent: 'center',
|
||||||
|
marginTop: 12,
|
||||||
|
gap: 16,
|
||||||
|
},
|
||||||
|
prerollButton: {
|
||||||
|
backgroundColor: '#2A2A3E',
|
||||||
|
paddingHorizontal: 18,
|
||||||
|
paddingVertical: 10,
|
||||||
|
borderRadius: 8,
|
||||||
|
minWidth: 72,
|
||||||
|
alignItems: 'center',
|
||||||
|
},
|
||||||
|
prerollButtonText: {
|
||||||
|
color: '#FFFFFF',
|
||||||
|
fontSize: 16,
|
||||||
|
fontWeight: '600',
|
||||||
|
},
|
||||||
|
prerollValue: {
|
||||||
|
color: '#FFFFFF',
|
||||||
|
fontSize: 20,
|
||||||
|
fontWeight: '700',
|
||||||
|
minWidth: 80,
|
||||||
|
textAlign: 'center',
|
||||||
|
},
|
||||||
});
|
});
|
||||||
|
|
||||||
export default SettingsScreen;
|
export default SettingsScreen;
|
||||||
|
|
|
||||||
|
|
@ -9,6 +9,7 @@
|
||||||
import { Platform, PermissionsAndroid, NativeModules } from 'react-native';
|
import { Platform, PermissionsAndroid, NativeModules } from 'react-native';
|
||||||
import Sound from 'react-native-sound';
|
import Sound from 'react-native-sound';
|
||||||
import RNFS from 'react-native-fs';
|
import RNFS from 'react-native-fs';
|
||||||
|
import AsyncStorage from '@react-native-async-storage/async-storage';
|
||||||
import AudioRecorderPlayer, {
|
import AudioRecorderPlayer, {
|
||||||
AudioEncoderAndroidType,
|
AudioEncoderAndroidType,
|
||||||
AudioSourceAndroidType,
|
AudioSourceAndroidType,
|
||||||
|
|
@ -41,7 +42,7 @@ const { AudioFocus, PcmStreamPlayer } = NativeModules as {
|
||||||
release: () => Promise<boolean>;
|
release: () => Promise<boolean>;
|
||||||
};
|
};
|
||||||
PcmStreamPlayer?: {
|
PcmStreamPlayer?: {
|
||||||
start: (sampleRate: number, channels: number) => Promise<boolean>;
|
start: (sampleRate: number, channels: number, prerollSeconds: number) => Promise<boolean>;
|
||||||
writeChunk: (base64Pcm: string) => Promise<boolean>;
|
writeChunk: (base64Pcm: string) => Promise<boolean>;
|
||||||
end: () => Promise<boolean>;
|
end: () => Promise<boolean>;
|
||||||
stop: () => Promise<boolean>;
|
stop: () => Promise<boolean>;
|
||||||
|
|
@ -80,6 +81,26 @@ const VAD_SPEECH_MIN_MS = 500; // ms Sprache bevor Aufnahme zaehlt — l
|
||||||
// Max-Dauer einer Aufnahme in Gespraechsmodus (Notbremse gegen Runaway-Loops)
|
// Max-Dauer einer Aufnahme in Gespraechsmodus (Notbremse gegen Runaway-Loops)
|
||||||
const MAX_RECORDING_MS = 30000;
|
const MAX_RECORDING_MS = 30000;
|
||||||
|
|
||||||
|
// Pre-Roll: Wie lange Audio im AudioTrack-Buffer liegt bevor play() startet.
|
||||||
|
// Einstellbar via Diagnostic/Settings (Key: aria_tts_preroll_sec).
|
||||||
|
export const TTS_PREROLL_DEFAULT_SEC = 3.5;
|
||||||
|
export const TTS_PREROLL_MIN_SEC = 1.0;
|
||||||
|
export const TTS_PREROLL_MAX_SEC = 6.0;
|
||||||
|
export const TTS_PREROLL_STORAGE_KEY = 'aria_tts_preroll_sec';
|
||||||
|
|
||||||
|
async function loadPrerollSec(): Promise<number> {
|
||||||
|
try {
|
||||||
|
const raw = await AsyncStorage.getItem(TTS_PREROLL_STORAGE_KEY);
|
||||||
|
if (raw != null) {
|
||||||
|
const n = parseFloat(raw);
|
||||||
|
if (isFinite(n) && n >= TTS_PREROLL_MIN_SEC && n <= TTS_PREROLL_MAX_SEC) {
|
||||||
|
return n;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch {}
|
||||||
|
return TTS_PREROLL_DEFAULT_SEC;
|
||||||
|
}
|
||||||
|
|
||||||
// --- Audio-Service ---
|
// --- Audio-Service ---
|
||||||
|
|
||||||
class AudioService {
|
class AudioService {
|
||||||
|
|
@ -373,8 +394,9 @@ class AudioService {
|
||||||
this.pcmBuffer = [];
|
this.pcmBuffer = [];
|
||||||
this.pcmBytesCollected = 0;
|
this.pcmBytesCollected = 0;
|
||||||
if (!silent) {
|
if (!silent) {
|
||||||
|
const prerollSec = await loadPrerollSec();
|
||||||
try {
|
try {
|
||||||
await PcmStreamPlayer!.start(sampleRate, channels);
|
await PcmStreamPlayer!.start(sampleRate, channels, prerollSec);
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
console.error('[Audio] PcmStreamPlayer.start fehlgeschlagen:', err);
|
console.error('[Audio] PcmStreamPlayer.start fehlgeschlagen:', err);
|
||||||
this.pcmStreamActive = false;
|
this.pcmStreamActive = false;
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue