Compare commits
34 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 97a1a3089a | |||
| 64f18e97a0 | |||
| 9cbea27455 | |||
| c8881f9e4d | |||
| 028e3b2240 | |||
| c042f27106 | |||
| 4ceadf8be5 | |||
| ddd30b3059 | |||
| 6c8ba5fe2d | |||
| 32ddac002f | |||
| bbbe69d928 | |||
| 23c39d5bba | |||
| 5328dc8595 | |||
| 0c03b4f161 | |||
| 31fe70bab5 | |||
| 39251b3d32 | |||
| 0623de32a0 | |||
| cd5e6e7ee6 | |||
| ee3e0a0af6 | |||
| 0783b1b99d | |||
| 5492c7a46f | |||
| 4cbe184faa | |||
| 647a1cb726 | |||
| 73263b69a6 | |||
| c62ceafdc2 | |||
| 9b5a35cb4a | |||
| 5ac1a0a522 | |||
| a28b46a809 | |||
| 59c8d36a3d | |||
| 79ba7b8487 | |||
| ba62cec78c | |||
| f15b3f583f | |||
| 402bddc18a | |||
| 350069d371 |
@@ -79,8 +79,8 @@ android {
|
|||||||
applicationId "com.ariacockpit"
|
applicationId "com.ariacockpit"
|
||||||
minSdkVersion rootProject.ext.minSdkVersion
|
minSdkVersion rootProject.ext.minSdkVersion
|
||||||
targetSdkVersion rootProject.ext.targetSdkVersion
|
targetSdkVersion rootProject.ext.targetSdkVersion
|
||||||
versionCode 402
|
versionCode 501
|
||||||
versionName "0.0.4.2"
|
versionName "0.0.5.1"
|
||||||
// Fallback fuer Libraries mit Product Flavors
|
// Fallback fuer Libraries mit Product Flavors
|
||||||
missingDimensionStrategy 'react-native-camera', 'general'
|
missingDimensionStrategy 'react-native-camera', 'general'
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -13,22 +13,30 @@ import com.facebook.react.bridge.ReactMethod
|
|||||||
import java.util.concurrent.LinkedBlockingQueue
|
import java.util.concurrent.LinkedBlockingQueue
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Streamt PCM-s16le Audio direkt via AudioTrack MODE_STREAM.
|
* Streamt PCM-s16le Audio direkt via AudioTrack MODE_STREAM mit Pre-Roll.
|
||||||
|
*
|
||||||
|
* Pre-Roll: AudioTrack wird zwar direkt gebaut und gefuttert, aber play()
|
||||||
|
* wird erst aufgerufen wenn PREROLL_SECONDS Audio im Buffer ist. So hat
|
||||||
|
* der Stream Zeit einen Vorrat aufzubauen — wenn XTTS mit RTF>1 rendert
|
||||||
|
* (langsamer als Echtzeit), laeuft der Buffer trotzdem nicht leer.
|
||||||
*
|
*
|
||||||
* Flow:
|
* Flow:
|
||||||
* JS: start(sampleRate, channels) → öffnet AudioTrack und startet Writer-Thread
|
* JS: start(sampleRate, channels) → öffnet AudioTrack (noch nicht play())
|
||||||
* JS: writeChunk(base64) → dekodiert, queued, Writer schreibt non-blocking
|
* JS: writeChunk(base64) → dekodiert, queued, Writer schreibt
|
||||||
* JS: end() → wartet bis Queue leer, schließt AudioTrack
|
* Writer: spielt los sobald PREROLL erreicht ist
|
||||||
* JS: stop() → Hart stoppen, Queue leeren (Cancel)
|
* JS: end() → wartet bis Queue leer, schließt
|
||||||
*
|
* JS: stop() → Hart stoppen (Cancel)
|
||||||
* Vorteil gegenüber Sound-File-Queue:
|
|
||||||
* - Keine Gap zwischen Chunks (AudioTrack puffert intern)
|
|
||||||
* - Erste Samples beginnen zu spielen sobald der erste Chunk da ist
|
|
||||||
* - Kein WAV-Header-Parsing pro Chunk
|
|
||||||
*/
|
*/
|
||||||
class PcmStreamPlayerModule(reactContext: ReactApplicationContext) : ReactContextBaseJavaModule(reactContext) {
|
class PcmStreamPlayerModule(reactContext: ReactApplicationContext) : ReactContextBaseJavaModule(reactContext) {
|
||||||
companion object {
    /** Tag used for all Logcat output of this module. */
    private const val TAG: String = "PcmStreamPlayer"

    /** Fallback pre-roll duration in seconds for when JS does not supply a usable value. */
    private const val DEFAULT_PREROLL_SECONDS: Double = 3.5

    /** Lower bound (seconds) the requested pre-roll is clamped to in start(). */
    private const val MIN_PREROLL_SECONDS: Double = 0.5

    /** Upper bound (seconds) the requested pre-roll is clamped to in start(). */
    private const val MAX_PREROLL_SECONDS: Double = 10.0

    /**
     * Seconds of silence written at the very start of the stream so the AudioTrack
     * can spin up cleanly and the first samples are not cut off
     * (XTTS warm-up + play() latency).
     */
    private const val LEADING_SILENCE_SECONDS: Double = 0.2
}
|
||||||
|
|
||||||
override fun getName() = "PcmStreamPlayer"
|
override fun getName() = "PcmStreamPlayer"
|
||||||
@@ -38,20 +46,34 @@ class PcmStreamPlayerModule(reactContext: ReactApplicationContext) : ReactContex
|
|||||||
private var writerThread: Thread? = null
|
private var writerThread: Thread? = null
|
||||||
@Volatile private var writerShouldStop = false
|
@Volatile private var writerShouldStop = false
|
||||||
@Volatile private var endRequested = false
|
@Volatile private var endRequested = false
|
||||||
|
@Volatile private var prerollBytes: Int = 0
|
||||||
|
@Volatile private var playbackStarted = false
|
||||||
|
@Volatile private var bytesBuffered: Long = 0
|
||||||
|
@Volatile private var streamBytesPerFrame: Int = 2 // mono s16le default
|
||||||
|
|
||||||
// ── Lifecycle ──
|
// ── Lifecycle ──
|
||||||
|
|
||||||
@ReactMethod
|
@ReactMethod
|
||||||
fun start(sampleRate: Int, channels: Int, promise: Promise) {
|
fun start(sampleRate: Int, channels: Int, prerollSeconds: Double, promise: Promise) {
|
||||||
try {
|
try {
|
||||||
// Alte Session beenden falls vorhanden
|
// Alte Session beenden falls vorhanden
|
||||||
stopInternal()
|
stopInternal()
|
||||||
|
|
||||||
|
val prerollSec = prerollSeconds
|
||||||
|
.coerceIn(MIN_PREROLL_SECONDS, MAX_PREROLL_SECONDS)
|
||||||
|
.let { if (it.isFinite() && it > 0) it else DEFAULT_PREROLL_SECONDS }
|
||||||
|
|
||||||
val channelConfig = if (channels == 2) AudioFormat.CHANNEL_OUT_STEREO else AudioFormat.CHANNEL_OUT_MONO
|
val channelConfig = if (channels == 2) AudioFormat.CHANNEL_OUT_STEREO else AudioFormat.CHANNEL_OUT_MONO
|
||||||
val encoding = AudioFormat.ENCODING_PCM_16BIT
|
val encoding = AudioFormat.ENCODING_PCM_16BIT
|
||||||
val minBuf = AudioTrack.getMinBufferSize(sampleRate, channelConfig, encoding)
|
val minBuf = AudioTrack.getMinBufferSize(sampleRate, channelConfig, encoding)
|
||||||
// Etwas grosszuegiger Buffer: 8x MinSize (ca. 200-400ms bei 24kHz) — glatt auch bei kleinen Netzwerk-Aussetzern
|
val bytesPerSecond = sampleRate * channels * 2 // 16-bit = 2 bytes
|
||||||
val bufferSize = (minBuf * 8).coerceAtLeast(32 * 1024)
|
// Buffer muss mindestens PREROLL + etwas Spielraum fassen.
|
||||||
|
val prerollTarget = (bytesPerSecond * prerollSec).toInt()
|
||||||
|
val bufferSize = (minBuf * 32).coerceAtLeast(prerollTarget * 2)
|
||||||
|
prerollBytes = prerollTarget
|
||||||
|
bytesBuffered = 0
|
||||||
|
playbackStarted = false
|
||||||
|
streamBytesPerFrame = channels * 2 // s16 = 2 bytes per sample
|
||||||
|
|
||||||
val newTrack = AudioTrack.Builder()
|
val newTrack = AudioTrack.Builder()
|
||||||
.setAudioAttributes(
|
.setAudioAttributes(
|
||||||
@@ -71,7 +93,7 @@ class PcmStreamPlayerModule(reactContext: ReactApplicationContext) : ReactContex
|
|||||||
.setTransferMode(AudioTrack.MODE_STREAM)
|
.setTransferMode(AudioTrack.MODE_STREAM)
|
||||||
.build()
|
.build()
|
||||||
|
|
||||||
newTrack.play()
|
// AudioTrack erstellen — play() wird erst aufgerufen wenn Pre-Roll erreicht.
|
||||||
track = newTrack
|
track = newTrack
|
||||||
queue.clear()
|
queue.clear()
|
||||||
writerShouldStop = false
|
writerShouldStop = false
|
||||||
@@ -80,27 +102,83 @@ class PcmStreamPlayerModule(reactContext: ReactApplicationContext) : ReactContex
|
|||||||
writerThread = Thread({
|
writerThread = Thread({
|
||||||
val t = track ?: return@Thread
|
val t = track ?: return@Thread
|
||||||
try {
|
try {
|
||||||
|
// Leading-Silence in den Buffer — gibt AudioTrack Zeit anzufahren.
|
||||||
|
val silenceBytes = ((sampleRate * channels * 2) * LEADING_SILENCE_SECONDS).toInt() and 0x7FFFFFFE
|
||||||
|
if (silenceBytes > 0) {
|
||||||
|
val silence = ByteArray(silenceBytes)
|
||||||
|
var silOff = 0
|
||||||
|
while (silOff < silence.size && !writerShouldStop) {
|
||||||
|
val w = t.write(silence, silOff, silence.size - silOff)
|
||||||
|
if (w <= 0) break
|
||||||
|
silOff += w
|
||||||
|
}
|
||||||
|
bytesBuffered += silence.size
|
||||||
|
}
|
||||||
while (!writerShouldStop) {
|
while (!writerShouldStop) {
|
||||||
val data = queue.poll(50, java.util.concurrent.TimeUnit.MILLISECONDS) ?: run {
|
val data = queue.poll(50, java.util.concurrent.TimeUnit.MILLISECONDS) ?: run {
|
||||||
if (endRequested) return@Thread
|
if (endRequested) {
|
||||||
|
// Falls wir vor Pre-Roll enden (kurzer Text): trotzdem abspielen
|
||||||
|
if (!playbackStarted) {
|
||||||
|
try { t.play() } catch (_: Exception) {}
|
||||||
|
playbackStarted = true
|
||||||
|
}
|
||||||
|
return@Thread
|
||||||
|
}
|
||||||
null
|
null
|
||||||
} ?: continue
|
} ?: continue
|
||||||
|
|
||||||
|
// Pre-Roll Check: play() erst wenn genug gepuffert
|
||||||
|
if (!playbackStarted && bytesBuffered + data.size >= prerollBytes) {
|
||||||
|
try {
|
||||||
|
t.play()
|
||||||
|
playbackStarted = true
|
||||||
|
Log.i(TAG, "Playback gestartet nach Pre-Roll ${bytesBuffered + data.size} Bytes")
|
||||||
|
} catch (e: Exception) {
|
||||||
|
Log.w(TAG, "play() failed: ${e.message}")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
var offset = 0
|
var offset = 0
|
||||||
while (offset < data.size && !writerShouldStop) {
|
while (offset < data.size && !writerShouldStop) {
|
||||||
val written = t.write(data, offset, data.size - offset)
|
val written = t.write(data, offset, data.size - offset)
|
||||||
if (written <= 0) break
|
if (written <= 0) break
|
||||||
offset += written
|
offset += written
|
||||||
}
|
}
|
||||||
|
bytesBuffered += data.size
|
||||||
}
|
}
|
||||||
} catch (e: Exception) {
|
} catch (e: Exception) {
|
||||||
Log.w(TAG, "Writer-Thread Fehler: ${e.message}")
|
Log.w(TAG, "Writer-Thread Fehler: ${e.message}")
|
||||||
} finally {
|
} finally {
|
||||||
|
// Warten bis alle geschriebenen Samples tatsaechlich abgespielt sind,
|
||||||
|
// sonst cuttet t.release() die letzten Sekunden ab.
|
||||||
|
try {
|
||||||
|
val totalFrames = (bytesBuffered / streamBytesPerFrame).toInt()
|
||||||
|
var lastPos = -1
|
||||||
|
var stalledCount = 0
|
||||||
|
while (!writerShouldStop) {
|
||||||
|
val pos = t.playbackHeadPosition
|
||||||
|
if (pos >= totalFrames) break
|
||||||
|
// Safety: wenn Position 2s nicht mehr vorwaerts → AudioTrack hing
|
||||||
|
if (pos == lastPos) {
|
||||||
|
stalledCount++
|
||||||
|
if (stalledCount > 40) {
|
||||||
|
Log.w(TAG, "playback stalled at $pos/$totalFrames — give up")
|
||||||
|
break
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
stalledCount = 0
|
||||||
|
lastPos = pos
|
||||||
|
}
|
||||||
|
Thread.sleep(50)
|
||||||
|
}
|
||||||
|
Log.i(TAG, "Playback fertig: frames=$totalFrames pos=${t.playbackHeadPosition}")
|
||||||
|
} catch (_: Exception) {}
|
||||||
try { t.stop() } catch (_: Exception) {}
|
try { t.stop() } catch (_: Exception) {}
|
||||||
try { t.release() } catch (_: Exception) {}
|
try { t.release() } catch (_: Exception) {}
|
||||||
}
|
}
|
||||||
}, "PcmStreamWriter").apply { start() }
|
}, "PcmStreamWriter").apply { start() }
|
||||||
|
|
||||||
Log.i(TAG, "Stream gestartet: ${sampleRate}Hz ch=$channels buf=${bufferSize}B")
|
Log.i(TAG, "Stream gestartet: ${sampleRate}Hz ch=$channels buf=${bufferSize}B preroll=${prerollBytes}B (${prerollSec}s)")
|
||||||
promise.resolve(true)
|
promise.resolve(true)
|
||||||
} catch (e: Exception) {
|
} catch (e: Exception) {
|
||||||
Log.e(TAG, "start fehlgeschlagen", e)
|
Log.e(TAG, "start fehlgeschlagen", e)
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "aria-cockpit",
|
"name": "aria-cockpit",
|
||||||
"version": "0.0.4.2",
|
"version": "0.0.5.1",
|
||||||
"private": true,
|
"private": true,
|
||||||
"scripts": {
|
"scripts": {
|
||||||
"android": "react-native run-android",
|
"android": "react-native run-android",
|
||||||
|
|||||||
@@ -18,6 +18,7 @@ import {
|
|||||||
Image,
|
Image,
|
||||||
ScrollView,
|
ScrollView,
|
||||||
Modal,
|
Modal,
|
||||||
|
ToastAndroid,
|
||||||
} from 'react-native';
|
} from 'react-native';
|
||||||
import AsyncStorage from '@react-native-async-storage/async-storage';
|
import AsyncStorage from '@react-native-async-storage/async-storage';
|
||||||
import RNFS from 'react-native-fs';
|
import RNFS from 'react-native-fs';
|
||||||
@@ -325,6 +326,26 @@ const ChatScreen: React.FC = () => {
|
|||||||
const tool = (message.payload.tool as string) || '';
|
const tool = (message.payload.tool as string) || '';
|
||||||
setAgentActivity({ activity, tool });
|
setAgentActivity({ activity, tool });
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Voice-Config aus Diagnostic — setzt die lokale App-Stimme auf den
|
||||||
|
// gerade in Diagnostic gewaehlten Wert zurueck. User-Wahl in der App
|
||||||
|
// wird dadurch ueberschrieben.
|
||||||
|
if (message.type === ('config' as any)) {
|
||||||
|
const newVoice = ((message.payload as any).xttsVoice as string) ?? '';
|
||||||
|
localXttsVoiceRef.current = newVoice;
|
||||||
|
AsyncStorage.setItem('aria_xtts_voice', newVoice);
|
||||||
|
}
|
||||||
|
|
||||||
|
// XTTS-Bridge meldet Stimme fertig geladen (kurzer Status-Toast)
|
||||||
|
if (message.type === ('voice_ready' as any)) {
|
||||||
|
const v = ((message.payload as any).voice as string) ?? '';
|
||||||
|
const err = (message.payload as any).error as string | undefined;
|
||||||
|
if (err) {
|
||||||
|
ToastAndroid.show(`Stimme "${v}" Fehler: ${err}`, ToastAndroid.LONG);
|
||||||
|
} else {
|
||||||
|
ToastAndroid.show(`Stimme "${v || 'Standard'}" bereit`, ToastAndroid.SHORT);
|
||||||
|
}
|
||||||
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
const unsubState = rvs.onStateChange((state) => {
|
const unsubState = rvs.onStateChange((state) => {
|
||||||
|
|||||||
@@ -15,11 +15,19 @@ import {
|
|||||||
StyleSheet,
|
StyleSheet,
|
||||||
Alert,
|
Alert,
|
||||||
Platform,
|
Platform,
|
||||||
|
ToastAndroid,
|
||||||
|
ActivityIndicator,
|
||||||
} from 'react-native';
|
} from 'react-native';
|
||||||
import AsyncStorage from '@react-native-async-storage/async-storage';
|
import AsyncStorage from '@react-native-async-storage/async-storage';
|
||||||
import RNFS from 'react-native-fs';
|
import RNFS from 'react-native-fs';
|
||||||
import DocumentPicker from 'react-native-document-picker';
|
import DocumentPicker from 'react-native-document-picker';
|
||||||
import rvs, { ConnectionState, RVSMessage, ConnectionConfig, ConnectionLogEntry } from '../services/rvs';
|
import rvs, { ConnectionState, RVSMessage, ConnectionConfig, ConnectionLogEntry } from '../services/rvs';
|
||||||
|
import {
|
||||||
|
TTS_PREROLL_DEFAULT_SEC,
|
||||||
|
TTS_PREROLL_MIN_SEC,
|
||||||
|
TTS_PREROLL_MAX_SEC,
|
||||||
|
TTS_PREROLL_STORAGE_KEY,
|
||||||
|
} from '../services/audio';
|
||||||
import ModeSelector from '../components/ModeSelector';
|
import ModeSelector from '../components/ModeSelector';
|
||||||
import QRScanner from '../components/QRScanner';
|
import QRScanner from '../components/QRScanner';
|
||||||
import VoiceCloneModal from '../components/VoiceCloneModal';
|
import VoiceCloneModal from '../components/VoiceCloneModal';
|
||||||
@@ -73,8 +81,10 @@ const SettingsScreen: React.FC = () => {
|
|||||||
const [autoDownload, setAutoDownload] = useState(true);
|
const [autoDownload, setAutoDownload] = useState(true);
|
||||||
const [storageSize, setStorageSize] = useState('...');
|
const [storageSize, setStorageSize] = useState('...');
|
||||||
const [ttsEnabled, setTtsEnabled] = useState(true);
|
const [ttsEnabled, setTtsEnabled] = useState(true);
|
||||||
|
const [ttsPrerollSec, setTtsPrerollSec] = useState<number>(TTS_PREROLL_DEFAULT_SEC);
|
||||||
const [editingPath, setEditingPath] = useState(false);
|
const [editingPath, setEditingPath] = useState(false);
|
||||||
const [xttsVoice, setXttsVoice] = useState('');
|
const [xttsVoice, setXttsVoice] = useState('');
|
||||||
|
const [loadingVoice, setLoadingVoice] = useState<string | null>(null);
|
||||||
const [availableVoices, setAvailableVoices] = useState<Array<{name: string, size: number}>>([]);
|
const [availableVoices, setAvailableVoices] = useState<Array<{name: string, size: number}>>([]);
|
||||||
const [voiceCloneVisible, setVoiceCloneVisible] = useState(false);
|
const [voiceCloneVisible, setVoiceCloneVisible] = useState(false);
|
||||||
const [tempPath, setTempPath] = useState('');
|
const [tempPath, setTempPath] = useState('');
|
||||||
@@ -99,6 +109,14 @@ const SettingsScreen: React.FC = () => {
|
|||||||
AsyncStorage.getItem('aria_tts_enabled').then(saved => {
|
AsyncStorage.getItem('aria_tts_enabled').then(saved => {
|
||||||
if (saved !== null) setTtsEnabled(saved === 'true');
|
if (saved !== null) setTtsEnabled(saved === 'true');
|
||||||
});
|
});
|
||||||
|
AsyncStorage.getItem(TTS_PREROLL_STORAGE_KEY).then(saved => {
|
||||||
|
if (saved != null) {
|
||||||
|
const n = parseFloat(saved);
|
||||||
|
if (isFinite(n) && n >= TTS_PREROLL_MIN_SEC && n <= TTS_PREROLL_MAX_SEC) {
|
||||||
|
setTtsPrerollSec(n);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
AsyncStorage.getItem('aria_xtts_voice').then(saved => {
|
AsyncStorage.getItem('aria_xtts_voice').then(saved => {
|
||||||
if (saved) setXttsVoice(saved);
|
if (saved) setXttsVoice(saved);
|
||||||
});
|
});
|
||||||
@@ -250,6 +268,31 @@ const SettingsScreen: React.FC = () => {
|
|||||||
}
|
}
|
||||||
rvs.send('xtts_list_voices' as any, {});
|
rvs.send('xtts_list_voices' as any, {});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Diagnostic-Voice-Wechsel → lokale App-Stimme auf den neuen Default zuruecksetzen.
|
||||||
|
// Zusaetzlich Preload triggern, damit der User weiss wann's geladen ist.
|
||||||
|
if (message.type === ('config' as any)) {
|
||||||
|
const newVoice = ((message.payload as any).xttsVoice as string) ?? '';
|
||||||
|
setXttsVoice(newVoice);
|
||||||
|
AsyncStorage.setItem('aria_xtts_voice', newVoice);
|
||||||
|
if (newVoice) {
|
||||||
|
setLoadingVoice(newVoice);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// XTTS-Bridge meldet: Stimme fertig geladen
|
||||||
|
if (message.type === ('voice_ready' as any)) {
|
||||||
|
const v = ((message.payload as any).voice as string) ?? '';
|
||||||
|
const err = (message.payload as any).error as string | undefined;
|
||||||
|
const ms = (message.payload as any).loadMs as number | undefined;
|
||||||
|
setLoadingVoice(null);
|
||||||
|
if (err) {
|
||||||
|
ToastAndroid.show(`Stimme "${v}" konnte nicht geladen werden: ${err}`, ToastAndroid.LONG);
|
||||||
|
} else {
|
||||||
|
const suffix = ms ? ` (${(ms / 1000).toFixed(1)}s)` : '';
|
||||||
|
ToastAndroid.show(`Stimme "${v || 'Standard'}" bereit${suffix}`, ToastAndroid.SHORT);
|
||||||
|
}
|
||||||
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
return () => {
|
return () => {
|
||||||
@@ -318,6 +361,13 @@ const SettingsScreen: React.FC = () => {
|
|||||||
// Select a voice for this device: update the UI state, persist the choice and,
// for custom voices only, ask the backend to preload it.
const selectVoice = useCallback((voiceName: string) => {
  setXttsVoice(voiceName);
  AsyncStorage.setItem('aria_xtts_voice', voiceName);

  // An empty name means the default voice, which needs no loading step,
  // so just clear any pending loading indicator.
  if (!voiceName) {
    setLoadingVoice(null);
    return;
  }

  // Custom voice: mark it as loading and request the preload.
  setLoadingVoice(voiceName);
  rvs.send('voice_preload' as any, { voice: voiceName, source: 'app' });
}, []);
|
||||||
|
|
||||||
const deleteVoice = useCallback((name: string) => {
|
const deleteVoice = useCallback((name: string) => {
|
||||||
@@ -527,6 +577,42 @@ const SettingsScreen: React.FC = () => {
|
|||||||
/>
|
/>
|
||||||
</View>
|
</View>
|
||||||
|
|
||||||
|
{ttsEnabled && (
|
||||||
|
<View style={{marginTop: 20}}>
|
||||||
|
<Text style={styles.toggleLabel}>Puffer vor Wiedergabestart</Text>
|
||||||
|
<Text style={styles.toggleHint}>
|
||||||
|
Wie viel Audio gesammelt wird bevor die Wiedergabe startet.
|
||||||
|
Hoeher = robuster gegen Render-Pausen, aber mehr Startverzoegerung.
|
||||||
|
Default: {TTS_PREROLL_DEFAULT_SEC.toFixed(1)}s.
|
||||||
|
</Text>
|
||||||
|
<View style={styles.prerollRow}>
|
||||||
|
<TouchableOpacity
|
||||||
|
style={styles.prerollButton}
|
||||||
|
onPress={() => {
|
||||||
|
const next = Math.max(TTS_PREROLL_MIN_SEC, Math.round((ttsPrerollSec - 0.5) * 10) / 10);
|
||||||
|
setTtsPrerollSec(next);
|
||||||
|
AsyncStorage.setItem(TTS_PREROLL_STORAGE_KEY, String(next));
|
||||||
|
}}
|
||||||
|
disabled={ttsPrerollSec <= TTS_PREROLL_MIN_SEC}
|
||||||
|
>
|
||||||
|
<Text style={styles.prerollButtonText}>−0.5</Text>
|
||||||
|
</TouchableOpacity>
|
||||||
|
<Text style={styles.prerollValue}>{ttsPrerollSec.toFixed(1)} s</Text>
|
||||||
|
<TouchableOpacity
|
||||||
|
style={styles.prerollButton}
|
||||||
|
onPress={() => {
|
||||||
|
const next = Math.min(TTS_PREROLL_MAX_SEC, Math.round((ttsPrerollSec + 0.5) * 10) / 10);
|
||||||
|
setTtsPrerollSec(next);
|
||||||
|
AsyncStorage.setItem(TTS_PREROLL_STORAGE_KEY, String(next));
|
||||||
|
}}
|
||||||
|
disabled={ttsPrerollSec >= TTS_PREROLL_MAX_SEC}
|
||||||
|
>
|
||||||
|
<Text style={styles.prerollButtonText}>+0.5</Text>
|
||||||
|
</TouchableOpacity>
|
||||||
|
</View>
|
||||||
|
</View>
|
||||||
|
)}
|
||||||
|
|
||||||
{ttsEnabled && (
|
{ttsEnabled && (
|
||||||
<View style={{marginTop: 20}}>
|
<View style={{marginTop: 20}}>
|
||||||
<Text style={styles.toggleLabel}>Stimme (geraetelokal)</Text>
|
<Text style={styles.toggleLabel}>Stimme (geraetelokal)</Text>
|
||||||
@@ -561,7 +647,10 @@ const SettingsScreen: React.FC = () => {
|
|||||||
</Text>
|
</Text>
|
||||||
<Text style={styles.voiceRowMeta}>{(v.size / 1024).toFixed(0)} KB</Text>
|
<Text style={styles.voiceRowMeta}>{(v.size / 1024).toFixed(0)} KB</Text>
|
||||||
</TouchableOpacity>
|
</TouchableOpacity>
|
||||||
{xttsVoice === v.name && <Text style={styles.voiceRowCheck}>{'\u2713'}</Text>}
|
{loadingVoice === v.name && (
|
||||||
|
<ActivityIndicator size="small" color="#0096FF" style={{marginRight: 8}} />
|
||||||
|
)}
|
||||||
|
{xttsVoice === v.name && loadingVoice !== v.name && <Text style={styles.voiceRowCheck}>{'\u2713'}</Text>}
|
||||||
<TouchableOpacity onPress={() => deleteVoice(v.name)} style={styles.voiceRowDelete}>
|
<TouchableOpacity onPress={() => deleteVoice(v.name)} style={styles.voiceRowDelete}>
|
||||||
<Text style={styles.voiceRowDeleteIcon}>X</Text>
|
<Text style={styles.voiceRowDeleteIcon}>X</Text>
|
||||||
</TouchableOpacity>
|
</TouchableOpacity>
|
||||||
@@ -1118,6 +1207,34 @@ const styles = StyleSheet.create({
|
|||||||
bottomSpacer: {
|
bottomSpacer: {
|
||||||
height: 40,
|
height: 40,
|
||||||
},
|
},
|
||||||
|
|
||||||
|
prerollRow: {
|
||||||
|
flexDirection: 'row',
|
||||||
|
alignItems: 'center',
|
||||||
|
justifyContent: 'center',
|
||||||
|
marginTop: 12,
|
||||||
|
gap: 16,
|
||||||
|
},
|
||||||
|
prerollButton: {
|
||||||
|
backgroundColor: '#2A2A3E',
|
||||||
|
paddingHorizontal: 18,
|
||||||
|
paddingVertical: 10,
|
||||||
|
borderRadius: 8,
|
||||||
|
minWidth: 72,
|
||||||
|
alignItems: 'center',
|
||||||
|
},
|
||||||
|
prerollButtonText: {
|
||||||
|
color: '#FFFFFF',
|
||||||
|
fontSize: 16,
|
||||||
|
fontWeight: '600',
|
||||||
|
},
|
||||||
|
prerollValue: {
|
||||||
|
color: '#FFFFFF',
|
||||||
|
fontSize: 20,
|
||||||
|
fontWeight: '700',
|
||||||
|
minWidth: 80,
|
||||||
|
textAlign: 'center',
|
||||||
|
},
|
||||||
});
|
});
|
||||||
|
|
||||||
export default SettingsScreen;
|
export default SettingsScreen;
|
||||||
|
|||||||
@@ -9,6 +9,7 @@
|
|||||||
import { Platform, PermissionsAndroid, NativeModules } from 'react-native';
|
import { Platform, PermissionsAndroid, NativeModules } from 'react-native';
|
||||||
import Sound from 'react-native-sound';
|
import Sound from 'react-native-sound';
|
||||||
import RNFS from 'react-native-fs';
|
import RNFS from 'react-native-fs';
|
||||||
|
import AsyncStorage from '@react-native-async-storage/async-storage';
|
||||||
import AudioRecorderPlayer, {
|
import AudioRecorderPlayer, {
|
||||||
AudioEncoderAndroidType,
|
AudioEncoderAndroidType,
|
||||||
AudioSourceAndroidType,
|
AudioSourceAndroidType,
|
||||||
@@ -41,7 +42,7 @@ const { AudioFocus, PcmStreamPlayer } = NativeModules as {
|
|||||||
release: () => Promise<boolean>;
|
release: () => Promise<boolean>;
|
||||||
};
|
};
|
||||||
PcmStreamPlayer?: {
|
PcmStreamPlayer?: {
|
||||||
start: (sampleRate: number, channels: number) => Promise<boolean>;
|
start: (sampleRate: number, channels: number, prerollSeconds: number) => Promise<boolean>;
|
||||||
writeChunk: (base64Pcm: string) => Promise<boolean>;
|
writeChunk: (base64Pcm: string) => Promise<boolean>;
|
||||||
end: () => Promise<boolean>;
|
end: () => Promise<boolean>;
|
||||||
stop: () => Promise<boolean>;
|
stop: () => Promise<boolean>;
|
||||||
@@ -80,6 +81,26 @@ const VAD_SPEECH_MIN_MS = 500; // ms Sprache bevor Aufnahme zaehlt — l
|
|||||||
// Max-Dauer einer Aufnahme in Gespraechsmodus (Notbremse gegen Runaway-Loops)
|
// Max-Dauer einer Aufnahme in Gespraechsmodus (Notbremse gegen Runaway-Loops)
|
||||||
const MAX_RECORDING_MS = 30000;
|
const MAX_RECORDING_MS = 30000;
|
||||||
|
|
||||||
|
// Pre-Roll: Wie lange Audio im AudioTrack-Buffer liegt bevor play() startet.
|
||||||
|
// Einstellbar via Diagnostic/Settings (Key: aria_tts_preroll_sec).
|
||||||
|
export const TTS_PREROLL_DEFAULT_SEC = 3.5;
|
||||||
|
export const TTS_PREROLL_MIN_SEC = 1.0;
|
||||||
|
export const TTS_PREROLL_MAX_SEC = 6.0;
|
||||||
|
export const TTS_PREROLL_STORAGE_KEY = 'aria_tts_preroll_sec';
|
||||||
|
|
||||||
|
/**
 * Reads the persisted TTS pre-roll duration (in seconds) from AsyncStorage.
 *
 * The stored value is returned only when it parses to a finite number within
 * [TTS_PREROLL_MIN_SEC, TTS_PREROLL_MAX_SEC]. In every other case (nothing
 * stored, unparsable or out-of-range value, or the storage read throwing)
 * TTS_PREROLL_DEFAULT_SEC is returned.
 */
async function loadPrerollSec(): Promise<number> {
  let stored: string | null = null;
  try {
    stored = await AsyncStorage.getItem(TTS_PREROLL_STORAGE_KEY);
  } catch {
    // Best effort: a failing storage read simply falls back to the default.
    return TTS_PREROLL_DEFAULT_SEC;
  }

  if (stored == null) {
    return TTS_PREROLL_DEFAULT_SEC;
  }

  const seconds = parseFloat(stored);
  const usable =
    isFinite(seconds) &&
    seconds >= TTS_PREROLL_MIN_SEC &&
    seconds <= TTS_PREROLL_MAX_SEC;
  return usable ? seconds : TTS_PREROLL_DEFAULT_SEC;
}
|
||||||
|
|
||||||
// --- Audio-Service ---
|
// --- Audio-Service ---
|
||||||
|
|
||||||
class AudioService {
|
class AudioService {
|
||||||
@@ -373,8 +394,9 @@ class AudioService {
|
|||||||
this.pcmBuffer = [];
|
this.pcmBuffer = [];
|
||||||
this.pcmBytesCollected = 0;
|
this.pcmBytesCollected = 0;
|
||||||
if (!silent) {
|
if (!silent) {
|
||||||
|
const prerollSec = await loadPrerollSec();
|
||||||
try {
|
try {
|
||||||
await PcmStreamPlayer!.start(sampleRate, channels);
|
await PcmStreamPlayer!.start(sampleRate, channels, prerollSec);
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
console.error('[Audio] PcmStreamPlayer.start fehlgeschlagen:', err);
|
console.error('[Audio] PcmStreamPlayer.start fehlgeschlagen:', err);
|
||||||
this.pcmStreamActive = false;
|
this.pcmStreamActive = false;
|
||||||
|
|||||||
+26
-19
@@ -150,6 +150,15 @@ def _small_range_to_words(m):
|
|||||||
return f"{_num_to_words_de(a)} bis {_num_to_words_de(b)}"
|
return f"{_num_to_words_de(a)} bis {_num_to_words_de(b)}"
|
||||||
|
|
||||||
|
|
||||||
|
def _decimal_to_words(m):
    """Spell out a decimal-number regex match as German words.

    ``m`` is a match object with two groups: the integer digits (group 1) and
    the fractional digits (group 2). The integer part is spelled as one number
    when it lies in 0..59 and otherwise kept as digits; the fractional part is
    read digit by digit.

    Examples: '0.1' / '0,1' -> 'null komma eins', '1,25' -> 'eins komma zwei fuenf'.
    """
    whole = int(m.group(1))
    fraction_digits = m.group(2)

    # Integer parts outside 0..59 stay as plain digits instead of being spelled out.
    if 0 <= whole <= 59:
        whole_spoken = _num_to_words_de(whole)
    else:
        whole_spoken = str(whole)

    # Each fractional digit is spoken on its own.
    spoken_digits = [_num_to_words_de(int(ch)) for ch in fraction_digits]
    return f"{whole_spoken} komma {' '.join(spoken_digits)}"
|
||||||
|
|
||||||
|
|
||||||
_UNIT_WORDS = [
|
_UNIT_WORDS = [
|
||||||
(r'\bTB\b', 'Terabyte'),
|
(r'\bTB\b', 'Terabyte'),
|
||||||
(r'\bGB\b', 'Gigabyte'),
|
(r'\bGB\b', 'Gigabyte'),
|
||||||
@@ -236,6 +245,11 @@ def clean_text_for_tts(text: str) -> str:
|
|||||||
# Kleine Zahlen-Bereiche ohne "Uhr": "5-6" → "fuenf bis sechs"
|
# Kleine Zahlen-Bereiche ohne "Uhr": "5-6" → "fuenf bis sechs"
|
||||||
t = _re_tts.sub(r'\b(\d{1,2})\s*[-–]\s*(\d{1,2})\b', _small_range_to_words, t)
|
t = _re_tts.sub(r'\b(\d{1,2})\s*[-–]\s*(\d{1,2})\b', _small_range_to_words, t)
|
||||||
|
|
||||||
|
# Dezimalzahlen: "0.1" / "0,5" / "1,25" → "null komma eins" / "null komma fuenf" / ...
|
||||||
|
# Muss vor "Zahl+Einheit" laufen, sonst frisst die Unit-Regel den Nachkommaanteil.
|
||||||
|
# Lookahead verhindert Match auf IP-artigen Strings wie 192.168.1.1.
|
||||||
|
t = _re_tts.sub(r'\b(\d+)[.,](\d+)(?![.,\d])', _decimal_to_words, t)
|
||||||
|
|
||||||
# Zahlen + Einheit: "22GB" → "22 Gigabyte" (Leerzeichen einfuegen)
|
# Zahlen + Einheit: "22GB" → "22 Gigabyte" (Leerzeichen einfuegen)
|
||||||
t = _re_tts.sub(r'(\d+)([A-Za-z]{1,4})\b', r'\1 \2', t)
|
t = _re_tts.sub(r'(\d+)([A-Za-z]{1,4})\b', r'\1 \2', t)
|
||||||
|
|
||||||
@@ -243,6 +257,12 @@ def clean_text_for_tts(text: str) -> str:
|
|||||||
for pat, repl in _UNIT_WORDS:
|
for pat, repl in _UNIT_WORDS:
|
||||||
t = _re_tts.sub(pat, repl, t)
|
t = _re_tts.sub(pat, repl, t)
|
||||||
|
|
||||||
|
# Generisches Buchstabieren: alle verbleibenden 2-5-Zeichen-Grossbuchstaben-Woerter
|
||||||
|
# (XTTS, USB, DNS, JSON, HTML, ...) → "X T T S". Laeuft NACH der expliziten Liste,
|
||||||
|
# damit TTS/GPU/... schon aufgeloest sind. "WLAN"-artige, die als Wort gesprochen
|
||||||
|
# werden, koennen bei Bedarf explizit in _UNIT_WORDS uebersteuert werden.
|
||||||
|
t = _re_tts.sub(r'\b([A-Z]{2,5})\b', lambda m: " ".join(m.group(1)), t)
|
||||||
|
|
||||||
# Anfuehrungszeichen
|
# Anfuehrungszeichen
|
||||||
t = _re_tts.sub(r'["""„`]', '', t)
|
t = _re_tts.sub(r'["""„`]', '', t)
|
||||||
|
|
||||||
@@ -1100,25 +1120,12 @@ class ARIABridge:
|
|||||||
return
|
return
|
||||||
|
|
||||||
elif msg_type == "audio_pcm":
|
elif msg_type == "audio_pcm":
|
||||||
# XTTS-PCM-Stream vom Gaming-PC empfangen → durchleiten zur App.
|
# Audio-PCM geht direkt von XTTS-Bridge an die App.
|
||||||
# Wenn in payload kein messageId (alte XTTS-Bridge), aus requestId auflösen.
|
# Die aria-bridge darf es NICHT rebroadcasten — sonst bekommt die App
|
||||||
error = payload.get("error", "")
|
# jeden Chunk doppelt (einmal direkt von XTTS-Bridge via RVS-Broadcast,
|
||||||
if error:
|
# einmal indirekt via uns).
|
||||||
logger.warning("[rvs] XTTS PCM-Fehler: %s", error)
|
# Wir ignorieren diese Message hier einfach — messageId wird von
|
||||||
return
|
# XTTS-Bridge selbst im Payload mitgeliefert.
|
||||||
linked_message_id = payload.get("messageId", "")
|
|
||||||
if not linked_message_id:
|
|
||||||
req_id_full = payload.get("requestId", "")
|
|
||||||
req_id_base = req_id_full.rsplit("_", 1)[0] if "_" in req_id_full else req_id_full
|
|
||||||
linked_message_id = self._xtts_request_to_message.get(req_id_base, "")
|
|
||||||
# Einfach 1:1 weiterleiten mit eingefuellter messageId
|
|
||||||
forwarded = dict(payload)
|
|
||||||
forwarded["messageId"] = linked_message_id
|
|
||||||
await self._send_to_rvs({
|
|
||||||
"type": "audio_pcm",
|
|
||||||
"payload": forwarded,
|
|
||||||
"timestamp": int(asyncio.get_event_loop().time() * 1000),
|
|
||||||
})
|
|
||||||
return
|
return
|
||||||
|
|
||||||
elif msg_type == "xtts_response":
|
elif msg_type == "xtts_response":
|
||||||
|
|||||||
+105
-1
@@ -127,6 +127,33 @@
|
|||||||
</style>
|
</style>
|
||||||
</head>
|
</head>
|
||||||
<body>
|
<body>
|
||||||
|
<!-- Disk-Space Warnung (dynamisch gesetzt) -->
|
||||||
|
<div id="disk-banner" style="display:none;position:sticky;top:0;z-index:500;padding:10px 14px;border-radius:0;margin:-16px -16px 12px -16px;font-size:13px;">
|
||||||
|
<div style="display:flex;align-items:center;gap:10px;flex-wrap:wrap;">
|
||||||
|
<span id="disk-banner-icon" style="font-size:18px;">⚠️</span>
|
||||||
|
<span id="disk-banner-text" style="flex:1;min-width:200px;font-weight:600;"></span>
|
||||||
|
<button onclick="copyDiskCmd('safe')" class="btn secondary" style="padding:4px 10px;font-size:11px;" title="docker builder prune -a -f && docker image prune -a -f">
|
||||||
|
Sicher aufraeumen
|
||||||
|
</button>
|
||||||
|
<button onclick="document.getElementById('disk-banner-aggressive').style.display=(document.getElementById('disk-banner-aggressive').style.display==='none'?'flex':'none')"
|
||||||
|
class="btn secondary" style="padding:4px 10px;font-size:11px;">
|
||||||
|
Mehr ▾
|
||||||
|
</button>
|
||||||
|
<button onclick="document.getElementById('disk-banner').style.display='none'" class="btn secondary" style="padding:4px 10px;font-size:11px;">Schliessen</button>
|
||||||
|
</div>
|
||||||
|
<!-- Aggressive Variante (erst nach Klick sichtbar) -->
|
||||||
|
<div id="disk-banner-aggressive" style="display:none;margin-top:10px;padding:8px;background:rgba(0,0,0,0.25);border-radius:4px;flex-direction:column;gap:6px;font-size:12px;">
|
||||||
|
<div>
|
||||||
|
<b>Sicher</b> (empfohlen) — Build-Cache + ungenutzte Images, keine Volumes:<br>
|
||||||
|
<code style="font-family:monospace;">docker builder prune -a -f && docker image prune -a -f</code>
|
||||||
|
</div>
|
||||||
|
<div style="color:#FFAA55;">
|
||||||
|
<b>Aggressiv</b> — zusaetzlich ungenutzte Volumes. <b>Nur wenn alle ARIA-Container laufen</b>, sonst riskierst du Daten-Verlust (Sessions, SSH-Keys, Shared):<br>
|
||||||
|
<code style="font-family:monospace;">docker system prune -a --volumes -f</code>
|
||||||
|
<button onclick="copyDiskCmd('aggressive')" class="btn secondary" style="padding:2px 8px;font-size:10px;margin-left:6px;">Kopieren</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
<h1>ARIA Diagnostic</h1>
|
<h1>ARIA Diagnostic</h1>
|
||||||
|
|
||||||
<!-- Haupt-Navigation -->
|
<!-- Haupt-Navigation -->
|
||||||
@@ -411,13 +438,14 @@
|
|||||||
</div>
|
</div>
|
||||||
|
|
||||||
<!-- XTTS Stimme -->
|
<!-- XTTS Stimme -->
|
||||||
<div style="display:flex;align-items:center;gap:12px;margin-bottom:12px;">
|
<div style="display:flex;align-items:center;gap:12px;margin-bottom:6px;">
|
||||||
<label style="color:#8888AA;font-size:12px;">XTTS Stimme:</label>
|
<label style="color:#8888AA;font-size:12px;">XTTS Stimme:</label>
|
||||||
<select id="diag-xtts-voice" onchange="sendVoiceConfig()" style="background:#1E1E2E;color:#fff;border:1px solid #2A2A3E;border-radius:6px;padding:6px 10px;font-size:13px;">
|
<select id="diag-xtts-voice" onchange="sendVoiceConfig()" style="background:#1E1E2E;color:#fff;border:1px solid #2A2A3E;border-radius:6px;padding:6px 10px;font-size:13px;">
|
||||||
<option value="">Standard (XTTS Default)</option>
|
<option value="">Standard (XTTS Default)</option>
|
||||||
</select>
|
</select>
|
||||||
<button class="btn secondary" onclick="loadXTTSVoices()" style="padding:4px 10px;font-size:11px;">Laden</button>
|
<button class="btn secondary" onclick="loadXTTSVoices()" style="padding:4px 10px;font-size:11px;">Laden</button>
|
||||||
</div>
|
</div>
|
||||||
|
<div id="voice-status" style="font-size:11px;min-height:14px;margin-bottom:12px;color:#8888AA;"></div>
|
||||||
|
|
||||||
<!-- Gecloned Stimmen — Liste mit Loeschen -->
|
<!-- Gecloned Stimmen — Liste mit Loeschen -->
|
||||||
<div id="xtts-voice-list" style="margin-bottom:12px;"></div>
|
<div id="xtts-voice-list" style="margin-bottom:12px;"></div>
|
||||||
@@ -753,6 +781,11 @@
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (msg.type === 'disk_status') {
|
||||||
|
updateDiskBanner(msg);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
if (msg.type === 'mode' && msg.payload) {
|
if (msg.type === 'mode' && msg.payload) {
|
||||||
// Bridge hat den Modus geaendert (evtl. von anderer App/Diagnostic) — UI syncen
|
// Bridge hat den Modus geaendert (evtl. von anderer App/Diagnostic) — UI syncen
|
||||||
const mode = (msg.payload.mode || '').toLowerCase();
|
const mode = (msg.payload.mode || '').toLowerCase();
|
||||||
@@ -819,6 +852,25 @@
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (msg.type === 'voice_ready') {
|
||||||
|
const v = msg.payload?.voice || '';
|
||||||
|
const err = msg.payload?.error;
|
||||||
|
const ms = msg.payload?.loadMs;
|
||||||
|
const statusEl = document.getElementById('voice-status');
|
||||||
|
if (statusEl) {
|
||||||
|
if (err) {
|
||||||
|
statusEl.textContent = `⚠️ Stimme "${v}" Fehler: ${err}`;
|
||||||
|
statusEl.style.color = '#FF3B30';
|
||||||
|
} else {
|
||||||
|
statusEl.textContent = `✅ Stimme "${v || 'Standard'}" bereit${ms ? ` (${(ms/1000).toFixed(1)}s)` : ''}`;
|
||||||
|
statusEl.style.color = '#34C759';
|
||||||
|
}
|
||||||
|
setTimeout(() => { if (statusEl) statusEl.textContent = ''; }, 5000);
|
||||||
|
}
|
||||||
|
addLog('info', 'xtts', err ? `Voice "${v}": ${err}` : `Voice "${v || 'Standard'}" bereit`);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
if (msg.type === 'watchdog') {
|
if (msg.type === 'watchdog') {
|
||||||
const colors = { warning: '#FFD60A', fixing: '#FF9500', fixed: '#34C759', error: '#FF3B30' };
|
const colors = { warning: '#FFD60A', fixing: '#FF9500', fixed: '#34C759', error: '#FF3B30' };
|
||||||
const color = colors[msg.status] || '#FFD60A';
|
const color = colors[msg.status] || '#FFD60A';
|
||||||
@@ -1519,6 +1571,11 @@
|
|||||||
const xttsVoice = document.getElementById('diag-xtts-voice').value;
|
const xttsVoice = document.getElementById('diag-xtts-voice').value;
|
||||||
const whisperModel = document.getElementById('diag-whisper-model').value;
|
const whisperModel = document.getElementById('diag-whisper-model').value;
|
||||||
send({ action: 'send_voice_config', ttsEnabled, xttsVoice, whisperModel });
|
send({ action: 'send_voice_config', ttsEnabled, xttsVoice, whisperModel });
|
||||||
|
const statusEl = document.getElementById('voice-status');
|
||||||
|
if (statusEl && xttsVoice) {
|
||||||
|
statusEl.textContent = `⏳ Stimme "${xttsVoice}" wird geladen...`;
|
||||||
|
statusEl.style.color = '#FFD60A';
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// ── Passwort-Feld Anzeigen/Verbergen ─────────────────────
|
// ── Passwort-Feld Anzeigen/Verbergen ─────────────────────
|
||||||
@@ -2155,6 +2212,53 @@
|
|||||||
const ttsToggleEl = document.getElementById('tts-debug-toggle');
|
const ttsToggleEl = document.getElementById('tts-debug-toggle');
|
||||||
if (ttsToggleEl) ttsToggleEl.checked = showTtsDebug;
|
if (ttsToggleEl) ttsToggleEl.checked = showTtsDebug;
|
||||||
|
|
||||||
|
/**
 * Shows, restyles or hides the sticky disk-space banner.
 *
 * Invoked with the `disk_status` message pushed by the server. A missing
 * status or `level === 'ok'` hides the banner; every other level shows it
 * with a level-specific colour scheme, icon and message.
 *
 * @param {{level: string, percent: number, usedBytes: number,
 *          totalBytes: number, availBytes: number}} status
 */
function updateDiskBanner(status) {
  const banner = document.getElementById('disk-banner');
  const icon = document.getElementById('disk-banner-icon');
  const text = document.getElementById('disk-banner-text');
  if (!banner) return;
  if (!status || status.level === 'ok') {
    banner.style.display = 'none';
    return;
  }

  // Bytes -> GiB with one decimal place.
  const gb = (n) => (n / 1024 / 1024 / 1024).toFixed(1);
  const pct = status.percent;
  const used = gb(status.usedBytes);
  const total = gb(status.totalBytes);
  const avail = gb(status.availBytes);

  let bg, col, glyph, msg;
  if (status.level === 'critical') {
    bg = '#5C1A1A'; col = '#FF6B6B'; glyph = '🚨';
    msg = `KRITISCH: Platte ${pct}% voll (${used}GB von ${total}GB, nur noch ${avail}GB frei). aria-core kann bald nicht mehr schreiben — sofort aufraeumen!`;
  } else if (status.level === 'warn') {
    bg = '#5C3A1A'; col = '#FFAA55'; glyph = '⚠️';
    msg = `Warnung: Platte ${pct}% voll (${avail}GB frei). Bald aufraeumen.`;
  } else {
    // Any other non-ok level is rendered as an informational hint.
    bg = '#4A3A1A'; col = '#FFD60A'; glyph = 'ℹ️';
    msg = `Hinweis: Platte ${pct}% voll (${avail}GB frei).`;
  }

  banner.style.background = bg;
  banner.style.color = col;
  banner.style.borderBottom = `2px solid ${col}`;
  // The icon and text nodes are optional: a missing child must not keep the
  // banner itself from being shown.
  if (icon) icon.innerHTML = glyph;
  if (text) text.textContent = msg;
  banner.style.display = 'block';
}
|
||||||
|
|
||||||
|
/**
 * Copies one of the two docker clean-up commands to the clipboard and
 * briefly relabels the clicked button as feedback.
 *
 * @param {string} variant 'aggressive' selects the variant that also prunes
 *   volumes; any other value selects the safe variant.
 */
function copyDiskCmd(variant) {
  const cmd = variant === 'aggressive'
    ? 'docker system prune -a --volumes -f'
    : 'docker builder prune -a -f && docker image prune -a -f';

  // The global `event` is only set while the click is being dispatched, so
  // the button has to be captured now — not inside the async callback below.
  const trigger = (typeof event !== 'undefined' && event) ? event : null;
  const btn = trigger ? trigger.target : null;

  // navigator.clipboard is unavailable on non-secure origins; fall back to
  // showing the command instead of throwing.
  const clipboard = typeof navigator !== 'undefined' ? navigator.clipboard : undefined;
  if (!clipboard || typeof clipboard.writeText !== 'function') {
    alert('Kopieren fehlgeschlagen — Befehl: ' + cmd);
    return;
  }

  clipboard.writeText(cmd).then(() => {
    if (!btn) return;
    const old = btn.textContent;
    btn.textContent = 'Kopiert!';
    setTimeout(() => { btn.textContent = old; }, 1500);
  }).catch(() => {
    alert('Kopieren fehlgeschlagen — Befehl: ' + cmd);
  });
}
|
||||||
|
|
||||||
connectWS();
|
connectWS();
|
||||||
</script>
|
</script>
|
||||||
</body>
|
</body>
|
||||||
|
|||||||
@@ -626,6 +626,17 @@ function connectRVS(forcePlain) {
|
|||||||
// Mode-Broadcast von der Bridge → an Browser-Clients weiterreichen
|
// Mode-Broadcast von der Bridge → an Browser-Clients weiterreichen
|
||||||
log("info", "rvs", `Mode-Broadcast: ${msg.payload?.mode} (${msg.payload?.name})`);
|
log("info", "rvs", `Mode-Broadcast: ${msg.payload?.mode} (${msg.payload?.name})`);
|
||||||
broadcast({ type: "mode", payload: msg.payload });
|
broadcast({ type: "mode", payload: msg.payload });
|
||||||
|
} else if (msg.type === "voice_ready") {
|
||||||
|
// XTTS-Bridge meldet Stimme fertig geladen → an Browser durchreichen
|
||||||
|
const v = msg.payload?.voice || "";
|
||||||
|
const err = msg.payload?.error;
|
||||||
|
const ms = msg.payload?.loadMs;
|
||||||
|
if (err) {
|
||||||
|
log("warn", "rvs", `Voice-Ready Fehler fuer "${v}": ${err}`);
|
||||||
|
} else {
|
||||||
|
log("info", "rvs", `Voice "${v || "default"}" geladen${ms ? ` in ${(ms/1000).toFixed(1)}s` : ""}`);
|
||||||
|
}
|
||||||
|
broadcast({ type: "voice_ready", payload: msg.payload });
|
||||||
} else {
|
} else {
|
||||||
log("debug", "rvs", `Nachricht: ${JSON.stringify(msg).slice(0, 150)}`);
|
log("debug", "rvs", `Nachricht: ${JSON.stringify(msg).slice(0, 150)}`);
|
||||||
}
|
}
|
||||||
@@ -1148,6 +1159,53 @@ function updateAgentActivity() {
|
|||||||
watchdogWarned = false;
|
watchdogWarned = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ── Disk-Space Monitor ───────────────────────────────
|
||||||
|
// Prueft regelmaessig die Host-Disk (via gemountetem /shared) und
|
||||||
|
// broadcastet bei kritischen Schwellwerten ein disk_status Event.
|
||||||
|
let lastDiskStatus = null;
|
||||||
|
let currentDiskStatus = null; // Vollstaendig fuer neu verbundene Clients
|
||||||
|
/**
 * Samples the usage of the filesystem behind /shared via `df -B1` and
 * publishes it as a `disk_status` message.
 *
 * The latest sample is always stored in `currentDiskStatus`. A broadcast
 * (plus a log line for non-ok levels) only happens when the level or the
 * rounded percentage differs from the previously broadcast sample.
 * Failures of `df` or unparsable output are ignored on purpose (best effort).
 */
function checkDiskSpace() {
  const { exec } = require("child_process");
  exec("df -B1 /shared", (err, stdout) => {
    if (err) return;
    const lines = stdout.trim().split("\n");
    if (lines.length < 2) return;
    const cols = lines[1].split(/\s+/);
    // Columns: Filesystem Size Used Avail Use% MountedOn
    const total = parseInt(cols[1], 10);
    const used = parseInt(cols[2], 10);
    const avail = parseInt(cols[3], 10);
    // Reject the whole sample if any column is unparsable, so that no NaN
    // percentage or byte count reaches the clients.
    if (!total || !Number.isFinite(used) || !Number.isFinite(avail)) return;

    const pct = Math.round((used / total) * 100);
    let level = "ok";
    if (pct >= 95) level = "critical";
    else if (pct >= 85) level = "warn";
    else if (pct >= 70) level = "info";

    const status = {
      type: "disk_status",
      level,
      percent: pct,
      usedBytes: used,
      totalBytes: total,
      availBytes: avail,
    };
    currentDiskStatus = status;

    // Broadcast only when level or percentage changed since the last broadcast.
    const key = `${level}-${pct}`;
    if (lastDiskStatus !== key) {
      lastDiskStatus = key;
      broadcast(status);
      if (level !== "ok") {
        log(level === "critical" ? "error" : "warn", "server",
          `Disk ${pct}% belegt (${(used/1024/1024/1024).toFixed(1)}GB von ${(total/1024/1024/1024).toFixed(1)}GB)`);
      }
    }
  });
}
|
||||||
|
// Beim Start + alle 30s
|
||||||
|
setTimeout(checkDiskSpace, 2000);
|
||||||
|
setInterval(checkDiskSpace, 30000);
|
||||||
|
|
||||||
// Watchdog prüft alle 30s ob ARIA nach einer gesendeten Nachricht reagiert
|
// Watchdog prüft alle 30s ob ARIA nach einer gesendeten Nachricht reagiert
|
||||||
setInterval(async () => {
|
setInterval(async () => {
|
||||||
if (pendingMessageTime === 0) return; // Keine Nachricht gesendet
|
if (pendingMessageTime === 0) return; // Keine Nachricht gesendet
|
||||||
@@ -1281,6 +1339,8 @@ wss.on("connection", (ws) => {
|
|||||||
browserClients.add(ws);
|
browserClients.add(ws);
|
||||||
// Initialen State + letzte Logs senden
|
// Initialen State + letzte Logs senden
|
||||||
ws.send(JSON.stringify({ type: "init", state, logs: logs.slice(-100) }));
|
ws.send(JSON.stringify({ type: "init", state, logs: logs.slice(-100) }));
|
||||||
|
// Letzten Disk-Status mitgeben damit der Client sofort weiss wie's um Platz steht
|
||||||
|
if (currentDiskStatus) ws.send(JSON.stringify(currentDiskStatus));
|
||||||
|
|
||||||
ws.on("message", (raw) => {
|
ws.on("message", (raw) => {
|
||||||
try {
|
try {
|
||||||
|
|||||||
@@ -19,6 +19,7 @@ const ALLOWED_TYPES = new Set([
|
|||||||
"agent_activity", "cancel_request",
|
"agent_activity", "cancel_request",
|
||||||
"audio_pcm",
|
"audio_pcm",
|
||||||
"xtts_delete_voice",
|
"xtts_delete_voice",
|
||||||
|
"voice_preload", "voice_ready",
|
||||||
]);
|
]);
|
||||||
|
|
||||||
// Token-Raum: token -> { clients: Set<ws> }
|
// Token-Raum: token -> { clients: Set<ws> }
|
||||||
|
|||||||
+277
-99
@@ -69,6 +69,18 @@ function connectRVS(forcePlain) {
|
|||||||
await handleListVoices();
|
await handleListVoices();
|
||||||
} else if (msg.type === "xtts_delete_voice") {
|
} else if (msg.type === "xtts_delete_voice") {
|
||||||
await handleDeleteVoice(msg.payload);
|
await handleDeleteVoice(msg.payload);
|
||||||
|
} else if (msg.type === "voice_preload") {
|
||||||
|
await handleVoicePreload(msg.payload);
|
||||||
|
} else if (msg.type === "config") {
|
||||||
|
// Diagnostic hat globale Voice gewechselt → Preload damit der naechste
|
||||||
|
// Render ohne Ladewartezeit startet + alle Clients "voice_ready" sehen
|
||||||
|
const v = msg.payload && msg.payload.xttsVoice;
|
||||||
|
if (v && v !== lastDiagnosticVoice) {
|
||||||
|
lastDiagnosticVoice = v;
|
||||||
|
await handleVoicePreload({ voice: v, source: "diagnostic" });
|
||||||
|
} else if (!v) {
|
||||||
|
lastDiagnosticVoice = "";
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
log(`Fehler: ${err.message}`);
|
log(`Fehler: ${err.message}`);
|
||||||
@@ -95,7 +107,43 @@ function connectRVS(forcePlain) {
|
|||||||
|
|
||||||
// ── TTS Request Handler ─────────────────────────────
|
// ── TTS Request Handler ─────────────────────────────
|
||||||
|
|
||||||
async function handleTTSRequest(payload) {
|
/**
 * Applies a linear fade-in to the start of a base64-encoded PCM chunk
 * (signed 16-bit little-endian) and returns the result as base64.
 *
 * The gain rises from 0 towards 1 over the first `fadeMs` milliseconds, or
 * over the whole chunk if it is shorter than that. Used to mask glitches at
 * the beginning of a render.
 *
 * @param {string} base64Pcm  base64-encoded s16le PCM data
 * @param {number} sampleRate sample rate in Hz
 * @param {number} channels   number of interleaved channels
 * @param {number} fadeMs     fade duration in milliseconds
 * @returns {string} base64-encoded PCM with the fade applied
 */
function applyFadeIn(base64Pcm, sampleRate, channels, fadeMs) {
  const buf = Buffer.from(base64Pcm, "base64");
  // Whole 16-bit samples only: a dangling odd byte must never be addressed,
  // otherwise readInt16LE runs past the end of the buffer.
  const totalSamples = Math.floor(buf.length / 2);
  const fadeSamples = Math.min(
    Math.floor((fadeMs / 1000) * sampleRate) * channels,
    totalSamples
  );
  for (let i = 0; i < fadeSamples; i++) {
    const offset = i * 2;
    const gain = i / fadeSamples;
    buf.writeInt16LE(Math.round(buf.readInt16LE(offset) * gain), offset);
  }
  return buf.toString("base64");
}
|
||||||
|
|
||||||
|
// ── TTS-Queue ──────────────────────────────────────
|
||||||
|
// XTTS verarbeitet Requests sequenziell, damit Streams sich nicht ueberlappen.
|
||||||
|
// Ohne Queue wuerden parallele Requests parallel streamen → App bekommt
|
||||||
|
// interleaved PCM-Chunks aus zwei Rendern → klingt wie Chaos.
|
||||||
|
let ttsQueue = Promise.resolve();
|
||||||
|
|
||||||
|
// Merkt sich die letzte in Diagnostic gewaehlte Voice, damit wir nicht bei jedem
|
||||||
|
// config-Broadcast (auch ohne Aenderung) einen Preload triggern.
|
||||||
|
let lastDiagnosticVoice = "";
|
||||||
|
|
||||||
|
/**
 * Enqueues a TTS request behind all previously queued work.
 *
 * The request runs via `_runTTSRequest` once the current tail of `ttsQueue`
 * has settled. A failure is logged and swallowed so that later requests
 * still run.
 *
 * @param {object} payload request payload handed to `_runTTSRequest`
 * @returns {Promise<void>} the new tail of the queue
 */
function handleTTSRequest(payload) {
  const runRequest = () => _runTTSRequest(payload);
  const reportFailure = (err) => {
    log(`TTS-Queue Fehler: ${err.message}`);
  };
  ttsQueue = ttsQueue.then(runRequest).catch(reportFailure);
  return ttsQueue;
}
|
||||||
|
|
||||||
|
async function _runTTSRequest(payload) {
|
||||||
const { text, voice, requestId, language, messageId } = payload;
|
const { text, voice, requestId, language, messageId } = payload;
|
||||||
if (!text) return;
|
if (!text) return;
|
||||||
|
|
||||||
@@ -116,87 +164,88 @@ async function handleTTSRequest(payload) {
|
|||||||
.replace(/\(\)/g, "")
|
.replace(/\(\)/g, "")
|
||||||
.trim();
|
.trim();
|
||||||
|
|
||||||
// Satzweise Chunks (XTTS Modell laedt Context pro Call — Saetze gruppieren)
|
log(`TTS-Request (streaming): "${cleanText.slice(0, 80)}..." (${cleanText.length} chars, voice: ${voice || "default"})`);
|
||||||
const sentences = cleanText.split(/(?<=[.!?])\s+/)
|
|
||||||
.map(s => s.trim())
|
|
||||||
.filter(s => s.length > 0)
|
|
||||||
.map(s => s.replace(/[.]+$/, ''));
|
|
||||||
|
|
||||||
const MAX_CHUNK_CHARS = 150;
|
|
||||||
const chunks = [];
|
|
||||||
let currentChunk = '';
|
|
||||||
for (const sentence of sentences) {
|
|
||||||
if (currentChunk && (currentChunk.length + sentence.length + 2) > MAX_CHUNK_CHARS) {
|
|
||||||
chunks.push(currentChunk);
|
|
||||||
currentChunk = sentence;
|
|
||||||
} else {
|
|
||||||
currentChunk = currentChunk ? currentChunk + ', ' + sentence : sentence;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (currentChunk) chunks.push(currentChunk);
|
|
||||||
if (chunks.length === 0) return;
|
|
||||||
|
|
||||||
log(`TTS-Request (streaming): "${cleanText.slice(0, 60)}..." (${chunks.length} Chunks, voice: ${voice || "default"})`);
|
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const voiceSample = voice ? path.join(VOICES_DIR, `${voice}.wav`) : null;
|
// Im local-Mode erwartet daswer123 XTTS speaker_wav als Basename (ohne .wav,
|
||||||
const hasCustomVoice = voiceSample && fs.existsSync(voiceSample);
|
// ohne Pfad) — der Server prefixt EXAMPLE_FOLDER selbst. Wir checken hier
|
||||||
|
// nur das physische File ab um Warnungen zu loggen; runter ans API geht
|
||||||
|
// nur der Name.
|
||||||
|
const voiceFilePath = voice ? path.join(VOICES_DIR, `${voice}.wav`) : null;
|
||||||
|
const hasCustomVoice = voiceFilePath && fs.existsSync(voiceFilePath);
|
||||||
|
const speakerName = hasCustomVoice ? voice : "";
|
||||||
|
if (voice && !hasCustomVoice) {
|
||||||
|
log(`WARNUNG: Voice "${voice}" angefordert, aber ${voiceFilePath} existiert nicht — nehme Default`);
|
||||||
|
} else if (hasCustomVoice) {
|
||||||
|
log(`Voice "${voice}" verwendet (speaker_wav="${speakerName}")`);
|
||||||
|
}
|
||||||
|
|
||||||
let chunkIndex = 0;
|
let chunkIndex = 0;
|
||||||
// Audio-Format (aus WAV-Header extrahiert, einmal pro Request)
|
|
||||||
let pcmMeta = null;
|
let pcmMeta = null;
|
||||||
|
let firstChunkSeen = false;
|
||||||
|
|
||||||
for (let i = 0; i < chunks.length; i++) {
|
const onChunk = (pcmBase64, meta) => {
|
||||||
const chunk = chunks[i];
|
if (!pcmMeta) pcmMeta = meta;
|
||||||
const isLastChunk = i === chunks.length - 1;
|
let outBase64 = pcmBase64;
|
||||||
try {
|
// Fade-In auf den ersten Chunk — maskiert XTTS-Warmup-Glitches
|
||||||
// Streaming: PCM-Frames werden nacheinander an RVS gepusht,
|
// (autoregressiver Generator hat am Anfang wenig Kontext → Artefakte).
|
||||||
// sobald sie vom XTTS-Server reinkommen
|
if (!firstChunkSeen && pcmBase64) {
|
||||||
await streamXTTSAsPCM(
|
firstChunkSeen = true;
|
||||||
chunk,
|
outBase64 = applyFadeIn(pcmBase64, meta.sampleRate, meta.channels, 120);
|
||||||
language || "de",
|
|
||||||
hasCustomVoice ? voiceSample : null,
|
|
||||||
(pcmBase64, meta) => {
|
|
||||||
if (!pcmMeta) pcmMeta = meta;
|
|
||||||
sendToRVS({
|
|
||||||
type: "audio_pcm",
|
|
||||||
payload: {
|
|
||||||
requestId: requestId || "",
|
|
||||||
messageId: messageId || "",
|
|
||||||
base64: pcmBase64,
|
|
||||||
format: "pcm_s16le",
|
|
||||||
sampleRate: meta.sampleRate,
|
|
||||||
channels: meta.channels,
|
|
||||||
voice: voice || "default",
|
|
||||||
chunk: chunkIndex++,
|
|
||||||
final: false,
|
|
||||||
},
|
|
||||||
timestamp: Date.now(),
|
|
||||||
});
|
|
||||||
},
|
|
||||||
);
|
|
||||||
|
|
||||||
// Nach letztem Text-Chunk: final-Flag senden damit App weiss "fertig"
|
|
||||||
if (isLastChunk && pcmMeta) {
|
|
||||||
sendToRVS({
|
|
||||||
type: "audio_pcm",
|
|
||||||
payload: {
|
|
||||||
requestId: requestId || "",
|
|
||||||
messageId: messageId || "",
|
|
||||||
base64: "",
|
|
||||||
format: "pcm_s16le",
|
|
||||||
sampleRate: pcmMeta.sampleRate,
|
|
||||||
channels: pcmMeta.channels,
|
|
||||||
voice: voice || "default",
|
|
||||||
chunk: chunkIndex++,
|
|
||||||
final: true,
|
|
||||||
},
|
|
||||||
timestamp: Date.now(),
|
|
||||||
});
|
|
||||||
}
|
|
||||||
} catch (chunkErr) {
|
|
||||||
log(`TTS [${i + 1}/${chunks.length}] Fehler: ${chunkErr.message} — ueberspringe`);
|
|
||||||
}
|
}
|
||||||
|
sendToRVS({
|
||||||
|
type: "audio_pcm",
|
||||||
|
payload: {
|
||||||
|
requestId: requestId || "",
|
||||||
|
messageId: messageId || "",
|
||||||
|
base64: outBase64,
|
||||||
|
format: "pcm_s16le",
|
||||||
|
sampleRate: meta.sampleRate,
|
||||||
|
channels: meta.channels,
|
||||||
|
voice: voice || "default",
|
||||||
|
chunk: chunkIndex++,
|
||||||
|
final: false,
|
||||||
|
},
|
||||||
|
timestamp: Date.now(),
|
||||||
|
});
|
||||||
|
};
|
||||||
|
|
||||||
|
// /tts_stream fuer echtes Streaming (funktioniert im XTTS local-Mode).
|
||||||
|
// Wenn Server im apiManual/api-Mode laeuft: 400 → Fallback auf /tts_to_audio/.
|
||||||
|
try {
|
||||||
|
await streamXTTSAsPCM(
|
||||||
|
cleanText,
|
||||||
|
language || "de",
|
||||||
|
speakerName,
|
||||||
|
onChunk,
|
||||||
|
);
|
||||||
|
} catch (streamErr) {
|
||||||
|
log(`/tts_stream fehlgeschlagen (${streamErr.message.slice(0, 100)}) — Fallback /tts_to_audio/`);
|
||||||
|
await streamXTTSBatch(
|
||||||
|
cleanText,
|
||||||
|
language || "de",
|
||||||
|
speakerName,
|
||||||
|
onChunk,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Am Ende: final-Flag damit App weiss "fertig" und Cache geschrieben werden kann
|
||||||
|
if (pcmMeta) {
|
||||||
|
sendToRVS({
|
||||||
|
type: "audio_pcm",
|
||||||
|
payload: {
|
||||||
|
requestId: requestId || "",
|
||||||
|
messageId: messageId || "",
|
||||||
|
base64: "",
|
||||||
|
format: "pcm_s16le",
|
||||||
|
sampleRate: pcmMeta.sampleRate,
|
||||||
|
channels: pcmMeta.channels,
|
||||||
|
voice: voice || "default",
|
||||||
|
chunk: chunkIndex++,
|
||||||
|
final: true,
|
||||||
|
},
|
||||||
|
timestamp: Date.now(),
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
log(`TTS komplett: ${chunkIndex} PCM-Frames gestreamt (${cleanText.length} chars)`);
|
log(`TTS komplett: ${chunkIndex} PCM-Frames gestreamt (${cleanText.length} chars)`);
|
||||||
@@ -211,45 +260,47 @@ async function handleTTSRequest(payload) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Ruft /tts_to_audio/ auf und streamt das resultierende WAV bereits waehrend
|
* Ruft /tts_stream auf — echter Streaming-Endpoint bei daswer123.
|
||||||
* des Empfangs in PCM-Frames an den Callback. Der WAV-Header wird einmal
|
* Schickt was der Server verlangt (allow: GET), aber mit JSON-Body
|
||||||
* geparst, danach werden nur noch raw PCM-Samples weitergeleitet.
|
* als POST scheitert mit 405. Manche Versionen wollen GET + Query,
|
||||||
*
|
* andere POST + JSON. Testen was funktioniert.
|
||||||
* Warum nicht echtes /tts_stream/? daswer123 hat den Endpoint, aber die
|
|
||||||
* Audio-Quality ist dort niedriger und er produziert beim ersten Chunk
|
|
||||||
* oft Artefakte. Pragmatischer Weg: /tts_to_audio/ + Response-Stream
|
|
||||||
* chunkweise auslesen. Das ist zwar kein echtes Server-Streaming, aber
|
|
||||||
* gibt uns deutlich kleinere Netzwerk-Haeppchen und die App kann via
|
|
||||||
* AudioTrack MODE_STREAM sofort nahtlos abspielen.
|
|
||||||
*/
|
*/
|
||||||
function streamXTTSAsPCM(text, language, speakerWav, onPcmChunk) {
|
function streamXTTSAsPCM(text, language, speakerWav, onPcmChunk) {
|
||||||
return new Promise((resolve, reject) => {
|
return new Promise((resolve, reject) => {
|
||||||
const body = JSON.stringify({
|
// Wichtig: speaker_wav MUSS als Query-Key dabei sein (Pydantic required) —
|
||||||
text,
|
// auch bei default-voice mit leerem Wert. Sonst gibt's HTTP 422.
|
||||||
language,
|
// stream_chunk_size=250: grosse Chunks = wenige Chunk-Grenzen = wenig
|
||||||
speaker_wav: speakerWav || "",
|
// Render-Artefakte. daswer123 erzeugt an Chunk-Boundaries haeufig Glitches
|
||||||
});
|
// in den Worten die ueber die Grenze gehen. Hoehere Latenz ist OK.
|
||||||
|
const qs = new URLSearchParams();
|
||||||
|
qs.set("text", text);
|
||||||
|
qs.set("language", language || "de");
|
||||||
|
qs.set("speaker_wav", speakerWav || "");
|
||||||
|
qs.set("stream_chunk_size", "250");
|
||||||
|
|
||||||
const url = new URL(`${XTTS_API_URL}/tts_to_audio/`);
|
const url = new URL(XTTS_API_URL);
|
||||||
|
const fullPath = `/tts_stream?${qs.toString()}`;
|
||||||
const options = {
|
const options = {
|
||||||
hostname: url.hostname,
|
hostname: url.hostname,
|
||||||
port: url.port,
|
port: url.port || 80,
|
||||||
path: url.pathname,
|
path: fullPath,
|
||||||
method: "POST",
|
method: "GET",
|
||||||
headers: {
|
|
||||||
"Content-Type": "application/json",
|
|
||||||
"Content-Length": Buffer.byteLength(body),
|
|
||||||
},
|
|
||||||
timeout: 60000,
|
timeout: 60000,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
log(`TTS GET /tts_stream?text=${text.slice(0, 30)}... (voice=${speakerWav ? "custom" : "default"})`);
|
||||||
|
|
||||||
const req = http.request(options, (res) => {
|
const req = http.request(options, (res) => {
|
||||||
if (res.statusCode !== 200) {
|
if (res.statusCode !== 200) {
|
||||||
let body = "";
|
let body = "";
|
||||||
res.on("data", (d) => { body += d.toString(); });
|
res.on("data", (d) => { body += d.toString(); });
|
||||||
res.on("end", () => reject(new Error(`XTTS HTTP ${res.statusCode}: ${body.slice(0, 200)}`)));
|
res.on("end", () => {
|
||||||
|
log(`XTTS /tts_stream ${res.statusCode}: ${body.slice(0, 300)}`);
|
||||||
|
reject(new Error(`XTTS HTTP ${res.statusCode}: ${body.slice(0, 200)}`));
|
||||||
|
});
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
log(`TTS stream verbunden, empfange PCM...`);
|
||||||
|
|
||||||
let headerParsed = false;
|
let headerParsed = false;
|
||||||
let sampleRate = 24000;
|
let sampleRate = 24000;
|
||||||
@@ -301,6 +352,76 @@ function streamXTTSAsPCM(text, language, speakerWav, onPcmChunk) {
|
|||||||
|
|
||||||
req.on("error", reject);
|
req.on("error", reject);
|
||||||
req.on("timeout", () => { req.destroy(); reject(new Error("XTTS API Timeout (60s)")); });
|
req.on("timeout", () => { req.destroy(); reject(new Error("XTTS API Timeout (60s)")); });
|
||||||
|
req.end();
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Fallback: /tts_to_audio/ (POST JSON) — rendert komplett, dann response.
|
||||||
|
* Kein echtes Streaming, aber stabil als Backup wenn /tts_stream nicht geht.
|
||||||
|
* Shared chunking-Logik mit streamXTTSAsPCM — parst WAV-Header, stueckelt PCM.
|
||||||
|
*/
|
||||||
|
function streamXTTSBatch(text, language, speakerWav, onPcmChunk) {
|
||||||
|
return new Promise((resolve, reject) => {
|
||||||
|
const body = JSON.stringify({
|
||||||
|
text,
|
||||||
|
language: language || "de",
|
||||||
|
speaker_wav: speakerWav || "",
|
||||||
|
});
|
||||||
|
const url = new URL(XTTS_API_URL);
|
||||||
|
const options = {
|
||||||
|
hostname: url.hostname,
|
||||||
|
port: url.port || 80,
|
||||||
|
path: "/tts_to_audio/",
|
||||||
|
method: "POST",
|
||||||
|
headers: {
|
||||||
|
"Content-Type": "application/json",
|
||||||
|
"Content-Length": Buffer.byteLength(body),
|
||||||
|
},
|
||||||
|
timeout: 60000,
|
||||||
|
};
|
||||||
|
|
||||||
|
const req = http.request(options, (res) => {
|
||||||
|
if (res.statusCode !== 200) {
|
||||||
|
let rb = "";
|
||||||
|
res.on("data", (d) => { rb += d.toString(); });
|
||||||
|
res.on("end", () => reject(new Error(`XTTS Batch HTTP ${res.statusCode}: ${rb.slice(0, 200)}`)));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
let headerParsed = false;
|
||||||
|
let sampleRate = 24000;
|
||||||
|
let channels = 1;
|
||||||
|
let leftover = Buffer.alloc(0);
|
||||||
|
let headerBuf = Buffer.alloc(0);
|
||||||
|
const HEADER_BYTES = 44;
|
||||||
|
const PCM_CHUNK_BYTES = 8192;
|
||||||
|
|
||||||
|
res.on("data", (chunk) => {
|
||||||
|
let data = chunk;
|
||||||
|
if (!headerParsed) {
|
||||||
|
headerBuf = Buffer.concat([headerBuf, data]);
|
||||||
|
if (headerBuf.length < HEADER_BYTES) return;
|
||||||
|
const header = headerBuf.slice(0, HEADER_BYTES);
|
||||||
|
try { channels = header.readUInt16LE(22); sampleRate = header.readUInt32LE(24); } catch (_) {}
|
||||||
|
headerParsed = true;
|
||||||
|
data = headerBuf.slice(HEADER_BYTES);
|
||||||
|
}
|
||||||
|
let combined = Buffer.concat([leftover, data]);
|
||||||
|
while (combined.length >= PCM_CHUNK_BYTES) {
|
||||||
|
const slice = combined.slice(0, PCM_CHUNK_BYTES);
|
||||||
|
combined = combined.slice(PCM_CHUNK_BYTES);
|
||||||
|
onPcmChunk(slice.toString("base64"), { sampleRate, channels });
|
||||||
|
}
|
||||||
|
leftover = combined;
|
||||||
|
});
|
||||||
|
res.on("end", () => {
|
||||||
|
if (leftover.length > 0) onPcmChunk(leftover.toString("base64"), { sampleRate, channels });
|
||||||
|
resolve();
|
||||||
|
});
|
||||||
|
res.on("error", reject);
|
||||||
|
});
|
||||||
|
req.on("error", reject);
|
||||||
|
req.on("timeout", () => { req.destroy(); reject(new Error("XTTS Batch Timeout (60s)")); });
|
||||||
req.write(body);
|
req.write(body);
|
||||||
req.end();
|
req.end();
|
||||||
});
|
});
|
||||||
@@ -365,6 +486,63 @@ async function handleDeleteVoice(payload) {
|
|||||||
|
|
||||||
// ── Voice List Handler ──────────────────────────────
|
// ── Voice List Handler ──────────────────────────────
|
||||||
|
|
||||||
|
/**
 * Preloads a voice by rendering a short dummy utterance whose audio is
 * discarded, then reports the outcome as a "voice_ready" message.
 *
 * The dummy render goes through `ttsQueue`, so it is serialized with the
 * other queued TTS work. The "voice_ready" payload carries `loadMs` on
 * success or `error` on failure (including a missing voice file).
 *
 * @param {{voice?: string, source?: string, requestId?: string}} payload
 */
async function handleVoicePreload(payload) {
  const request = payload || {};
  const voice = request.voice || "";
  const source = request.source || "unknown";
  const requestId = request.requestId || "";
  log(`Voice-Preload angefordert: "${voice}" (source=${source})`);

  // Every outcome is reported with the same message envelope.
  const announce = (extra) => {
    sendToRVS({
      type: "voice_ready",
      payload: { voice, requestId, ...extra },
      timestamp: Date.now(),
    });
  };

  try {
    let speakerName = "";
    if (voice) {
      const voiceFilePath = path.join(VOICES_DIR, `${voice}.wav`);
      if (!fs.existsSync(voiceFilePath)) {
        announce({ error: "voice-file-not-found" });
        log(`Preload abgebrochen: ${voiceFilePath} existiert nicht`);
        return;
      }
      speakerName = voice;
    }

    // Dummy render appended to the queue; the queue tail itself never
    // rejects, while the failure is still observed here via `warmup`.
    const startedAt = Date.now();
    const warmup = ttsQueue.then(() => streamXTTSAsPCM("ja.", "de", speakerName, () => {}));
    ttsQueue = warmup.catch(() => {});
    await warmup;

    const elapsedMs = Date.now() - startedAt;
    log(`Voice "${voice || "default"}" geladen in ${elapsedMs}ms`);
    announce({ loadMs: elapsedMs });
  } catch (err) {
    log(`Voice-Preload Fehler: ${err.message}`);
    announce({ error: err.message.slice(0, 200) });
  }
}
|
||||||
|
|
||||||
async function handleListVoices() {
|
async function handleListVoices() {
|
||||||
try {
|
try {
|
||||||
const files = fs.existsSync(VOICES_DIR)
|
const files = fs.existsSync(VOICES_DIR)
|
||||||
|
|||||||
@@ -33,6 +33,12 @@ services:
|
|||||||
- ./voices:/voices # Custom Voice Samples
|
- ./voices:/voices # Custom Voice Samples
|
||||||
environment:
|
environment:
|
||||||
- COQUI_TOS_AGREED=1
|
- COQUI_TOS_AGREED=1
|
||||||
|
# Local-Modus statt default "apiManual": Modell bleibt im GPU-VRAM,
|
||||||
|
# Render startet sofort, /tts_stream funktioniert.
|
||||||
|
# Default-CMD des Images liest diese ENV: -ms ${MODEL_SOURCE:-"apiManual"}
|
||||||
|
- MODEL_SOURCE=local
|
||||||
|
# Speaker-Folder auf unsere gemounteten voices zeigen lassen
|
||||||
|
- EXAMPLE_FOLDER=/voices
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
|
|
||||||
# ─── XTTS Bridge (verbindet zu RVS) ───────────
|
# ─── XTTS Bridge (verbindet zu RVS) ───────────
|
||||||
|
|||||||
Reference in New Issue
Block a user