Compare commits
11 Commits
v0.0.5.5
...
b373f915b5
| Author | SHA1 | Date | |
|---|---|---|---|
| b373f915b5 | |||
| 7748834a0f | |||
| 8b52f4c92b | |||
| dc20570f6d | |||
| 744a27cfd1 | |||
| 37c5f6c368 | |||
| a361015ff4 | |||
| d83b555209 | |||
| a029267d9d | |||
| 8ba6a71a49 | |||
| 2f625572fc |
@@ -79,8 +79,8 @@ android {
|
|||||||
applicationId "com.ariacockpit"
|
applicationId "com.ariacockpit"
|
||||||
minSdkVersion rootProject.ext.minSdkVersion
|
minSdkVersion rootProject.ext.minSdkVersion
|
||||||
targetSdkVersion rootProject.ext.targetSdkVersion
|
targetSdkVersion rootProject.ext.targetSdkVersion
|
||||||
versionCode 505
|
versionCode 506
|
||||||
versionName "0.0.5.5"
|
versionName "0.0.5.6"
|
||||||
// Fallback fuer Libraries mit Product Flavors
|
// Fallback fuer Libraries mit Product Flavors
|
||||||
missingDimensionStrategy 'react-native-camera', 'general'
|
missingDimensionStrategy 'react-native-camera', 'general'
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "aria-cockpit",
|
"name": "aria-cockpit",
|
||||||
"version": "0.0.5.5",
|
"version": "0.0.5.6",
|
||||||
"private": true,
|
"private": true,
|
||||||
"scripts": {
|
"scripts": {
|
||||||
"android": "react-native run-android",
|
"android": "react-native run-android",
|
||||||
|
|||||||
@@ -108,6 +108,9 @@ const ChatScreen: React.FC = () => {
|
|||||||
const [searchVisible, setSearchVisible] = useState(false);
|
const [searchVisible, setSearchVisible] = useState(false);
|
||||||
const [pendingAttachments, setPendingAttachments] = useState<{file: any, isPhoto: boolean}[]>([]);
|
const [pendingAttachments, setPendingAttachments] = useState<{file: any, isPhoto: boolean}[]>([]);
|
||||||
const [agentActivity, setAgentActivity] = useState<{activity: string, tool: string}>({activity: 'idle', tool: ''});
|
const [agentActivity, setAgentActivity] = useState<{activity: string, tool: string}>({activity: 'idle', tool: ''});
|
||||||
|
// Service-Status (Gamebox: F5-TTS / Whisper Lade-Status) + Banner-Sichtbarkeit
|
||||||
|
const [serviceStatus, setServiceStatus] = useState<Record<string, {state: string, model?: string, loadSeconds?: number, error?: string}>>({});
|
||||||
|
const [serviceBannerDismissed, setServiceBannerDismissed] = useState(false);
|
||||||
// Gerätelokale TTS-Config: globaler Toggle (aus Settings) + temporäres Muten (Mund-Button)
|
// Gerätelokale TTS-Config: globaler Toggle (aus Settings) + temporäres Muten (Mund-Button)
|
||||||
const [ttsDeviceEnabled, setTtsDeviceEnabled] = useState(true);
|
const [ttsDeviceEnabled, setTtsDeviceEnabled] = useState(true);
|
||||||
const [ttsMuted, setTtsMuted] = useState(false);
|
const [ttsMuted, setTtsMuted] = useState(false);
|
||||||
@@ -351,6 +354,24 @@ const ChatScreen: React.FC = () => {
|
|||||||
ToastAndroid.show(`Stimme "${v || 'Standard'}" bereit`, ToastAndroid.SHORT);
|
ToastAndroid.show(`Stimme "${v || 'Standard'}" bereit`, ToastAndroid.SHORT);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Gamebox-Bridges (f5tts/whisper) melden Lade-Status — Banner oben
|
||||||
|
if (message.type === ('service_status' as any)) {
|
||||||
|
const p = message.payload as any;
|
||||||
|
const svc = (p?.service as string) || '';
|
||||||
|
if (!svc) return;
|
||||||
|
setServiceStatus(prev => ({
|
||||||
|
...prev,
|
||||||
|
[svc]: {
|
||||||
|
state: (p?.state as string) || 'unknown',
|
||||||
|
model: p?.model as string | undefined,
|
||||||
|
loadSeconds: p?.loadSeconds as number | undefined,
|
||||||
|
error: p?.error as string | undefined,
|
||||||
|
},
|
||||||
|
}));
|
||||||
|
// Bei neuer Loading-Phase Banner wieder aktivieren
|
||||||
|
if (p?.state === 'loading') setServiceBannerDismissed(false);
|
||||||
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
const unsubState = rvs.onStateChange((state) => {
|
const unsubState = rvs.onStateChange((state) => {
|
||||||
@@ -764,6 +785,49 @@ const ChatScreen: React.FC = () => {
|
|||||||
</TouchableOpacity>
|
</TouchableOpacity>
|
||||||
</View>
|
</View>
|
||||||
|
|
||||||
|
{/* Service-Status Banner (Gamebox: F5-TTS / Whisper Lade-Status) */}
|
||||||
|
{(() => {
|
||||||
|
const entries = Object.entries(serviceStatus);
|
||||||
|
if (entries.length === 0 || serviceBannerDismissed) return null;
|
||||||
|
const anyLoading = entries.some(([, v]) => v.state === 'loading');
|
||||||
|
const anyError = entries.some(([, v]) => v.state === 'error');
|
||||||
|
const allReady = !anyLoading && !anyError && entries.every(([, v]) => v.state === 'ready');
|
||||||
|
const bg = anyError ? '#3A1F1F' : anyLoading ? '#3A331F' : '#1F3A2A';
|
||||||
|
const border = anyError ? '#FF3B30' : anyLoading ? '#FFD60A' : '#34C759';
|
||||||
|
const labels: Record<string, string> = { f5tts: 'F5-TTS', whisper: 'Whisper STT' };
|
||||||
|
return (
|
||||||
|
<TouchableOpacity
|
||||||
|
activeOpacity={allReady ? 0.6 : 1.0}
|
||||||
|
onPress={() => { if (allReady) setServiceBannerDismissed(true); }}
|
||||||
|
style={[styles.serviceBanner, { backgroundColor: bg, borderColor: border }]}
|
||||||
|
>
|
||||||
|
{entries.map(([svc, info]) => {
|
||||||
|
let icon = '\u23F3', text = '';
|
||||||
|
if (info.state === 'loading') {
|
||||||
|
text = `${labels[svc] || svc}: laedt${info.model ? ' ' + info.model : ''}...`;
|
||||||
|
} else if (info.state === 'ready') {
|
||||||
|
icon = '\u2705';
|
||||||
|
const sec = info.loadSeconds ? ` (${info.loadSeconds.toFixed(1)}s)` : '';
|
||||||
|
text = `${labels[svc] || svc}: bereit${info.model ? ' ' + info.model : ''}${sec}`;
|
||||||
|
} else if (info.state === 'error') {
|
||||||
|
icon = '\u274C';
|
||||||
|
text = `${labels[svc] || svc}: Fehler ${info.error || ''}`;
|
||||||
|
} else {
|
||||||
|
text = `${labels[svc] || svc}: ${info.state}`;
|
||||||
|
}
|
||||||
|
return (
|
||||||
|
<Text key={svc} style={styles.serviceBannerLine}>
|
||||||
|
{icon} {text}
|
||||||
|
</Text>
|
||||||
|
);
|
||||||
|
})}
|
||||||
|
<Text style={styles.serviceBannerHint}>
|
||||||
|
{allReady ? 'Tippen zum Schliessen' : 'Bitte warten...'}
|
||||||
|
</Text>
|
||||||
|
</TouchableOpacity>
|
||||||
|
);
|
||||||
|
})()}
|
||||||
|
|
||||||
{/* Suchleiste */}
|
{/* Suchleiste */}
|
||||||
{searchVisible && (
|
{searchVisible && (
|
||||||
<View style={styles.searchBar}>
|
<View style={styles.searchBar}>
|
||||||
@@ -978,6 +1042,25 @@ const styles = StyleSheet.create({
|
|||||||
color: '#8888AA',
|
color: '#8888AA',
|
||||||
fontSize: 12,
|
fontSize: 12,
|
||||||
},
|
},
|
||||||
|
serviceBanner: {
|
||||||
|
paddingVertical: 8,
|
||||||
|
paddingHorizontal: 12,
|
||||||
|
borderTopWidth: 0,
|
||||||
|
borderBottomWidth: 1,
|
||||||
|
borderLeftWidth: 0,
|
||||||
|
borderRightWidth: 0,
|
||||||
|
},
|
||||||
|
serviceBannerLine: {
|
||||||
|
color: '#FFFFFF',
|
||||||
|
fontSize: 12,
|
||||||
|
lineHeight: 18,
|
||||||
|
},
|
||||||
|
serviceBannerHint: {
|
||||||
|
color: '#AAAACC',
|
||||||
|
fontSize: 10,
|
||||||
|
marginTop: 2,
|
||||||
|
fontStyle: 'italic',
|
||||||
|
},
|
||||||
messageList: {
|
messageList: {
|
||||||
padding: 12,
|
padding: 12,
|
||||||
paddingBottom: 8,
|
paddingBottom: 8,
|
||||||
|
|||||||
+41
-4
@@ -544,6 +544,10 @@ class ARIABridge:
|
|||||||
# STT-Requests die aktuell auf Antwort von der whisper-bridge (Gamebox) warten.
|
# STT-Requests die aktuell auf Antwort von der whisper-bridge (Gamebox) warten.
|
||||||
# requestId → Future mit dem Text (oder None bei Fehler).
|
# requestId → Future mit dem Text (oder None bei Fehler).
|
||||||
self._pending_stt: dict[str, asyncio.Future] = {}
|
self._pending_stt: dict[str, asyncio.Future] = {}
|
||||||
|
# whisper-bridge service_status: True wenn ready, False/None wenn loading/unbekannt.
|
||||||
|
# Beeinflusst das Timeout fuer stt_request — bei "loading" warten wir laenger,
|
||||||
|
# weil das Modell beim ersten Request noch ~1-2 Min runtergeladen werden kann.
|
||||||
|
self._remote_stt_ready: bool = False
|
||||||
|
|
||||||
def initialize(self) -> None:
|
def initialize(self) -> None:
|
||||||
"""Initialisiert alle Komponenten.
|
"""Initialisiert alle Komponenten.
|
||||||
@@ -1442,13 +1446,41 @@ class ARIABridge:
|
|||||||
future.set_result(text)
|
future.set_result(text)
|
||||||
return
|
return
|
||||||
|
|
||||||
|
elif msg_type == "service_status":
|
||||||
|
# Gamebox-Bridges (whisper / f5tts) melden ihren Lade-Status.
|
||||||
|
# Wir nutzen das fuer den dynamischen STT-Timeout: solange whisper
|
||||||
|
# im 'loading' steckt, geben wir der Bridge mehr Zeit (Modell-Download
|
||||||
|
# kann 1-2 Min dauern), statt nach 45s lokal zu fallbacken.
|
||||||
|
svc = payload.get("service", "")
|
||||||
|
state = payload.get("state", "")
|
||||||
|
if svc == "whisper":
|
||||||
|
was_ready = self._remote_stt_ready
|
||||||
|
self._remote_stt_ready = (state == "ready")
|
||||||
|
if self._remote_stt_ready != was_ready:
|
||||||
|
logger.info("[rvs] whisper-bridge -> %s", state)
|
||||||
|
return
|
||||||
|
|
||||||
|
elif msg_type == "config_request":
|
||||||
|
# Eine andere Bridge (whisper/f5tts) bittet um die aktuelle Voice-
|
||||||
|
# Config — passiert wenn sie sich connected, weil sie sonst die
|
||||||
|
# Diagnostic-Settings nicht kennt. Wir broadcasten die persistierte
|
||||||
|
# Config (auch beim normalen Connect von aria-bridge selber, aber
|
||||||
|
# da war eventuell die andere Bridge noch nicht connected).
|
||||||
|
requester = payload.get("service", "?")
|
||||||
|
logger.info("[rvs] config_request von %s — broadcaste Voice-Config", requester)
|
||||||
|
asyncio.create_task(self._broadcast_persisted_config())
|
||||||
|
return
|
||||||
|
|
||||||
else:
|
else:
|
||||||
logger.debug("[rvs] Unbekannter Typ: %s", msg_type)
|
logger.debug("[rvs] Unbekannter Typ: %s", msg_type)
|
||||||
|
|
||||||
# STT-Orchestrierung: zuerst Remote (Gamebox), Fallback lokal.
|
# STT-Orchestrierung: zuerst Remote (Gamebox), Fallback lokal.
|
||||||
# Timeout grosszuegig gewaehlt, damit auch ein erstmaliger Modell-Load
|
# Zwei Timeouts:
|
||||||
# auf der Gamebox (bis ~30s bei large-v3) durchgeht.
|
# ready=True → 45s reicht selbst fuer lange Audios
|
||||||
_STT_REMOTE_TIMEOUT_S = 45.0
|
# ready=False → 300s, weil das Modell evtl. noch heruntergeladen wird
|
||||||
|
# (large-v3 ~3GB, kann auf der Gamebox 1-2 Min dauern).
|
||||||
|
_STT_REMOTE_TIMEOUT_READY_S = 45.0
|
||||||
|
_STT_REMOTE_TIMEOUT_LOADING_S = 300.0
|
||||||
|
|
||||||
async def _process_app_audio(self, audio_b64: str, mime_type: str) -> None:
|
async def _process_app_audio(self, audio_b64: str, mime_type: str) -> None:
|
||||||
"""App-Audio → STT → aria-core. Primaer via whisper-bridge (RVS), Fallback lokal."""
|
"""App-Audio → STT → aria-core. Primaer via whisper-bridge (RVS), Fallback lokal."""
|
||||||
@@ -1514,7 +1546,12 @@ class ARIABridge:
|
|||||||
if not ok:
|
if not ok:
|
||||||
logger.warning("[rvs] stt_request konnte nicht gesendet werden — skip Remote")
|
logger.warning("[rvs] stt_request konnte nicht gesendet werden — skip Remote")
|
||||||
return None
|
return None
|
||||||
return await asyncio.wait_for(future, timeout=self._STT_REMOTE_TIMEOUT_S)
|
timeout_s = (self._STT_REMOTE_TIMEOUT_READY_S
|
||||||
|
if self._remote_stt_ready
|
||||||
|
else self._STT_REMOTE_TIMEOUT_LOADING_S)
|
||||||
|
logger.info("[rvs] STT-Timeout %ds (whisper-bridge %s)",
|
||||||
|
int(timeout_s), "ready" if self._remote_stt_ready else "loading")
|
||||||
|
return await asyncio.wait_for(future, timeout=timeout_s)
|
||||||
except asyncio.TimeoutError:
|
except asyncio.TimeoutError:
|
||||||
logger.warning("[rvs] Remote-STT Timeout (%.0fs)", self._STT_REMOTE_TIMEOUT_S)
|
# The single _STT_REMOTE_TIMEOUT_S constant was split into a READY and a
# LOADING variant; referencing the old name here would raise AttributeError
# inside the timeout handler. Report the limit matching the bridge state.
logger.warning("[rvs] Remote-STT Timeout (%.0fs)",
               self._STT_REMOTE_TIMEOUT_READY_S if self._remote_stt_ready
               else self._STT_REMOTE_TIMEOUT_LOADING_S)
|
||||||
return None
|
return None
|
||||||
|
|||||||
+83
-5
@@ -127,6 +127,15 @@
|
|||||||
</style>
|
</style>
|
||||||
</head>
|
</head>
|
||||||
<body>
|
<body>
|
||||||
|
<!-- Service-Status Banner unten rechts (Gamebox: F5-TTS / Whisper Lade-Status) -->
|
||||||
|
<div id="service-status-banner" style="display:none;position:fixed;bottom:16px;right:16px;z-index:999;background:#1E1E2E;border:1px solid #2A2A3E;border-radius:8px;padding:10px 14px;font-size:12px;color:#fff;min-width:240px;max-width:360px;box-shadow:0 4px 14px rgba(0,0,0,0.5);">
|
||||||
|
<div style="display:flex;align-items:flex-start;gap:8px;">
|
||||||
|
<span id="service-status-icon" style="font-size:18px;line-height:1;">⏳</span>
|
||||||
|
<div id="service-status-list" style="flex:1;display:flex;flex-direction:column;gap:6px;"></div>
|
||||||
|
<button id="service-status-close" onclick="document.getElementById('service-status-banner').style.display='none'" style="background:none;border:none;color:#666680;font-size:16px;cursor:pointer;padding:0;line-height:1;display:none;">×</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
<!-- Disk-Space Warnung (dynamisch gesetzt) -->
|
<!-- Disk-Space Warnung (dynamisch gesetzt) -->
|
||||||
<div id="disk-banner" style="display:none;position:sticky;top:0;z-index:500;padding:10px 14px;border-radius:0;margin:-16px -16px 12px -16px;font-size:13px;">
|
<div id="disk-banner" style="display:none;position:sticky;top:0;z-index:500;padding:10px 14px;border-radius:0;margin:-16px -16px 12px -16px;font-size:13px;">
|
||||||
<div style="display:flex;align-items:center;gap:10px;flex-wrap:wrap;">
|
<div style="display:flex;align-items:center;gap:10px;flex-wrap:wrap;">
|
||||||
@@ -460,23 +469,25 @@
|
|||||||
Hardcoded Defaults: F5TTS_v1_Base, cfg_strength=2.5, nfe_step=32.
|
Hardcoded Defaults: F5TTS_v1_Base, cfg_strength=2.5, nfe_step=32.
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<label style="color:#8888AA;font-size:12px;">Modell-ID:</label>
|
<label style="color:#8888AA;font-size:12px;">
|
||||||
|
Modell-Architektur (F5TTS_v1_Base = Default multilingual, F5TTS_Base = fuer die meisten Fine-Tunes):
|
||||||
|
</label>
|
||||||
<input type="text" id="diag-f5tts-model"
|
<input type="text" id="diag-f5tts-model"
|
||||||
placeholder="F5TTS_v1_Base"
|
placeholder="F5TTS_v1_Base"
|
||||||
style="background:#1E1E2E;color:#fff;border:1px solid #2A2A3E;border-radius:6px;padding:6px 10px;font-size:13px;">
|
style="background:#1E1E2E;color:#fff;border:1px solid #2A2A3E;border-radius:6px;padding:6px 10px;font-size:13px;">
|
||||||
|
|
||||||
<label style="color:#8888AA;font-size:12px;">
|
<label style="color:#8888AA;font-size:12px;">
|
||||||
Custom Checkpoint (HF-Repo "user/repo" oder Container-Pfad, leer = Default):
|
Custom Checkpoint — HF-Pfad (hf://user/repo/file) oder lokaler Container-Pfad. Leer = Default.
|
||||||
</label>
|
</label>
|
||||||
<input type="text" id="diag-f5tts-ckpt"
|
<input type="text" id="diag-f5tts-ckpt"
|
||||||
placeholder="z.B. aoxo/F5-TTS-German"
|
placeholder="z.B. hf://aihpi/F5-TTS-German/F5TTS_Base/model_365000.safetensors"
|
||||||
style="background:#1E1E2E;color:#fff;border:1px solid #2A2A3E;border-radius:6px;padding:6px 10px;font-size:13px;">
|
style="background:#1E1E2E;color:#fff;border:1px solid #2A2A3E;border-radius:6px;padding:6px 10px;font-size:13px;">
|
||||||
|
|
||||||
<label style="color:#8888AA;font-size:12px;">
|
<label style="color:#8888AA;font-size:12px;">
|
||||||
Custom Vocab (passend zum Checkpoint, optional):
|
Custom Vocab — muss zum Checkpoint passen. Leer = Default.
|
||||||
</label>
|
</label>
|
||||||
<input type="text" id="diag-f5tts-vocab"
|
<input type="text" id="diag-f5tts-vocab"
|
||||||
placeholder="leer = Default"
|
placeholder="z.B. hf://aihpi/F5-TTS-German/vocab.txt"
|
||||||
style="background:#1E1E2E;color:#fff;border:1px solid #2A2A3E;border-radius:6px;padding:6px 10px;font-size:13px;">
|
style="background:#1E1E2E;color:#fff;border:1px solid #2A2A3E;border-radius:6px;padding:6px 10px;font-size:13px;">
|
||||||
|
|
||||||
<div style="display:flex;gap:12px;">
|
<div style="display:flex;gap:12px;">
|
||||||
@@ -914,6 +925,11 @@
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (msg.type === 'service_status') {
|
||||||
|
updateServiceStatus(msg.payload || {});
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
if (msg.type === 'voice_ready') {
|
if (msg.type === 'voice_ready') {
|
||||||
const v = msg.payload?.voice || '';
|
const v = msg.payload?.voice || '';
|
||||||
const err = msg.payload?.error;
|
const err = msg.payload?.error;
|
||||||
@@ -1452,6 +1468,68 @@
|
|||||||
'Glob': '\uD83D\uDCC1 Dateien suchen',
|
'Glob': '\uD83D\uDCC1 Dateien suchen',
|
||||||
'Agent': '\uD83E\uDD16 Sub-Agent',
|
'Agent': '\uD83E\uDD16 Sub-Agent',
|
||||||
};
|
};
|
||||||
|
// ── Service-Status Banner (Gamebox: F5-TTS / Whisper Lade-Status) ──
|
||||||
|
// Aggregiert die Status-Infos der Bridges. Wenn irgendwas am Laden
|
||||||
|
// ist, zeigt das Banner unten rechts. Sobald alles auf 'ready' ist,
|
||||||
|
// bleibt's einen Moment und wird dann vom User weggeklickt (oder
|
||||||
|
// nach 8s automatisch).
|
||||||
|
const _serviceState = {}; // { f5tts: {state, model, ...}, whisper: {...} }
|
||||||
|
let _serviceFadeTimer = null;
|
||||||
|
/**
 * Stores the latest status payload of one bridge and re-renders the
 * service-status banner from every status received so far.
 *
 * The payload arrives over the WebSocket, so all of its fields are rendered
 * as text nodes only and never interpolated into markup.
 *
 * @param {object} p service_status payload; reads `service`, `state`,
 *   `model`, `loadSeconds` and `error`.
 */
function updateServiceStatus(p) {
  const svc = p.service || '?';
  _serviceState[svc] = p;

  const banner = document.getElementById('service-status-banner');
  const list = document.getElementById('service-status-list');
  const icon = document.getElementById('service-status-icon');
  const closeBtn = document.getElementById('service-status-close');

  // Rebuild the list from scratch on every update.
  list.textContent = '';
  let anyLoading = false, anyError = false;
  const labels = { f5tts: 'F5-TTS', whisper: 'Whisper STT' };
  for (const [s, info] of Object.entries(_serviceState)) {
    const name = labels[s] || s;
    let dot = '⚫', color = '#666680', text = '';
    if (info.state === 'loading') {
      dot = '⏳'; color = '#FFD60A'; anyLoading = true;
      text = `${name}: laedt${info.model ? ' ' + info.model : ''}...`;
    } else if (info.state === 'ready') {
      dot = '✅'; color = '#34C759';
      // Only format the duration when it really is a number; toFixed on
      // anything else would throw and abort the whole render.
      const sec = (typeof info.loadSeconds === 'number' && info.loadSeconds)
        ? ` (${info.loadSeconds.toFixed(1)}s)`
        : '';
      text = `${name}: bereit${info.model ? ' ' + info.model : ''}${sec}`;
    } else if (info.state === 'error') {
      dot = '❌'; color = '#FF3B30'; anyError = true;
      text = `${name}: Fehler ${info.error || ''}`;
    } else {
      text = `${name}: ${info.state}`;
    }

    const row = document.createElement('div');
    row.style.cssText = 'display:flex;align-items:center;gap:6px;';
    const dotEl = document.createElement('span');
    dotEl.style.color = color;
    dotEl.textContent = dot;
    const textEl = document.createElement('span');
    textEl.textContent = text; // untrusted payload text, never parsed as HTML
    row.appendChild(dotEl);
    row.appendChild(textEl);
    list.appendChild(row);
  }

  // The header icon mirrors the overall state: error beats loading beats ready.
  icon.textContent = anyError ? '❌' : anyLoading ? '⏳' : '✅';

  banner.style.display = 'block';

  // Always cancel a pending auto-hide first so a new status restarts the clock.
  clearTimeout(_serviceFadeTimer);
  if (!anyLoading && !anyError) {
    // Nothing loading and nothing failed: offer the close button and hide
    // the banner automatically after 8 seconds.
    closeBtn.style.display = 'block';
    _serviceFadeTimer = setTimeout(() => {
      banner.style.display = 'none';
    }, 8000);
  } else {
    closeBtn.style.display = 'none';
  }
}
|
||||||
|
|
||||||
function updateThinkingIndicator(msg) {
|
function updateThinkingIndicator(msg) {
|
||||||
const indicators = [
|
const indicators = [
|
||||||
document.getElementById('thinking-indicator'),
|
document.getElementById('thinking-indicator'),
|
||||||
|
|||||||
@@ -637,6 +637,22 @@ function connectRVS(forcePlain) {
|
|||||||
log("info", "rvs", `Voice "${v || "default"}" geladen${ms ? ` in ${(ms/1000).toFixed(1)}s` : ""}`);
|
log("info", "rvs", `Voice "${v || "default"}" geladen${ms ? ` in ${(ms/1000).toFixed(1)}s` : ""}`);
|
||||||
}
|
}
|
||||||
broadcast({ type: "voice_ready", payload: msg.payload });
|
broadcast({ type: "voice_ready", payload: msg.payload });
|
||||||
|
} else if (msg.type === "service_status") {
|
||||||
|
// Gamebox-Bridges (f5tts/whisper) melden ihren Lade-Status —
|
||||||
|
// an Browser durchreichen fuer das Banner unten rechts
|
||||||
|
const svc = msg.payload?.service || "?";
|
||||||
|
const state = msg.payload?.state || "?";
|
||||||
|
const model = msg.payload?.model || "";
|
||||||
|
const sec = msg.payload?.loadSeconds;
|
||||||
|
const err = msg.payload?.error;
|
||||||
|
if (err) {
|
||||||
|
log("warn", "rvs", `service_status ${svc}: ${err}`);
|
||||||
|
} else if (state === "ready" && sec) {
|
||||||
|
log("info", "rvs", `service_status ${svc} ready (${model}, ${sec.toFixed(1)}s)`);
|
||||||
|
} else {
|
||||||
|
log("info", "rvs", `service_status ${svc} ${state}${model ? ` (${model})` : ""}`);
|
||||||
|
}
|
||||||
|
broadcast({ type: "service_status", payload: msg.payload });
|
||||||
} else {
|
} else {
|
||||||
log("debug", "rvs", `Nachricht: ${JSON.stringify(msg).slice(0, 150)}`);
|
log("debug", "rvs", `Nachricht: ${JSON.stringify(msg).slice(0, 150)}`);
|
||||||
}
|
}
|
||||||
|
|||||||
+6
-1
@@ -21,6 +21,8 @@ const ALLOWED_TYPES = new Set([
|
|||||||
"xtts_delete_voice",
|
"xtts_delete_voice",
|
||||||
"voice_preload", "voice_ready",
|
"voice_preload", "voice_ready",
|
||||||
"stt_request", "stt_response",
|
"stt_request", "stt_response",
|
||||||
|
"service_status",
|
||||||
|
"config_request",
|
||||||
]);
|
]);
|
||||||
|
|
||||||
// Token-Raum: token -> { clients: Set<ws> }
|
// Token-Raum: token -> { clients: Set<ws> }
|
||||||
@@ -53,7 +55,10 @@ function cleanupRooms() {
|
|||||||
|
|
||||||
// ── WebSocket-Server starten ────────────────────────────────────────
|
// ── WebSocket-Server starten ────────────────────────────────────────
|
||||||
|
|
||||||
const wss = new WebSocketServer({ port: PORT });
|
// maxPayload 50MB: TTS-Streaming + Voice-Upload (WAV als base64) +
|
||||||
|
// audio_pcm Chunks koennen die ws-Library Default 1MB ueberschreiten.
|
||||||
|
// Default-Limit war der Killer fuer die voice_upload Pipeline.
|
||||||
|
const wss = new WebSocketServer({ port: PORT, maxPayload: 50 * 1024 * 1024 });
|
||||||
|
|
||||||
wss.on("listening", () => {
|
wss.on("listening", () => {
|
||||||
log(`RVS läuft auf Port ${PORT} | Max Sessions: ${MAX_SESSIONS}`);
|
log(`RVS läuft auf Port ${PORT} | Max Sessions: ${MAX_SESSIONS}`);
|
||||||
|
|||||||
+2
-2
@@ -1,5 +1,5 @@
|
|||||||
# HuggingFace Model-Cache (geteilt zwischen f5tts + whisper bridge,
|
# HuggingFace Model-Cache (Whisper + F5-TTS, geteilt zwischen den
|
||||||
# wird via Bind-Mount in die Container reingehaengt)
|
# beiden Bridges via Bind-Mount, kann mehrere GB werden)
|
||||||
hf-cache/
|
hf-cache/
|
||||||
|
|
||||||
# Voice-Samples (lokal, gehoert nicht ins Repo)
|
# Voice-Samples (lokal, gehoert nicht ins Repo)
|
||||||
|
|||||||
@@ -33,8 +33,8 @@ services:
|
|||||||
- ./voices:/voices # WAV + TXT Referenz
|
- ./voices:/voices # WAV + TXT Referenz
|
||||||
- ./hf-cache:/root/.cache/huggingface # HF-Cache als Bind-Mount.
|
- ./hf-cache:/root/.cache/huggingface # HF-Cache als Bind-Mount.
|
||||||
# Direkt sichtbar im xtts/hf-cache/,
|
# Direkt sichtbar im xtts/hf-cache/,
|
||||||
# einfach zu loeschen, kein Docker-
|
# einfach manuell zu loeschen, kein
|
||||||
# Desktop .vhdx Bloat.
|
# Docker-Desktop .vhdx Bloat.
|
||||||
# Wird mit whisper-bridge geteilt.
|
# Wird mit whisper-bridge geteilt.
|
||||||
environment:
|
environment:
|
||||||
# Bootstrap-only — alle anderen F5-TTS-Settings (Modell, cfg_strength,
|
# Bootstrap-only — alle anderen F5-TTS-Settings (Modell, cfg_strength,
|
||||||
@@ -78,5 +78,8 @@ services:
|
|||||||
- WHISPER_COMPUTE_TYPE=${WHISPER_COMPUTE_TYPE:-float16}
|
- WHISPER_COMPUTE_TYPE=${WHISPER_COMPUTE_TYPE:-float16}
|
||||||
- WHISPER_LANGUAGE=${WHISPER_LANGUAGE:-de}
|
- WHISPER_LANGUAGE=${WHISPER_LANGUAGE:-de}
|
||||||
volumes:
|
volumes:
|
||||||
- ./hf-cache:/root/.cache/huggingface # gleicher Cache wie f5tts-bridge
|
- ./hf-cache:/root/.cache/huggingface # gleicher Cache wie f5tts-bridge —
|
||||||
|
# ein Modell muss nur einmal pro
|
||||||
|
# Maschine geladen werden, kein
|
||||||
|
# Re-Download bei Container-Restart.
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
|
|||||||
+155
-28
@@ -73,6 +73,12 @@ VOICES_DIR = Path(os.getenv("VOICES_DIR", "/voices"))
|
|||||||
|
|
||||||
PCM_CHUNK_BYTES = 8192 # ~170ms @ 24kHz mono s16
|
PCM_CHUNK_BYTES = 8192 # ~170ms @ 24kHz mono s16
|
||||||
TARGET_SR = 24000 # F5-TTS native
|
TARGET_SR = 24000 # F5-TTS native
|
||||||
|
# F5-TTS hat ein 12s Hard-Limit fuer Referenz-Audio. Laengere WAVs werden
|
||||||
|
# vom Modell stumm abgeschnitten — aber unser ref_text bleibt lang und passt
|
||||||
|
# dann nicht mehr zum gekuerzten Audio (Quali leidet, warmup-Render ist
|
||||||
|
# unnoetig lange). Wir clippen explizit auf 10s + re-transkribieren den Text
|
||||||
|
# damit beide synchron bleiben.
|
||||||
|
REF_MAX_SECONDS = 10.0
|
||||||
|
|
||||||
# Wird in einer Uebergangsphase als "ungueltige Referenz" erkannt (alte voices,
|
# Wird in einer Uebergangsphase als "ungueltige Referenz" erkannt (alte voices,
|
||||||
# die hochgeladen wurden bevor die whisper-bridge online war). Bei Erkennung
|
# die hochgeladen wurden bevor die whisper-bridge online war). Bei Erkennung
|
||||||
@@ -93,6 +99,33 @@ def _get_f5tts_cls():
|
|||||||
return _F5TTS_cls
|
return _F5TTS_cls
|
||||||
|
|
||||||
|
|
||||||
|
def _resolve_hf_path(p: str) -> str:
|
||||||
|
"""Wenn p mit 'hf://' anfaengt → aus HuggingFace Hub runterladen,
|
||||||
|
lokalen Pfad zurueckgeben. Sonst unveraendert.
|
||||||
|
|
||||||
|
Format: hf://user/repo/path/to/file.ext
|
||||||
|
Beispiel: hf://aihpi/F5-TTS-German/F5TTS_Base/model_365000.safetensors
|
||||||
|
"""
|
||||||
|
if not p or not p.startswith("hf://"):
|
||||||
|
return p
|
||||||
|
try:
|
||||||
|
from huggingface_hub import hf_hub_download
|
||||||
|
rest = p[5:]
|
||||||
|
parts = rest.split("/", 2)
|
||||||
|
if len(parts) < 3:
|
||||||
|
logger.warning("Ungueltiges hf:// Format: %s (erwarte hf://user/repo/path)", p)
|
||||||
|
return p
|
||||||
|
repo_id = f"{parts[0]}/{parts[1]}"
|
||||||
|
filename = parts[2]
|
||||||
|
logger.info("HF-Download: %s aus %s", filename, repo_id)
|
||||||
|
local = hf_hub_download(repo_id=repo_id, filename=filename)
|
||||||
|
logger.info("HF-Download fertig: %s", local)
|
||||||
|
return local
|
||||||
|
except Exception as e:
|
||||||
|
logger.exception("HF-Download fehlgeschlagen fuer %s: %s", p, e)
|
||||||
|
return p
|
||||||
|
|
||||||
|
|
||||||
class F5Runner:
|
class F5Runner:
|
||||||
"""Haelt das F5-TTS-Modell. Synthese laeuft im Executor (blocking).
|
"""Haelt das F5-TTS-Modell. Synthese laeuft im Executor (blocking).
|
||||||
|
|
||||||
@@ -110,20 +143,28 @@ class F5Runner:
|
|||||||
self.vocab_file: str = DEFAULT_F5TTS_VOCAB_FILE
|
self.vocab_file: str = DEFAULT_F5TTS_VOCAB_FILE
|
||||||
self.cfg_strength: float = DEFAULT_F5TTS_CFG_STRENGTH
|
self.cfg_strength: float = DEFAULT_F5TTS_CFG_STRENGTH
|
||||||
self.nfe_step: int = DEFAULT_F5TTS_NFE_STEP
|
self.nfe_step: int = DEFAULT_F5TTS_NFE_STEP
|
||||||
|
# Last load-time fuer service_status Broadcast
|
||||||
|
self.last_load_seconds: float = 0.0
|
||||||
|
self._load_started_at: float = 0.0
|
||||||
|
|
||||||
def _load_blocking(self) -> None:
|
def _load_blocking(self) -> None:
    """Build the F5-TTS model synchronously and remember how long it took.

    Optional ``hf://`` checkpoint/vocab settings are resolved to local paths
    first. The timer starts after that resolution, so the recorded duration
    covers model construction only. The result is stored in
    ``self.last_load_seconds``.
    """
    model_cls = _get_f5tts_cls()

    # Empty settings stay empty; everything else goes through the resolver.
    ckpt_resolved = "" if not self.ckpt_file else _resolve_hf_path(self.ckpt_file)
    vocab_resolved = "" if not self.vocab_file else _resolve_hf_path(self.vocab_file)

    logger.info("Lade F5-TTS '%s' (device=%s, ckpt=%s)...",
                self.model_id, F5TTS_DEVICE, ckpt_resolved or "default")

    self._load_started_at = time.time()

    # Optional arguments are passed only when set, so the library defaults
    # apply otherwise.
    init_args = {"model": self.model_id, "device": F5TTS_DEVICE}
    for key, value in (("ckpt_file", ckpt_resolved), ("vocab_file", vocab_resolved)):
        if value:
            init_args[key] = value
    self.model = model_cls(**init_args)

    elapsed = time.time() - self._load_started_at
    logger.info("F5-TTS geladen in %.1fs (cfg_strength=%.1f, nfe=%d)",
                elapsed, self.cfg_strength, self.nfe_step)
    # Stored for the service_status broadcast mentioned where the field is declared.
    self.last_load_seconds = elapsed
|
||||||
|
|
||||||
async def ensure_loaded(self) -> None:
|
async def ensure_loaded(self) -> None:
|
||||||
async with self._lock:
|
async with self._lock:
|
||||||
@@ -242,32 +283,51 @@ def voice_paths(name: str) -> tuple[Path, Path]:
|
|||||||
return VOICES_DIR / f"{safe}.wav", VOICES_DIR / f"{safe}.txt"
|
return VOICES_DIR / f"{safe}.wav", VOICES_DIR / f"{safe}.txt"
|
||||||
|
|
||||||
|
|
||||||
def ensure_24k_mono_wav(src_wav: Path) -> Path:
|
def normalize_ref_wav(src_wav: Path, max_seconds: float = REF_MAX_SECONDS) -> tuple[Path, bool]:
    """Rewrite a reference WAV in place into the form F5-TTS works best with.

    The file is run through ffmpeg, which
      * trims silence at the start and at the end,
      * normalises loudness to -16 LUFS (EBU R128 ``loudnorm``),
      * resamples to ``TARGET_SR`` mono,
      * limits the duration to ``max_seconds``.

    Args:
        src_wav: Reference recording; overwritten on success.
        max_seconds: Upper bound for the resulting clip length.

    Returns:
        ``(src_wav, was_modified)``. ``was_modified`` is True only when the
        file was actually replaced; the caller should then invalidate the
        matching ``.txt`` transcript. On any failure (ffmpeg missing, timeout,
        non-zero exit, empty result) the original file is left untouched and
        ``was_modified`` is False.
    """
    tmp_out = src_wav.with_suffix(".conv.wav")

    def _discard_tmp() -> None:
        # Best-effort cleanup of the partial output file.
        try:
            tmp_out.unlink()
        except OSError:
            pass

    # silenceremove with stop_periods=1 ends the output at the FIRST pause of
    # stop_duration, wherever it occurs, so a speaker pausing mid-sentence
    # would lose the rest of the clip. To trim only trailing silence, reverse
    # the audio, trim its leading silence, and reverse it back.
    lead_trim = "silenceremove=start_periods=1:start_duration=0.05:start_threshold=-50dB"
    af = ",".join([
        lead_trim,                         # leading silence
        "areverse", lead_trim, "areverse",  # trailing silence
        "loudnorm=I=-16:TP=-1.5:LRA=11",   # EBU R128, target -16 LUFS
    ])
    cmd = ["ffmpeg", "-y", "-i", str(src_wav),
           "-af", af,
           "-ar", str(TARGET_SR), "-ac", "1",
           "-t", str(max_seconds),
           "-f", "wav", str(tmp_out)]

    try:
        r = subprocess.run(cmd, capture_output=True, timeout=30)
    except (OSError, subprocess.SubprocessError) as e:
        # ffmpeg not installed, or it ran into the 30 s timeout: same contract
        # as a failed conversion, and no stray temp file left behind.
        logger.warning("ffmpeg-Normalisierung von %s nicht moeglich: %s", src_wav, e)
        _discard_tmp()
        return src_wav, False

    if r.returncode != 0:
        logger.warning("ffmpeg-Normalisierung von %s fehlgeschlagen: %s",
                       src_wav, r.stderr.decode(errors="replace")[:300])
        _discard_tmp()
        return src_wav, False

    # An all-silent input is trimmed down to nothing; never overwrite the
    # original recording with an empty clip.
    try:
        if sf.info(str(tmp_out)).duration <= 0:
            logger.warning("Normalisierung von %s ergab leeres Audio — Original bleibt erhalten",
                           src_wav)
            _discard_tmp()
            return src_wav, False
    except Exception:
        # Output could not be inspected; carry on as before. The info lookup
        # after the replace below tolerates the same failure.
        pass

    os.replace(tmp_out, src_wav)
    try:
        info = sf.info(str(src_wav))
        logger.info("Referenz-WAV normalisiert: %s (%.1fs, %dHz mono, -16 LUFS, silence getrimmt)",
                    src_wav.name, info.duration, info.samplerate)
    except Exception:
        logger.info("Referenz-WAV normalisiert: %s", src_wav.name)
    return src_wav, True
|
||||||
|
|
||||||
|
|
||||||
async def _send(ws, mtype: str, payload: dict) -> None:
|
async def _send(ws, mtype: str, payload: dict) -> None:
|
||||||
@@ -343,6 +403,21 @@ async def _do_tts(ws, runner: F5Runner, text: str, voice: str,
|
|||||||
t0 = time.time()
|
t0 = time.time()
|
||||||
ref_wav_path, ref_txt_path = voice_paths(voice) if voice else (None, None)
|
ref_wav_path, ref_txt_path = voice_paths(voice) if voice else (None, None)
|
||||||
|
|
||||||
|
# WAV zu lang? F5-TTS limitiert intern auf 12s, dann passt der txt nicht
|
||||||
|
# mehr zum Audio. Wir clippen explizit auf 10s und invalidieren den txt,
|
||||||
|
# damit er on-the-fly passend zum gekuerzten Audio neu transkribiert wird.
|
||||||
|
if voice and ref_wav_path and ref_wav_path.exists():
|
||||||
|
try:
|
||||||
|
info = sf.info(str(ref_wav_path))
|
||||||
|
if info.duration > REF_MAX_SECONDS + 0.5:
|
||||||
|
logger.info("Voice '%s' WAV ist %.1fs (>%.0fs) → clippen + txt neu",
|
||||||
|
voice, info.duration, REF_MAX_SECONDS)
|
||||||
|
_, modified = normalize_ref_wav(ref_wav_path)
|
||||||
|
if modified and ref_txt_path and ref_txt_path.exists():
|
||||||
|
ref_txt_path.unlink()
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning("Konnte WAV-Dauer nicht pruefen: %s", e)
|
||||||
|
|
||||||
# Legacy-Platzhalter erkennen → behandeln als "kein txt" und neu transkribieren
|
# Legacy-Platzhalter erkennen → behandeln als "kein txt" und neu transkribieren
|
||||||
if voice and ref_txt_path and ref_txt_path.exists():
|
if voice and ref_txt_path and ref_txt_path.exists():
|
||||||
try:
|
try:
|
||||||
@@ -485,8 +560,9 @@ async def handle_voice_upload(ws, payload: dict) -> None:
|
|||||||
size_kb = wav_path.stat().st_size / 1024
|
size_kb = wav_path.stat().st_size / 1024
|
||||||
logger.info("Voice WAV gespeichert: %s (%.0fKB)", wav_path, size_kb)
|
logger.info("Voice WAV gespeichert: %s (%.0fKB)", wav_path, size_kb)
|
||||||
|
|
||||||
# Auf 24kHz mono normalisieren (falls App in anderem Format liefert)
|
# Auf 24kHz mono clippen auf 10s (F5-TTS Hard-Limit ist 12s,
|
||||||
ensure_24k_mono_wav(wav_path)
|
# kuerzer = schnellerer Warmup + Text+Audio bleiben aligned)
|
||||||
|
normalize_ref_wav(wav_path)
|
||||||
|
|
||||||
# Transkription ueber whisper-bridge anfragen
|
# Transkription ueber whisper-bridge anfragen
|
||||||
logger.info("Transkribiere '%s' via whisper-bridge...", name)
|
logger.info("Transkribiere '%s' via whisper-bridge...", name)
|
||||||
@@ -580,10 +656,15 @@ async def handle_voice_preload(ws, payload: dict, runner: F5Runner) -> None:
|
|||||||
|
|
||||||
# ── Haupt-Loop ──────────────────────────────────────────────
|
# ── Haupt-Loop ──────────────────────────────────────────────
|
||||||
|
|
||||||
async def run_loop(runner: F5Runner) -> None:
|
async def _broadcast_status(ws, state: str, **extra) -> None:
|
||||||
# Preload im Hintergrund starten damit der Startup nicht blockiert
|
"""Sendet service_status fuer das F5-TTS Modul.
|
||||||
asyncio.create_task(runner.ensure_loaded())
|
state: 'loading' | 'ready' | 'error'."""
|
||||||
|
payload = {"service": "f5tts", "state": state}
|
||||||
|
payload.update(extra)
|
||||||
|
await _send(ws, "service_status", payload)
|
||||||
|
|
||||||
|
|
||||||
|
async def run_loop(runner: F5Runner) -> None:
|
||||||
use_tls = RVS_TLS
|
use_tls = RVS_TLS
|
||||||
retry_s = 2
|
retry_s = 2
|
||||||
tls_fallback_tried = False
|
tls_fallback_tried = False
|
||||||
@@ -601,6 +682,33 @@ async def run_loop(runner: F5Runner) -> None:
|
|||||||
retry_s = 2
|
retry_s = 2
|
||||||
tls_fallback_tried = False
|
tls_fallback_tried = False
|
||||||
|
|
||||||
|
# Status-Broadcast: erst loading, dann ready nach erfolgreichem Load.
|
||||||
|
# Plus: config_request damit wir die persistierte Diagnostic-Config
|
||||||
|
# bekommen, falls aria-bridge ihre nicht von alleine sendet.
|
||||||
|
async def _load_with_status():
|
||||||
|
try:
|
||||||
|
if runner.model is not None:
|
||||||
|
logger.info("Initial: broadcaste ready (Modell schon im RAM: %s)", runner.model_id)
|
||||||
|
await _broadcast_status(ws, "ready",
|
||||||
|
model=runner.model_id,
|
||||||
|
loadSeconds=runner.last_load_seconds)
|
||||||
|
else:
|
||||||
|
logger.info("Initial: broadcaste loading + lade Modell '%s'", runner.model_id)
|
||||||
|
await _broadcast_status(ws, "loading", model=runner.model_id)
|
||||||
|
await runner.ensure_loaded()
|
||||||
|
await _broadcast_status(ws, "ready",
|
||||||
|
model=runner.model_id,
|
||||||
|
loadSeconds=runner.last_load_seconds)
|
||||||
|
logger.info("Initial: sende config_request an aria-bridge")
|
||||||
|
await _send(ws, "config_request", {"service": "f5tts"})
|
||||||
|
except Exception as e:
|
||||||
|
logger.exception("Initial-Load crashed: %s", e)
|
||||||
|
try:
|
||||||
|
await _broadcast_status(ws, "error", error=str(e)[:200])
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
asyncio.create_task(_load_with_status())
|
||||||
|
|
||||||
# TTS-Worker fuer diese Verbindung starten
|
# TTS-Worker fuer diese Verbindung starten
|
||||||
worker = asyncio.create_task(_tts_worker(ws, runner))
|
worker = asyncio.create_task(_tts_worker(ws, runner))
|
||||||
|
|
||||||
@@ -640,7 +748,26 @@ async def run_loop(runner: F5Runner) -> None:
|
|||||||
fut.set_result(payload.get("text") or "")
|
fut.set_result(payload.get("text") or "")
|
||||||
elif mtype == "config":
|
elif mtype == "config":
|
||||||
# F5-TTS-Settings aktualisieren (Modell, cfg_strength, nfe)
|
# F5-TTS-Settings aktualisieren (Modell, cfg_strength, nfe)
|
||||||
asyncio.create_task(runner.update_config(payload))
|
async def _update_with_status(p):
|
||||||
|
# Schaut ob ein Modell-Wechsel ansteht — falls ja:
|
||||||
|
# erst loading-Status, dann update, dann ready.
|
||||||
|
old_model = (runner.model_id, runner.ckpt_file, runner.vocab_file)
|
||||||
|
new_model_id = (p.get("f5ttsModel") or runner.model_id,
|
||||||
|
p.get("f5ttsCkptFile", runner.ckpt_file) or "",
|
||||||
|
p.get("f5ttsVocabFile", runner.vocab_file) or "")
|
||||||
|
will_reload = old_model != new_model_id
|
||||||
|
if will_reload:
|
||||||
|
await _broadcast_status(ws, "loading", model=new_model_id[0])
|
||||||
|
try:
|
||||||
|
await runner.update_config(p)
|
||||||
|
if will_reload:
|
||||||
|
await _broadcast_status(ws, "ready",
|
||||||
|
model=runner.model_id,
|
||||||
|
loadSeconds=runner.last_load_seconds)
|
||||||
|
except Exception as e:
|
||||||
|
if will_reload:
|
||||||
|
await _broadcast_status(ws, "error", error=str(e)[:200])
|
||||||
|
asyncio.create_task(_update_with_status(payload))
|
||||||
# Voice-Preload bei Wechsel
|
# Voice-Preload bei Wechsel
|
||||||
v = (payload.get("xttsVoice") or "").strip()
|
v = (payload.get("xttsVoice") or "").strip()
|
||||||
if v and v != _last_diag_voice:
|
if v and v != _last_diag_voice:
|
||||||
|
|||||||
+62
-11
@@ -152,8 +152,17 @@ async def handle_stt_request(ws, payload: dict, runner: WhisperRunner) -> None:
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
t_load = time.time()
|
t_load = time.time()
|
||||||
|
# Falls Modell noch nicht geladen (Race-Condition: stt_request vor config)
|
||||||
|
# → Status-Broadcast loading→ready damit der App-Banner aufpoppt
|
||||||
|
needs_load = runner.model is None or runner.model_size != model
|
||||||
|
if needs_load:
|
||||||
|
await _broadcast_status(ws, "loading", model=model)
|
||||||
await runner.ensure_loaded(model)
|
await runner.ensure_loaded(model)
|
||||||
load_ms = int((time.time() - t_load) * 1000)
|
load_ms = int((time.time() - t_load) * 1000)
|
||||||
|
if needs_load:
|
||||||
|
await _broadcast_status(ws, "ready",
|
||||||
|
model=runner.model_size,
|
||||||
|
loadSeconds=load_ms / 1000.0)
|
||||||
|
|
||||||
audio = ffmpeg_to_float32(audio_b64, mime_type)
|
audio = ffmpeg_to_float32(audio_b64, mime_type)
|
||||||
if audio.size == 0:
|
if audio.size == 0:
|
||||||
@@ -184,13 +193,15 @@ async def handle_stt_request(ws, payload: dict, runner: WhisperRunner) -> None:
|
|||||||
})
|
})
|
||||||
|
|
||||||
|
|
||||||
async def run_loop(runner: WhisperRunner) -> None:
|
async def _broadcast_status(ws, state: str, **extra) -> None:
|
||||||
# Modell vorab laden damit erste Anfrage flott ist
|
"""Sendet service_status fuer das Whisper-Modul.
|
||||||
try:
|
state: 'loading' | 'ready' | 'error'."""
|
||||||
await runner.ensure_loaded(WHISPER_MODEL)
|
payload = {"service": "whisper", "state": state}
|
||||||
except Exception as e:
|
payload.update(extra)
|
||||||
logger.error("Preload fehlgeschlagen: %s — Fortsetzung, wird bei erstem Request nachgeladen", e)
|
await _send(ws, "service_status", payload)
|
||||||
|
|
||||||
|
|
||||||
|
async def run_loop(runner: WhisperRunner) -> None:
|
||||||
use_tls = RVS_TLS
|
use_tls = RVS_TLS
|
||||||
retry_s = 2
|
retry_s = 2
|
||||||
tls_fallback_tried = False
|
tls_fallback_tried = False
|
||||||
@@ -201,10 +212,35 @@ async def run_loop(runner: WhisperRunner) -> None:
|
|||||||
masked = url.replace(RVS_TOKEN, "***") if RVS_TOKEN else url
|
masked = url.replace(RVS_TOKEN, "***") if RVS_TOKEN else url
|
||||||
try:
|
try:
|
||||||
logger.info("Verbinde zu RVS: %s", masked)
|
logger.info("Verbinde zu RVS: %s", masked)
|
||||||
async with websockets.connect(url, ping_interval=20, ping_timeout=10) as ws:
|
# max_size 50MB damit grosse stt_request (Voice-Cloning-WAVs als
|
||||||
|
# base64 koennen mehrere MB werden) nicht das Frame-Limit sprengen
|
||||||
|
# und die Verbindung mit 1009 'message too big' killen.
|
||||||
|
async with websockets.connect(url, ping_interval=20, ping_timeout=10, max_size=50 * 1024 * 1024) as ws:
|
||||||
logger.info("RVS verbunden")
|
logger.info("RVS verbunden")
|
||||||
retry_s = 2
|
retry_s = 2
|
||||||
tls_fallback_tried = False
|
tls_fallback_tried = False
|
||||||
|
|
||||||
|
# Initialer Status-Broadcast — uebertont alten "ready"-State
|
||||||
|
# im App/Diagnostic Banner (sonst denkt der User noch alles ist
|
||||||
|
# gut von vorher). Wenn Modell schon geladen → ready, sonst
|
||||||
|
# loading mit aktuellem (Default-)Namen.
|
||||||
|
# Plus: config_request an aria-bridge — wir wissen nicht ob
|
||||||
|
# sie auch grad reconnected hat oder schon laenger online ist.
|
||||||
|
async def _initial_handshake():
|
||||||
|
try:
|
||||||
|
if runner.model is not None:
|
||||||
|
logger.info("Initial: broadcaste ready (Modell schon im RAM: %s)", runner.model_size)
|
||||||
|
await _broadcast_status(ws, "ready", model=runner.model_size)
|
||||||
|
else:
|
||||||
|
init_model = runner.model_size or WHISPER_MODEL
|
||||||
|
logger.info("Initial: broadcaste loading (model=%s)", init_model)
|
||||||
|
await _broadcast_status(ws, "loading", model=init_model)
|
||||||
|
logger.info("Initial: sende config_request an aria-bridge")
|
||||||
|
await _send(ws, "config_request", {"service": "whisper"})
|
||||||
|
except Exception as e:
|
||||||
|
logger.exception("Initial-Handshake crashed: %s", e)
|
||||||
|
asyncio.create_task(_initial_handshake())
|
||||||
|
|
||||||
async for raw in ws:
|
async for raw in ws:
|
||||||
try:
|
try:
|
||||||
msg = json.loads(raw)
|
msg = json.loads(raw)
|
||||||
@@ -220,10 +256,25 @@ async def run_loop(runner: WhisperRunner) -> None:
|
|||||||
req_id[:8] if req_id != "?" else "?", audio_len // 1365)
|
req_id[:8] if req_id != "?" else "?", audio_len // 1365)
|
||||||
asyncio.create_task(handle_stt_request(ws, payload, runner))
|
asyncio.create_task(handle_stt_request(ws, payload, runner))
|
||||||
elif mtype == "config":
|
elif mtype == "config":
|
||||||
new_model = payload.get("whisperModel")
|
new_model = payload.get("whisperModel") or WHISPER_MODEL
|
||||||
if new_model and new_model != runner.model_size:
|
# Laden wenn (a) noch nix geladen, oder (b) Modell wechselt
|
||||||
logger.info("Config-Broadcast: Whisper-Modell → %s", new_model)
|
needs_load = (runner.model is None) or (new_model != runner.model_size)
|
||||||
asyncio.create_task(runner.ensure_loaded(new_model))
|
if needs_load:
|
||||||
|
logger.info("Config-Broadcast: Whisper-Modell -> %s%s",
|
||||||
|
new_model,
|
||||||
|
" (initial)" if runner.model is None else " (Wechsel)")
|
||||||
|
async def _swap_with_status(target):
|
||||||
|
await _broadcast_status(ws, "loading", model=target)
|
||||||
|
try:
|
||||||
|
t0 = time.time()
|
||||||
|
await runner.ensure_loaded(target)
|
||||||
|
elapsed = time.time() - t0
|
||||||
|
await _broadcast_status(ws, "ready",
|
||||||
|
model=runner.model_size,
|
||||||
|
loadSeconds=elapsed)
|
||||||
|
except Exception as e:
|
||||||
|
await _broadcast_status(ws, "error", error=str(e)[:200])
|
||||||
|
asyncio.create_task(_swap_with_status(new_model))
|
||||||
else:
|
else:
|
||||||
# Alle anderen Nachrichten debug-loggen — hilft beim Diagnostizieren,
|
# Alle anderen Nachrichten debug-loggen — hilft beim Diagnostizieren,
|
||||||
# ob stt_request ueberhaupt durch den RVS kommt
|
# ob stt_request ueberhaupt durch den RVS kommt
|
||||||
|
|||||||
Reference in New Issue
Block a user