Compare commits
2 Commits
a029267d9d
...
a361015ff4
| Author | SHA1 | Date |
|---|---|---|
|
|
a361015ff4 | |
|
|
d83b555209 |
|
|
@ -54,7 +54,10 @@ function cleanupRooms() {
|
||||||
|
|
||||||
// ── WebSocket-Server starten ────────────────────────────────────────
|
// ── WebSocket-Server starten ────────────────────────────────────────
|
||||||
|
|
||||||
const wss = new WebSocketServer({ port: PORT });
|
// maxPayload 50MB: TTS-Streaming + Voice-Upload (WAV als base64) +
|
||||||
|
// audio_pcm Chunks koennen die ws-Library Default 1MB ueberschreiten.
|
||||||
|
// Default-Limit war der Killer fuer die voice_upload Pipeline.
|
||||||
|
const wss = new WebSocketServer({ port: PORT, maxPayload: 50 * 1024 * 1024 });
|
||||||
|
|
||||||
wss.on("listening", () => {
|
wss.on("listening", () => {
|
||||||
log(`RVS läuft auf Port ${PORT} | Max Sessions: ${MAX_SESSIONS}`);
|
log(`RVS läuft auf Port ${PORT} | Max Sessions: ${MAX_SESSIONS}`);
|
||||||
|
|
|
||||||
|
|
@ -152,8 +152,17 @@ async def handle_stt_request(ws, payload: dict, runner: WhisperRunner) -> None:
|
||||||
|
|
||||||
try:
|
try:
|
||||||
t_load = time.time()
|
t_load = time.time()
|
||||||
|
# Falls Modell noch nicht geladen (Race-Condition: stt_request vor config)
|
||||||
|
# → Status-Broadcast loading→ready damit der App-Banner aufpoppt
|
||||||
|
needs_load = runner.model is None or runner.model_size != model
|
||||||
|
if needs_load:
|
||||||
|
await _broadcast_status(ws, "loading", model=model)
|
||||||
await runner.ensure_loaded(model)
|
await runner.ensure_loaded(model)
|
||||||
load_ms = int((time.time() - t_load) * 1000)
|
load_ms = int((time.time() - t_load) * 1000)
|
||||||
|
if needs_load:
|
||||||
|
await _broadcast_status(ws, "ready",
|
||||||
|
model=runner.model_size,
|
||||||
|
loadSeconds=load_ms / 1000.0)
|
||||||
|
|
||||||
audio = ffmpeg_to_float32(audio_b64, mime_type)
|
audio = ffmpeg_to_float32(audio_b64, mime_type)
|
||||||
if audio.size == 0:
|
if audio.size == 0:
|
||||||
|
|
@ -203,27 +212,23 @@ async def run_loop(runner: WhisperRunner) -> None:
|
||||||
masked = url.replace(RVS_TOKEN, "***") if RVS_TOKEN else url
|
masked = url.replace(RVS_TOKEN, "***") if RVS_TOKEN else url
|
||||||
try:
|
try:
|
||||||
logger.info("Verbinde zu RVS: %s", masked)
|
logger.info("Verbinde zu RVS: %s", masked)
|
||||||
async with websockets.connect(url, ping_interval=20, ping_timeout=10) as ws:
|
# max_size 50MB damit grosse stt_request (Voice-Cloning-WAVs als
|
||||||
|
# base64 koennen mehrere MB werden) nicht das Frame-Limit sprengen
|
||||||
|
# und die Verbindung mit 1009 'message too big' killen.
|
||||||
|
async with websockets.connect(url, ping_interval=20, ping_timeout=10, max_size=50 * 1024 * 1024) as ws:
|
||||||
logger.info("RVS verbunden")
|
logger.info("RVS verbunden")
|
||||||
retry_s = 2
|
retry_s = 2
|
||||||
tls_fallback_tried = False
|
tls_fallback_tried = False
|
||||||
|
|
||||||
# Modell laden, dabei loading→ready broadcasten
|
# KEIN initialer Preload. Der aria-bridge broadcastet kurz nach
|
||||||
async def _load_with_status():
|
# RVS-Connect die persistierte Config (whisperModel) — wir laden
|
||||||
if runner.model is not None:
|
# erst wenn der drinsteht, sonst wuerde 2x geladen werden
|
||||||
await _broadcast_status(ws, "ready", model=runner.model_size)
|
# (small als ENV-Default + dann das echte Modell).
|
||||||
return
|
# Wenn ein stt_request schneller kommt als die Config: ensure_loaded
|
||||||
await _broadcast_status(ws, "loading", model=WHISPER_MODEL)
|
# im Handler greift dann ein und laedt das angeforderte Modell.
|
||||||
try:
|
if runner.model is not None:
|
||||||
t0 = time.time()
|
# Wir sind reconnectet — Modell schon im RAM, einfach 'ready'
|
||||||
await runner.ensure_loaded(WHISPER_MODEL)
|
asyncio.create_task(_broadcast_status(ws, "ready", model=runner.model_size))
|
||||||
elapsed = time.time() - t0
|
|
||||||
await _broadcast_status(ws, "ready",
|
|
||||||
model=runner.model_size,
|
|
||||||
loadSeconds=elapsed)
|
|
||||||
except Exception as e:
|
|
||||||
await _broadcast_status(ws, "error", error=str(e)[:200])
|
|
||||||
asyncio.create_task(_load_with_status())
|
|
||||||
|
|
||||||
async for raw in ws:
|
async for raw in ws:
|
||||||
try:
|
try:
|
||||||
|
|
@ -240,9 +245,13 @@ async def run_loop(runner: WhisperRunner) -> None:
|
||||||
req_id[:8] if req_id != "?" else "?", audio_len // 1365)
|
req_id[:8] if req_id != "?" else "?", audio_len // 1365)
|
||||||
asyncio.create_task(handle_stt_request(ws, payload, runner))
|
asyncio.create_task(handle_stt_request(ws, payload, runner))
|
||||||
elif mtype == "config":
|
elif mtype == "config":
|
||||||
new_model = payload.get("whisperModel")
|
new_model = payload.get("whisperModel") or WHISPER_MODEL
|
||||||
if new_model and new_model != runner.model_size:
|
# Laden wenn (a) noch nix geladen, oder (b) Modell wechselt
|
||||||
logger.info("Config-Broadcast: Whisper-Modell -> %s", new_model)
|
needs_load = (runner.model is None) or (new_model != runner.model_size)
|
||||||
|
if needs_load:
|
||||||
|
logger.info("Config-Broadcast: Whisper-Modell -> %s%s",
|
||||||
|
new_model,
|
||||||
|
" (initial)" if runner.model is None else " (Wechsel)")
|
||||||
async def _swap_with_status(target):
|
async def _swap_with_status(target):
|
||||||
await _broadcast_status(ws, "loading", model=target)
|
await _broadcast_status(ws, "loading", model=target)
|
||||||
try:
|
try:
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue