From d83b555209545e1f41b629b6a6926ff57b07dc9c Mon Sep 17 00:00:00 2001 From: duffyduck Date: Fri, 24 Apr 2026 16:50:46 +0200 Subject: [PATCH] =?UTF-8?q?fix(whisper):=20kein=20eager=20preload=20mehr?= =?UTF-8?q?=20=E2=80=94=20wartet=20auf=20config-Broadcast?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Vorher: Container-Start lud erst 'small' (env default), dann nochmal das in Diagnostic konfigurierte Modell (z.B. large-v3) wenn der config-Broadcast vom aria-bridge ankam. Doppelter Download, doppelte Wartezeit, doppelter VRAM-Peak. Jetzt: - Initial wird NICHTS geladen - aria-bridge sendet die persistierte voice_config.json kurz nach RVS-Connect → whisper-bridge sieht den richtigen Modellnamen - config-Handler erkennt: noch nichts geladen ODER Wechsel → loading-Broadcast → ensure_loaded → ready-Broadcast - stt_request-Handler: gleicher Status-Broadcast falls Race-Condition (Spracheingabe in den ersten 1-2s nach Container-Start) Co-Authored-By: Claude Opus 4.7 (1M context) --- xtts/whisper/bridge.py | 44 ++++++++++++++++++++++++------------------ 1 file changed, 25 insertions(+), 19 deletions(-) diff --git a/xtts/whisper/bridge.py b/xtts/whisper/bridge.py index 9210062..e44022f 100644 --- a/xtts/whisper/bridge.py +++ b/xtts/whisper/bridge.py @@ -152,8 +152,17 @@ async def handle_stt_request(ws, payload: dict, runner: WhisperRunner) -> None: try: t_load = time.time() + # Falls Modell noch nicht geladen (Race-Condition: stt_request vor config) + # → Status-Broadcast loading→ready damit der App-Banner aufpoppt + needs_load = runner.model is None or runner.model_size != model + if needs_load: + await _broadcast_status(ws, "loading", model=model) await runner.ensure_loaded(model) load_ms = int((time.time() - t_load) * 1000) + if needs_load: + await _broadcast_status(ws, "ready", + model=runner.model_size, + loadSeconds=load_ms / 1000.0) audio = ffmpeg_to_float32(audio_b64, mime_type) if audio.size == 0: @@ -208,22 +217,15 @@ 
async def run_loop(runner: WhisperRunner) -> None: retry_s = 2 tls_fallback_tried = False - # Modell laden, dabei loading→ready broadcasten - async def _load_with_status(): - if runner.model is not None: - await _broadcast_status(ws, "ready", model=runner.model_size) - return - await _broadcast_status(ws, "loading", model=WHISPER_MODEL) - try: - t0 = time.time() - await runner.ensure_loaded(WHISPER_MODEL) - elapsed = time.time() - t0 - await _broadcast_status(ws, "ready", - model=runner.model_size, - loadSeconds=elapsed) - except Exception as e: - await _broadcast_status(ws, "error", error=str(e)[:200]) - asyncio.create_task(_load_with_status()) + # KEIN initialer Preload. Der aria-bridge broadcastet kurz nach + # RVS-Connect die persistierte Config (whisperModel) — wir laden + # erst wenn der drinsteht, sonst wuerde 2x geladen werden + # (small als ENV-Default + dann das echte Modell). + # Wenn ein stt_request schneller kommt als die Config: ensure_loaded + # im Handler greift dann ein und laedt das angeforderte Modell. + if runner.model is not None: + # Wir sind reconnectet — Modell schon im RAM, einfach 'ready' + asyncio.create_task(_broadcast_status(ws, "ready", model=runner.model_size)) async for raw in ws: try: @@ -240,9 +242,13 @@ async def run_loop(runner: WhisperRunner) -> None: req_id[:8] if req_id != "?" 
else "?", audio_len // 1365) asyncio.create_task(handle_stt_request(ws, payload, runner)) elif mtype == "config": - new_model = payload.get("whisperModel") - if new_model and new_model != runner.model_size: - logger.info("Config-Broadcast: Whisper-Modell -> %s", new_model) + new_model = payload.get("whisperModel") or WHISPER_MODEL + # Laden wenn (a) noch nix geladen, oder (b) Modell wechselt + needs_load = (runner.model is None) or (new_model != runner.model_size) + if needs_load: + logger.info("Config-Broadcast: Whisper-Modell -> %s%s", + new_model, + " (initial)" if runner.model is None else " (Wechsel)") async def _swap_with_status(target): await _broadcast_status(ws, "loading", model=target) try: