fix(whisper): no more eager preload; wait for the config broadcast
Before: on container start the bridge first loaded 'small' (the env default), then loaded the model configured in Diagnostic (e.g. large-v3) a second time once the config broadcast from the aria-bridge arrived. Double download, double wait time, double VRAM peak.

Now:
- Nothing is loaded initially.
- The aria-bridge sends the persisted voice_config.json shortly after the RVS connect → the whisper-bridge sees the correct model name.
- The config handler detects "nothing loaded yet OR model switch" → loading broadcast → ensure_loaded → ready broadcast.
- The stt_request handler does the same status broadcast in case of a race condition (voice input within the first 1-2 s after container start).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
parent a029267d9d
commit d83b555209
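To make the load-gating described in the commit message concrete, here is a minimal, self-contained sketch of the pattern: load only once the target model is known, wrap the load in loading/ready status broadcasts, and let both the config handler and the stt_request handler go through the same check. Runner, broadcast() and load_if_needed() are simplified stand-ins invented for this illustration, not the real WhisperRunner/_broadcast_status API.

# Minimal sketch (names are stand-ins, not the real bridge code): load only
# when the target model is known, broadcast loading/ready around the load.
import asyncio
import time

class Runner:
    def __init__(self):
        self.model = None            # nothing is preloaded at startup
        self.model_size = None

    async def ensure_loaded(self, name: str):
        if self.model is not None and self.model_size == name:
            return                   # right model already loaded, no-op
        await asyncio.sleep(0.1)     # stand-in for the real model download/load
        self.model, self.model_size = object(), name

async def broadcast(status: str, **extra):
    print("status:", status, extra)  # the real bridge sends this over the websocket

async def load_if_needed(runner: Runner, target: str):
    # Shared by the config handler and the stt_request race fallback.
    needs_load = runner.model is None or runner.model_size != target
    if needs_load:
        await broadcast("loading", model=target)
    t0 = time.time()
    await runner.ensure_loaded(target)
    if needs_load:
        await broadcast("ready", model=runner.model_size,
                        loadSeconds=round(time.time() - t0, 2))

async def main():
    runner = Runner()
    await load_if_needed(runner, "large-v3")   # config broadcast arrives -> first load
    await load_if_needed(runner, "large-v3")   # later stt_request: already loaded, silent

asyncio.run(main())

The point of the needs_load flag is that the ready broadcast only fires when a load actually happened, so reconnects and repeated requests stay quiet.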
@@ -152,8 +152,17 @@ async def handle_stt_request(ws, payload: dict, runner: WhisperRunner) -> None:
     try:
         t_load = time.time()
+        # If the model is not loaded yet (race condition: stt_request before config)
+        # → broadcast loading→ready so the app banner pops up
+        needs_load = runner.model is None or runner.model_size != model
+        if needs_load:
+            await _broadcast_status(ws, "loading", model=model)
         await runner.ensure_loaded(model)
         load_ms = int((time.time() - t_load) * 1000)
+        if needs_load:
+            await _broadcast_status(ws, "ready",
+                                    model=runner.model_size,
+                                    loadSeconds=load_ms / 1000.0)
 
         audio = ffmpeg_to_float32(audio_b64, mime_type)
         if audio.size == 0:
@@ -208,22 +217,15 @@ async def run_loop(runner: WhisperRunner) -> None:
     retry_s = 2
     tls_fallback_tried = False
 
-    # Load the model, broadcasting loading→ready along the way
-    async def _load_with_status():
-        if runner.model is not None:
-            await _broadcast_status(ws, "ready", model=runner.model_size)
-            return
-        await _broadcast_status(ws, "loading", model=WHISPER_MODEL)
-        try:
-            t0 = time.time()
-            await runner.ensure_loaded(WHISPER_MODEL)
-            elapsed = time.time() - t0
-            await _broadcast_status(ws, "ready",
-                                    model=runner.model_size,
-                                    loadSeconds=elapsed)
-        except Exception as e:
-            await _broadcast_status(ws, "error", error=str(e)[:200])
-    asyncio.create_task(_load_with_status())
+    # NO initial preload. Shortly after the RVS connect the aria-bridge
+    # broadcasts the persisted config (whisperModel); we only load once
+    # that has arrived, otherwise the model would be loaded twice
+    # (small as the ENV default, then the real model).
+    # If an stt_request arrives faster than the config: ensure_loaded in
+    # the handler kicks in and loads the requested model.
+    if runner.model is not None:
+        # We reconnected: model already in RAM, just announce 'ready'
+        asyncio.create_task(_broadcast_status(ws, "ready", model=runner.model_size))
 
     async for raw in ws:
         try:
@@ -240,9 +242,13 @@ async def run_loop(runner: WhisperRunner) -> None:
                             req_id[:8] if req_id != "?" else "?", audio_len // 1365)
                 asyncio.create_task(handle_stt_request(ws, payload, runner))
             elif mtype == "config":
-                new_model = payload.get("whisperModel")
-                if new_model and new_model != runner.model_size:
-                    logger.info("Config-Broadcast: Whisper-Modell -> %s", new_model)
+                new_model = payload.get("whisperModel") or WHISPER_MODEL
+                # Load if (a) nothing is loaded yet, or (b) the model changes
+                needs_load = (runner.model is None) or (new_model != runner.model_size)
+                if needs_load:
+                    logger.info("Config-Broadcast: Whisper-Modell -> %s%s",
+                                new_model,
+                                " (initial)" if runner.model is None else " (Wechsel)")
                     async def _swap_with_status(target):
                         await _broadcast_status(ws, "loading", model=target)
                         try: