feat: HF-Cache raus + service_status Banner in Diagnostic
Stefan akzeptiert die ~5min Modell-Download-Zeit nach jedem Container- Start, dafuer keine 50GB Cache-Bloat mehr und kein Bind-Mount-Verzeichnis zu pflegen. - xtts/docker-compose.yml: hf-cache Bind-Mount entfernt fuer beide Bridges. Modelle werden im writable Container-Layer abgelegt und mit jedem `docker compose down` automatisch weggeraeumt. - xtts/.gitignore: hf-cache/ Eintrag raus - RVS ALLOWED_TYPES: service_status hinzu Bridges broadcasten Lade-Status: - f5tts-bridge: bei Connect 'loading' -> ensure_loaded -> 'ready'. Auch bei config-getriggertem Modell-Wechsel: erst 'loading' Broadcast, dann reload, dann 'ready'. - whisper-bridge: gleiches Pattern. Modell wird jetzt erst nach RVS-Connect geladen damit der loading-Broadcast tatsaechlich rausgeht. Diagnostic: - server.js: service_status wird an Browser durchgereicht - index.html: neues Banner unten rechts (fixed position) zeigt Status fuer beide Services. Aggregiert: Icon ist Lupe waehrend Loading, Check wenn alles ready, X bei Error. - Wenn alles ready: X-Button erscheint (manuell schliessen) + nach 8s automatisches Fade-Out. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
+55
-6
@@ -110,20 +110,26 @@ class F5Runner:
|
||||
self.vocab_file: str = DEFAULT_F5TTS_VOCAB_FILE
|
||||
self.cfg_strength: float = DEFAULT_F5TTS_CFG_STRENGTH
|
||||
self.nfe_step: int = DEFAULT_F5TTS_NFE_STEP
|
||||
# Last load-time fuer service_status Broadcast
|
||||
self.last_load_seconds: float = 0.0
|
||||
self._load_started_at: float = 0.0
|
||||
|
||||
def _load_blocking(self) -> None:
|
||||
cls = _get_f5tts_cls()
|
||||
logger.info("Lade F5-TTS '%s' (device=%s, ckpt=%s)...",
|
||||
self.model_id, F5TTS_DEVICE, self.ckpt_file or "default")
|
||||
t0 = time.time()
|
||||
self._load_started_at = time.time()
|
||||
kwargs = {"model": self.model_id, "device": F5TTS_DEVICE}
|
||||
if self.ckpt_file:
|
||||
kwargs["ckpt_file"] = self.ckpt_file
|
||||
if self.vocab_file:
|
||||
kwargs["vocab_file"] = self.vocab_file
|
||||
self.model = cls(**kwargs)
|
||||
elapsed = time.time() - self._load_started_at
|
||||
logger.info("F5-TTS geladen in %.1fs (cfg_strength=%.1f, nfe=%d)",
|
||||
time.time() - t0, self.cfg_strength, self.nfe_step)
|
||||
elapsed, self.cfg_strength, self.nfe_step)
|
||||
# Wird von outside (run_loop) gelesen um service_status auf 'ready' zu setzen
|
||||
self.last_load_seconds = elapsed
|
||||
|
||||
async def ensure_loaded(self) -> None:
|
||||
async with self._lock:
|
||||
@@ -580,10 +586,15 @@ async def handle_voice_preload(ws, payload: dict, runner: F5Runner) -> None:
|
||||
|
||||
# ── Haupt-Loop ──────────────────────────────────────────────
|
||||
|
||||
async def run_loop(runner: F5Runner) -> None:
|
||||
# Preload im Hintergrund starten damit der Startup nicht blockiert
|
||||
asyncio.create_task(runner.ensure_loaded())
|
||||
async def _broadcast_status(ws, state: str, **extra) -> None:
|
||||
"""Sendet service_status fuer das F5-TTS Modul.
|
||||
state: 'loading' | 'ready' | 'error'."""
|
||||
payload = {"service": "f5tts", "state": state}
|
||||
payload.update(extra)
|
||||
await _send(ws, "service_status", payload)
|
||||
|
||||
|
||||
async def run_loop(runner: F5Runner) -> None:
|
||||
use_tls = RVS_TLS
|
||||
retry_s = 2
|
||||
tls_fallback_tried = False
|
||||
@@ -601,6 +612,25 @@ async def run_loop(runner: F5Runner) -> None:
|
||||
retry_s = 2
|
||||
tls_fallback_tried = False
|
||||
|
||||
# Status-Broadcast: erst loading, dann ready nach erfolgreichem Load.
|
||||
# Modell wird hier (nicht ausserhalb der Schleife) gestartet damit
|
||||
# der Loading-Status auch wirklich uebertragen werden kann.
|
||||
async def _load_with_status():
|
||||
if runner.model is not None:
|
||||
await _broadcast_status(ws, "ready",
|
||||
model=runner.model_id,
|
||||
loadSeconds=runner.last_load_seconds)
|
||||
return
|
||||
await _broadcast_status(ws, "loading", model=runner.model_id)
|
||||
try:
|
||||
await runner.ensure_loaded()
|
||||
await _broadcast_status(ws, "ready",
|
||||
model=runner.model_id,
|
||||
loadSeconds=runner.last_load_seconds)
|
||||
except Exception as e:
|
||||
await _broadcast_status(ws, "error", error=str(e)[:200])
|
||||
asyncio.create_task(_load_with_status())
|
||||
|
||||
# TTS-Worker fuer diese Verbindung starten
|
||||
worker = asyncio.create_task(_tts_worker(ws, runner))
|
||||
|
||||
@@ -640,7 +670,26 @@ async def run_loop(runner: F5Runner) -> None:
|
||||
fut.set_result(payload.get("text") or "")
|
||||
elif mtype == "config":
|
||||
# F5-TTS-Settings aktualisieren (Modell, cfg_strength, nfe)
|
||||
asyncio.create_task(runner.update_config(payload))
|
||||
async def _update_with_status(p):
|
||||
# Schaut ob ein Modell-Wechsel ansteht — falls ja:
|
||||
# erst loading-Status, dann update, dann ready.
|
||||
old_model = (runner.model_id, runner.ckpt_file, runner.vocab_file)
|
||||
new_model_id = (p.get("f5ttsModel") or runner.model_id,
|
||||
p.get("f5ttsCkptFile", runner.ckpt_file) or "",
|
||||
p.get("f5ttsVocabFile", runner.vocab_file) or "")
|
||||
will_reload = old_model != new_model_id
|
||||
if will_reload:
|
||||
await _broadcast_status(ws, "loading", model=new_model_id[0])
|
||||
try:
|
||||
await runner.update_config(p)
|
||||
if will_reload:
|
||||
await _broadcast_status(ws, "ready",
|
||||
model=runner.model_id,
|
||||
loadSeconds=runner.last_load_seconds)
|
||||
except Exception as e:
|
||||
if will_reload:
|
||||
await _broadcast_status(ws, "error", error=str(e)[:200])
|
||||
asyncio.create_task(_update_with_status(payload))
|
||||
# Voice-Preload bei Wechsel
|
||||
v = (payload.get("xttsVoice") or "").strip()
|
||||
if v and v != _last_diag_voice:
|
||||
|
||||
Reference in New Issue
Block a user