From 493cba36a2596941aeb5de5750254a54b1edb182 Mon Sep 17 00:00:00 2001 From: duffyduck Date: Sat, 30 May 2026 22:00:55 +0200 Subject: [PATCH] feat(diagnostic): RVS-Debug-Logs fuer Whisper- und F5TTS-Bridge MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Stefan's Gamebox ist Windows (kein SSH-Zugriff), und in Zukunft koennten whisper/f5tts auf separaten Hosts laufen. Wir brauchen deshalb einen Logging-Pfad ueber RVS — gleicher Mechanismus wie fuer die App (reportAppDebug). Beide Bridges senden jetzt app_log-Messages mit platform="whisper" bzw. "f5tts". aria-bridge schreibt sie in /shared/logs/app.log (unverändert), Live-Logs-Tab + Diagnostic /api/app-log lesen mit. Toggle via aria-bridge config: whisperDebugLog: bool — default OFF (aktuell aber ON in whisper-bridge weil wir Phase-1/2- Pipeline einfahren) f5ttsDebugLog: bool — default OFF Beide werden in voice_config.json persistiert + nach RVS-Connect rebroadcastet, damit Toggle Container-Restart ueberlebt. Whisper-Bridge logt aktuell: boot → Streaming-Mode-Marker (sehen wir damit ob neue Version aktiv ist) stream.start → stt_stream_start angekommen stream.chunk → alle 25 Chunks (=5s Audio) einer stream.chunk.reject → Chunk fuer unbekannte Session stream.partial → Whisper hat neuen Text erkannt stream.final → Endpoint detected, finaler Text raus stream.end → stt_stream_end angekommen config → Toggle umgeschaltet F5TTS-Helper ist da (gleicher Pattern), Logging-Punkte kommen spaeter wenn wir ein konkretes TTS-Problem zu debuggen haben. --- bridge/aria_bridge.py | 17 ++++++++ xtts/f5tts/bridge.py | 59 +++++++++++++++++++++++++++ xtts/whisper/bridge.py | 90 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 166 insertions(+) diff --git a/bridge/aria_bridge.py b/bridge/aria_bridge.py index 2972c20..2b663d5 100644 --- a/bridge/aria_bridge.py +++ b/bridge/aria_bridge.py @@ -556,6 +556,12 @@ class ARIABridge: for k in ("fluxDefaultModel", "fluxKeywordRaw", "fluxKeywordSwitch", "huggingfaceToken"): if k in vc: self._flux_config[k] = vc[k] + # Debug-Log-Toggles fuer Whisper / F5TTS Bridges (Diagnostic-Toggle). + # Default: aus — sonst muellen wir uns volle Disk wenn alles laeuft. + self._debug_log_config: dict = {} + for k in ("whisperDebugLog", "f5ttsDebugLog"): + if k in vc: + self._debug_log_config[k] = bool(vc[k]) logger.info("Voice-Config geladen: tts=%s voice=%s f5tts=%s flux=%s", self.tts_enabled, self.xtts_voice or "default", self._f5tts_config or "defaults", @@ -1304,6 +1310,7 @@ class ARIABridge: payload["xttsSpeed"] = self._persistent_xtts_speed payload.update(getattr(self, "_f5tts_config", {}) or {}) payload.update(getattr(self, "_flux_config", {}) or {}) + payload.update(getattr(self, "_debug_log_config", {}) or {}) await self._send_to_rvs({ "type": "config", "payload": payload, @@ -1978,6 +1985,15 @@ class ARIABridge: self._flux_config = {} self._flux_config[k] = payload[k] changed = True + # Debug-Log-Toggles fuer Whisper- und F5TTS-Bridge — werden via + # naechstem config-Broadcast an die jeweiligen Bridges weitergegeben. + # Persistent damit Toggle einen Container-Restart ueberlebt. + for k in ("whisperDebugLog", "f5ttsDebugLog"): + if k in payload: + if not hasattr(self, "_debug_log_config"): + self._debug_log_config = {} + self._debug_log_config[k] = bool(payload[k]) + changed = True # Persistent speichern in Shared Volume if changed: try: @@ -1991,6 +2007,7 @@ class ARIABridge: config_data["xttsSpeed"] = self._persistent_xtts_speed config_data.update(getattr(self, "_f5tts_config", {})) config_data.update(getattr(self, "_flux_config", {})) + config_data.update(getattr(self, "_debug_log_config", {})) with open("/shared/config/voice_config.json", "w") as f: json.dump(config_data, f, indent=2) logger.info("[rvs] Voice-Config gespeichert: %s", config_data) diff --git a/xtts/f5tts/bridge.py b/xtts/f5tts/bridge.py index c986e2b..9146d63 100644 --- a/xtts/f5tts/bridge.py +++ b/xtts/f5tts/bridge.py @@ -375,6 +375,41 @@ async def _send(ws, mtype: str, payload: dict) -> None: logger.warning("Send fehlgeschlagen (%s): %s", mtype, e) +# ────────────────────────────────────────────────────────────── +# DEBUG-LOG ueber RVS → /shared/logs/app.log +# +# Gleiches Pattern wie in whisper-bridge: Stefan's Gamebox ist +# Windows (kein SSH), in Zukunft koennten whisper + f5tts auf +# unterschiedlichen Hosts laufen. Logs ueber RVS heisst: ein Pfad. +# +# Toggle via aria-bridge config broadcast: f5ttsDebugLog (bool). +# ────────────────────────────────────────────────────────────── +_DEBUG_LOG_TO_BRIDGE: bool = False # default OFF — TTS-Renders sind teurer + # zu debuggen, normalerweise nicht noetig + + +async def _debug_log(ws, scope: str, message: str, level: str = "info") -> None: + """Schickt einen app_log via RVS → /shared/logs/app.log mit platform='f5tts'. + No-op wenn Toggle aus.""" + if not _DEBUG_LOG_TO_BRIDGE: + return + try: + await ws.send(json.dumps({ + "type": "app_log", + "payload": { + "ts": int(time.time() * 1000), + "platform": "f5tts", + "level": level, + "scope": scope, + "message": str(message)[:2000], + "stack": "", + }, + "timestamp": int(time.time() * 1000), + })) + except Exception: + pass + + # ── Interne Transkription via whisper-bridge ──────────────── _pending_stt: dict[str, asyncio.Future] = {} @@ -867,6 +902,30 @@ async def run_loop(runner: F5Runner) -> None: else: fut.set_result(payload.get("text") or "") elif mtype == "config": + # Debug-Toggle (gleiche Semantik wie in whisper-bridge) + if "f5ttsDebugLog" in payload: + global _DEBUG_LOG_TO_BRIDGE + old = _DEBUG_LOG_TO_BRIDGE + _DEBUG_LOG_TO_BRIDGE = bool(payload.get("f5ttsDebugLog", False)) + if old != _DEBUG_LOG_TO_BRIDGE: + logger.info("Debug-Log-to-Bridge: %s", "ON" if _DEBUG_LOG_TO_BRIDGE else "OFF") + # Last gasp wenn ausgeschaltet wird + if not _DEBUG_LOG_TO_BRIDGE: + try: + await ws.send(json.dumps({ + "type": "app_log", + "payload": { + "ts": int(time.time() * 1000), + "platform": "f5tts", + "level": "info", + "scope": "config", + "message": "debug-log OFF (toggle aus)", + "stack": "", + }, + "timestamp": int(time.time() * 1000), + })) + except Exception: + pass # F5-TTS-Settings aktualisieren (Modell, cfg_strength, nfe) async def _update_with_status(p): # Schaut ob ein Modell-Wechsel ansteht — falls ja: diff --git a/xtts/whisper/bridge.py b/xtts/whisper/bridge.py index 34d6646..38794a6 100644 --- a/xtts/whisper/bridge.py +++ b/xtts/whisper/bridge.py @@ -171,6 +171,43 @@ async def _send(ws, mtype: str, payload: dict) -> None: logger.warning("Send fehlgeschlagen (%s): %s", mtype, e) +# ────────────────────────────────────────────────────────────── +# DEBUG-LOG ueber RVS → /shared/logs/app.log +# +# Stefan's Gamebox ist Windows, kein SSH → wir brauchen Whisper-Bridge- +# Logs ueber den gleichen Pfad wie die App: app_log-Messages via RVS, +# aria-bridge schreibt sie in /shared/logs/app.log. Diagnostic / App- +# Logs-Tab zeigen sie dann mit platform="whisper". +# +# Toggle via aria-bridge config broadcast: whisperDebugLog (bool). +# Default ON solange wir Phase-1/2-Pipeline einfahren — danach +# defaultet aria-bridge ihn aus damit kein Spam. +# ────────────────────────────────────────────────────────────── +_DEBUG_LOG_TO_BRIDGE: bool = True + + +async def _debug_log(ws, scope: str, message: str, level: str = "info") -> None: + """Schickt einen app_log via RVS → landet in /shared/logs/app.log mit + platform='whisper'. Idempotent: wenn Toggle aus → no-op.""" + if not _DEBUG_LOG_TO_BRIDGE: + return + try: + await ws.send(json.dumps({ + "type": "app_log", + "payload": { + "ts": int(time.time() * 1000), + "platform": "whisper", + "level": level, + "scope": scope, + "message": str(message)[:2000], + "stack": "", + }, + "timestamp": int(time.time() * 1000), + })) + except Exception: + pass + + # ────────────────────────────────────────────────────────────── # STREAMING-SESSIONS # ────────────────────────────────────────────────────────────── @@ -365,6 +402,8 @@ class SessionManager: "audioRequestId": sess.audio_request_id, "text": text, }) + await _debug_log(ws, "stream.partial", + f"id={sess.request_id[:12]} text={text[:80]!r}") else: # Stagnation pruefen — Endpoint-Bedingung if sess.last_growth_at == 0.0: @@ -410,6 +449,9 @@ class SessionManager: logger.info("Stream %s: FINAL (reason=%s, %.1fs Audio, %dms): %r", sess.request_id[:8], reason, duration_s, stt_ms, final_text[:120]) + await _debug_log(ws, "stream.final", + f"id={sess.request_id[:12]} reason={reason} " + f"audio={duration_s:.1f}s stt={stt_ms}ms text={final_text[:80]!r}") # stt_endpoint: das ist DAS Event auf das aria-bridge horcht fuer den # Brain-Shortcut. Enthaelt alle Felder die bisher in 'audio' lagen, @@ -537,6 +579,11 @@ async def run_loop(runner: WhisperRunner, sessions: SessionManager) -> None: await _broadcast_status(ws, "loading", model=init_model) logger.info("Initial: sende config_request an aria-bridge") await _send(ws, "config_request", {"service": "whisper"}) + # Startup-Marker — App-Logs zeigen damit ob Streaming-Code + # ueberhaupt aktiv ist (Stefan baut auf Gamebox via PS, + # Build/Restart kann unbeabsichtigt alte Version weiterfahren). + await _debug_log(ws, "boot", + "whisper-bridge online — streaming-mode ENABLED, debug-log ON") except Exception as e: logger.exception("Initial-Handshake crashed: %s", e) asyncio.create_task(_initial_handshake()) @@ -557,6 +604,11 @@ async def run_loop(runner: WhisperRunner, sessions: SessionManager) -> None: asyncio.create_task(handle_stt_request(ws, payload, runner)) elif mtype == "stt_stream_start": + await _debug_log(ws, "stream.start", + f"received id={payload.get('requestId', '?')[:12]} " + f"audioReqId={payload.get('audioRequestId', '?')[:16]} " + f"endpointMs={payload.get('endpointMs')} " + f"hardCapMs={payload.get('hardCapMs')}") # Ggf. Modell sicherstellen — sonst antwortet der erste # transcribe-Call mit Leerstring weil Model None. target_model = payload.get("model") or runner.model_size or WHISPER_MODEL @@ -581,14 +633,52 @@ async def run_loop(runner: WhisperRunner, sessions: SessionManager) -> None: # Sehr verbose im Schlimmstfall — debug-Level reicht. logger.debug("stt_audio_chunk: unbekannte/closed session %s", payload.get("requestId", "")[:8]) + await _debug_log(ws, "stream.chunk.reject", + f"unknown/closed session id={payload.get('requestId', '?')[:12]}", + level="warn") + else: + # Nur alle 25 Chunks loggen (=5s Audio) — sonst Spam. + try: + seq = int(payload.get("seq", 0) or 0) + if seq % 25 == 0: + await _debug_log(ws, "stream.chunk", + f"id={payload.get('requestId', '?')[:12]} seq={seq}") + except (TypeError, ValueError): + pass elif mtype == "stt_stream_end": req_id = payload.get("requestId", "") logger.info("stt_stream_end empfangen: id=%s reason=%s", req_id[:8], payload.get("reason", "")) + await _debug_log(ws, "stream.end", + f"received id={req_id[:12]} reason={payload.get('reason', '')}") sessions.end_session(req_id) elif mtype == "config": + # Debug-Toggle: aria-bridge broadcastet jetzt whisperDebugLog + # damit Stefan im laufenden Betrieb via Diagnostic-Settings + # die Logs an/aus schalten kann. + if "whisperDebugLog" in payload: + global _DEBUG_LOG_TO_BRIDGE + old = _DEBUG_LOG_TO_BRIDGE + _DEBUG_LOG_TO_BRIDGE = bool(payload.get("whisperDebugLog", False)) + if old != _DEBUG_LOG_TO_BRIDGE: + logger.info("Debug-Log-to-Bridge: %s", "ON" if _DEBUG_LOG_TO_BRIDGE else "OFF") + # Last gasp wenn ausgeschaltet wird damit Stefan im Log sieht + # dass der Toggle griff. + if not _DEBUG_LOG_TO_BRIDGE: + await ws.send(json.dumps({ + "type": "app_log", + "payload": { + "ts": int(time.time() * 1000), + "platform": "whisper", + "level": "info", + "scope": "config", + "message": "debug-log OFF (toggle aus)", + "stack": "", + }, + "timestamp": int(time.time() * 1000), + })) new_model = payload.get("whisperModel") or WHISPER_MODEL needs_load = (runner.model is None) or (new_model != runner.model_size) if needs_load: