diff --git a/bridge/aria_bridge.py b/bridge/aria_bridge.py index 3f8283b..82627d5 100644 --- a/bridge/aria_bridge.py +++ b/bridge/aria_bridge.py @@ -556,6 +556,8 @@ class ARIABridge: async def connect_to_core(self) -> None: """Persistente WebSocket-Verbindung zu aria-core (OpenClaw Gateway).""" retry_delay = 2 + max_conn_failures = 6 + conn_fail_count = 0 while self.running: try: @@ -570,6 +572,7 @@ class ARIABridge: self.ws_core = ws retry_delay = 2 + conn_fail_count = 0 logger.info("[core] Verbunden und authentifiziert") async for message in ws: @@ -577,13 +580,25 @@ class ARIABridge: except websockets.ConnectionClosed: logger.warning("[core] Verbindung verloren") + conn_fail_count += 1 except ConnectionRefusedError: logger.warning("[core] Nicht erreichbar (%s)", self.ws_url) + conn_fail_count += 1 except Exception: logger.exception("[core] WebSocket-Fehler") + conn_fail_count += 1 finally: self.ws_core = None + # Nach N aufeinanderfolgenden Fehlern: Exit damit Docker neustartet + # (bekommt neuen Network-Namespace wenn aria-core restarted wurde) + if conn_fail_count >= max_conn_failures: + logger.error( + "[core] %dx nicht erreichbar — Exit fuer Docker-Restart", + conn_fail_count, + ) + sys.exit(1) + if self.running: logger.info("[core] Reconnect in %ds...", retry_delay) await asyncio.sleep(retry_delay) diff --git a/diagnostic/server.js b/diagnostic/server.js index d583f5b..067f894 100644 --- a/diagnostic/server.js +++ b/diagnostic/server.js @@ -82,6 +82,27 @@ function pipelineEnd(ok, detail) { pipelineActive = false; } +// ── Auto-Restart bei Netzwerk-Namespace-Verlust ────── +// Bei network_mode: "service:aria" verliert dieser Container +// den Netzwerkzugriff wenn aria-core neustartet. +// Nach MAX_GATEWAY_FAILURES aufeinanderfolgenden Fehlern → process.exit +// Docker restart: unless-stopped startet uns mit neuem Namespace neu. +const MAX_GATEWAY_FAILURES = 6; // 6 × 5s = 30s +let gatewayFailCount = 0; + +function checkGatewayHealth() { + if (state.gateway.status === "connected") { + gatewayFailCount = 0; + return; + } + gatewayFailCount++; + if (gatewayFailCount >= MAX_GATEWAY_FAILURES) { + log("error", "server", `Gateway ${MAX_GATEWAY_FAILURES}x nicht erreichbar — Neustart (Netzwerk-Namespace veraltet?)`); + // Kurze Verzoegerung damit die Log-Nachricht noch gesendet wird + setTimeout(() => process.exit(1), 500); + } +} + function nextReqId() { return `diag-${++reqIdCounter}`; } @@ -191,6 +212,7 @@ async function connectGateway() { state.gateway.status = "connected"; state.gateway.handshakeOk = true; state.gateway.lastError = null; + gatewayFailCount = 0; } else { const error = typeof response.error === "string" ? response.error @@ -219,6 +241,7 @@ async function connectGateway() { state.gateway.handshakeOk = false; gatewayWs = null; broadcastState(); + checkGatewayHealth(); // Auto-Reconnect nach 5s setTimeout(connectGateway, 5000); }); @@ -236,6 +259,7 @@ async function connectGateway() { state.gateway.handshakeOk = false; gatewayWs = null; broadcastState(); + checkGatewayHealth(); // Retry nach 5s setTimeout(connectGateway, 5000); } diff --git a/docker-compose.yml b/docker-compose.yml index d48a04d..39f6ec9 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -25,10 +25,11 @@ services: privileged: true # ARIAs Wohnung — sie hat die Schlüssel depends_on: - proxy + ports: + - "3001:3001" # Diagnostic Web-UI (laeuft im shared network) environment: - CANVAS_HOST=127.0.0.1 - OPENCLAW_GATEWAY_TOKEN=${ARIA_AUTH_TOKEN} - - OPENCLAW_GATEWAY_BIND=0.0.0.0 - DEFAULT_MODEL=proxy/claude-sonnet-4 - RATE_LIMIT_PER_USER=30 - DISPLAY=:0 @@ -51,6 +52,7 @@ services: container_name: aria-bridge depends_on: - aria + network_mode: "service:aria" # Teilt Netzwerk mit aria-core → localhost:18789 volumes: - ./aria-data/voices:/voices:ro # TTS Stimmen - ./aria-data/config/aria.env:/config/aria.env @@ -61,7 +63,6 @@ services: - /dev/snd environment: - PULSE_SERVER=unix:/run/user/1000/pulse/native - - ARIA_CORE_WS=ws://aria:18789 # Gateway über aria-net (nicht localhost) - ARIA_AUTH_TOKEN=${ARIA_AUTH_TOKEN:-} - RVS_HOST=${RVS_HOST:-} - RVS_PORT=${RVS_PORT:-443} @@ -69,8 +70,6 @@ services: - RVS_TLS_FALLBACK=${RVS_TLS_FALLBACK:-true} - RVS_TOKEN=${RVS_TOKEN:-} restart: unless-stopped - networks: - - aria-net # ─── Diagnostic (Selbstcheck-UI) ────────────────────── diagnostic: @@ -78,12 +77,10 @@ services: container_name: aria-diagnostic depends_on: - aria - ports: - - "3001:3001" # Diagnostic Web-UI + network_mode: "service:aria" # Teilt Netzwerk mit aria-core → localhost:18789 volumes: - /var/run/docker.sock:/var/run/docker.sock:ro environment: - - ARIA_CORE_WS=ws://aria:18789 # Gateway über aria-net (nicht localhost) - ARIA_AUTH_TOKEN=${ARIA_AUTH_TOKEN:-} - PROXY_URL=http://proxy:3456 - RVS_HOST=${RVS_HOST:-} @@ -92,8 +89,6 @@ services: - RVS_TLS_FALLBACK=${RVS_TLS_FALLBACK:-true} - RVS_TOKEN=${RVS_TOKEN:-} restart: unless-stopped - networks: - - aria-net volumes: openclaw-config: # Persistiert ~/.openclaw (Model, Auth, Sessions)