Switched back to network mode and added a health check for diagnostic to restart
This commit is contained in:
parent
72fdebe50d
commit
4893d5e2ba
|
|
@ -556,6 +556,8 @@ class ARIABridge:
|
|||
async def connect_to_core(self) -> None:
|
||||
"""Persistente WebSocket-Verbindung zu aria-core (OpenClaw Gateway)."""
|
||||
retry_delay = 2
|
||||
max_conn_failures = 6
|
||||
conn_fail_count = 0
|
||||
|
||||
while self.running:
|
||||
try:
|
||||
|
|
@ -570,6 +572,7 @@ class ARIABridge:
|
|||
|
||||
self.ws_core = ws
|
||||
retry_delay = 2
|
||||
conn_fail_count = 0
|
||||
logger.info("[core] Verbunden und authentifiziert")
|
||||
|
||||
async for message in ws:
|
||||
|
|
@ -577,13 +580,25 @@ class ARIABridge:
|
|||
|
||||
except websockets.ConnectionClosed:
|
||||
logger.warning("[core] Verbindung verloren")
|
||||
conn_fail_count += 1
|
||||
except ConnectionRefusedError:
|
||||
logger.warning("[core] Nicht erreichbar (%s)", self.ws_url)
|
||||
conn_fail_count += 1
|
||||
except Exception:
|
||||
logger.exception("[core] WebSocket-Fehler")
|
||||
conn_fail_count += 1
|
||||
finally:
|
||||
self.ws_core = None
|
||||
|
||||
# Nach N aufeinanderfolgenden Fehlern: Exit damit Docker neustartet
|
||||
# (bekommt neuen Network-Namespace wenn aria-core restarted wurde)
|
||||
if conn_fail_count >= max_conn_failures:
|
||||
logger.error(
|
||||
"[core] %dx nicht erreichbar — Exit fuer Docker-Restart",
|
||||
conn_fail_count,
|
||||
)
|
||||
sys.exit(1)
|
||||
|
||||
if self.running:
|
||||
logger.info("[core] Reconnect in %ds...", retry_delay)
|
||||
await asyncio.sleep(retry_delay)
|
||||
|
|
|
|||
|
|
@ -82,6 +82,27 @@ function pipelineEnd(ok, detail) {
|
|||
pipelineActive = false;
|
||||
}
|
||||
|
||||
// ── Auto-restart on loss of the network namespace ──────
|
||||
// With network_mode: "service:aria" this container loses
|
||||
// its network access when aria-core restarts.
|
||||
// After MAX_GATEWAY_FAILURES consecutive failures → process.exit;
|
||||
// Docker's "restart: unless-stopped" then restarts us with a fresh namespace.
|
||||
const MAX_GATEWAY_FAILURES = 6; // 6 failures × 5 s reconnect delay = 30 s
|
||||
let gatewayFailCount = 0;
|
||||
|
||||
/**
 * Tracks consecutive gateway failures and exits the process once the
 * limit is reached.
 *
 * If `state.gateway.status` is "connected" the failure counter is reset
 * and nothing else happens. Otherwise the counter is incremented; when it
 * reaches MAX_GATEWAY_FAILURES an error is logged and `process.exit(1)`
 * is scheduled 500 ms later.
 *
 * Takes no arguments and returns nothing; it reads `state` and `log`,
 * which are defined outside this function.
 */
function checkGatewayHealth() {
|
||||
if (state.gateway.status === "connected") {
|
||||
gatewayFailCount = 0;
|
||||
return;
|
||||
}
|
||||
gatewayFailCount++;
|
||||
if (gatewayFailCount >= MAX_GATEWAY_FAILURES) {
|
||||
log("error", "server", `Gateway ${MAX_GATEWAY_FAILURES}x nicht erreichbar — Neustart (Netzwerk-Namespace veraltet?)`);
|
||||
// Short delay so the log message can still be sent before the process exits
|
||||
setTimeout(() => process.exit(1), 500);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Produces the next request id.
 *
 * Pre-increments `reqIdCounter` (declared outside this function) and
 * returns it prefixed with "diag-".
 *
 * @returns {string} The id in the form `diag-<counter>`.
 */
function nextReqId() {
|
||||
return `diag-${++reqIdCounter}`;
|
||||
}
|
||||
|
|
@ -191,6 +212,7 @@ async function connectGateway() {
|
|||
state.gateway.status = "connected";
|
||||
state.gateway.handshakeOk = true;
|
||||
state.gateway.lastError = null;
|
||||
gatewayFailCount = 0;
|
||||
} else {
|
||||
const error = typeof response.error === "string"
|
||||
? response.error
|
||||
|
|
@ -219,6 +241,7 @@ async function connectGateway() {
|
|||
state.gateway.handshakeOk = false;
|
||||
gatewayWs = null;
|
||||
broadcastState();
|
||||
checkGatewayHealth();
|
||||
// Auto-Reconnect nach 5s
|
||||
setTimeout(connectGateway, 5000);
|
||||
});
|
||||
|
|
@ -236,6 +259,7 @@ async function connectGateway() {
|
|||
state.gateway.handshakeOk = false;
|
||||
gatewayWs = null;
|
||||
broadcastState();
|
||||
checkGatewayHealth();
|
||||
// Retry nach 5s
|
||||
setTimeout(connectGateway, 5000);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -25,10 +25,11 @@ services:
|
|||
privileged: true # ARIAs Wohnung — sie hat die Schlüssel
|
||||
depends_on:
|
||||
- proxy
|
||||
ports:
|
||||
- "3001:3001" # Diagnostic Web-UI (laeuft im shared network)
|
||||
environment:
|
||||
- CANVAS_HOST=127.0.0.1
|
||||
- OPENCLAW_GATEWAY_TOKEN=${ARIA_AUTH_TOKEN}
|
||||
- OPENCLAW_GATEWAY_BIND=0.0.0.0
|
||||
- DEFAULT_MODEL=proxy/claude-sonnet-4
|
||||
- RATE_LIMIT_PER_USER=30
|
||||
- DISPLAY=:0
|
||||
|
|
@ -51,6 +52,7 @@ services:
|
|||
container_name: aria-bridge
|
||||
depends_on:
|
||||
- aria
|
||||
network_mode: "service:aria" # Teilt Netzwerk mit aria-core → localhost:18789
|
||||
volumes:
|
||||
- ./aria-data/voices:/voices:ro # TTS Stimmen
|
||||
- ./aria-data/config/aria.env:/config/aria.env
|
||||
|
|
@ -61,7 +63,6 @@ services:
|
|||
- /dev/snd
|
||||
environment:
|
||||
- PULSE_SERVER=unix:/run/user/1000/pulse/native
|
||||
- ARIA_CORE_WS=ws://aria:18789 # Gateway über aria-net (nicht localhost)
|
||||
- ARIA_AUTH_TOKEN=${ARIA_AUTH_TOKEN:-}
|
||||
- RVS_HOST=${RVS_HOST:-}
|
||||
- RVS_PORT=${RVS_PORT:-443}
|
||||
|
|
@ -69,8 +70,6 @@ services:
|
|||
- RVS_TLS_FALLBACK=${RVS_TLS_FALLBACK:-true}
|
||||
- RVS_TOKEN=${RVS_TOKEN:-}
|
||||
restart: unless-stopped
|
||||
networks:
|
||||
- aria-net
|
||||
|
||||
# ─── Diagnostic (Selbstcheck-UI) ──────────────────────
|
||||
diagnostic:
|
||||
|
|
@ -78,12 +77,10 @@ services:
|
|||
container_name: aria-diagnostic
|
||||
depends_on:
|
||||
- aria
|
||||
ports:
|
||||
- "3001:3001" # Diagnostic Web-UI
|
||||
network_mode: "service:aria" # Teilt Netzwerk mit aria-core → localhost:18789
|
||||
volumes:
|
||||
- /var/run/docker.sock:/var/run/docker.sock:ro
|
||||
environment:
|
||||
- ARIA_CORE_WS=ws://aria:18789 # Gateway über aria-net (nicht localhost)
|
||||
- ARIA_AUTH_TOKEN=${ARIA_AUTH_TOKEN:-}
|
||||
- PROXY_URL=http://proxy:3456
|
||||
- RVS_HOST=${RVS_HOST:-}
|
||||
|
|
@ -92,8 +89,6 @@ services:
|
|||
- RVS_TLS_FALLBACK=${RVS_TLS_FALLBACK:-true}
|
||||
- RVS_TOKEN=${RVS_TOKEN:-}
|
||||
restart: unless-stopped
|
||||
networks:
|
||||
- aria-net
|
||||
|
||||
volumes:
|
||||
openclaw-config: # Persistiert ~/.openclaw (Model, Auth, Sessions)
|
||||
|
|
|
|||
Loading…
Reference in New Issue