fix(proxy): Idle-Watchdog statt Hard-Timeout fuer lange Agent-Sessions
Pentests u.ae. brauchen oft >20min — der bisherige 20-min Hard-Cutoff in subprocess/manager.js von claude-max-api-proxy killte den Subprocess mitten in der Arbeit, egal wie aktiv ARIA gerade war. Loesung: - Hard-Timeout via sed auf 24h hochgesetzt (Last-Resort gegen wirklich haengende Subprozesse). - Eigener Idle-Watchdog in routes.js: Der Subprocess wird erst gekillt, wenn ueber ARIA_IDLE_TIMEOUT_MS (Default 20min) keine message/content_delta Events ankommen. Jede Aktivitaet resettet den Timer. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
+1
-1
@@ -12,7 +12,7 @@ services:
|
||||
DIST=$$(find /usr/local/lib -path '*/claude-max-api-proxy/dist' -type d | head -1) &&
|
||||
sed -i 's/startServer({ port })/startServer({ port, host: process.env.HOST || \"127.0.0.1\" })/' $$DIST/server/standalone.js &&
|
||||
sed -i 's/\"--no-session-persistence\",/\"--no-session-persistence\",\"--dangerously-skip-permissions\",/' $$DIST/subprocess/manager.js &&
|
||||
sed -i 's/const DEFAULT_TIMEOUT = 300000;/const DEFAULT_TIMEOUT = 1200000;/' $$DIST/subprocess/manager.js &&
|
||||
sed -i 's/const DEFAULT_TIMEOUT = 300000;/const DEFAULT_TIMEOUT = 86400000;/' $$DIST/subprocess/manager.js &&
|
||||
cp /proxy-patches/openai-to-cli.js $$DIST/adapter/openai-to-cli.js &&
|
||||
cp /proxy-patches/cli-to-openai.js $$DIST/adapter/cli-to-openai.js &&
|
||||
cp /proxy-patches/routes.js $$DIST/server/routes.js &&
|
||||
|
||||
@@ -35,6 +35,15 @@ const STREAM_HOOK_URL = process.env.ARIA_STREAM_HOOK_URL
|
||||
// Character limits for tool results and tool inputs.
// NOTE(review): presumably used to truncate payloads before they are sent to
// the bridge — their usage is outside this excerpt, confirm there.
const TOOL_RESULT_MAX_CHARS = 4096;
const TOOL_INPUT_MAX_CHARS = 2048;

// Idle timeout: the subprocess is killed when no activity
// (message/content_delta) arrives for IDLE_TIMEOUT_MS. This replaces the old
// hard timeout for long agent sessions (pentests etc.) — ARIA may work for as
// long as it keeps emitting events, but a subprocess that is truly stuck is
// still caught by the watchdog.
// Default: 20 minutes idle. Override via env ARIA_IDLE_TIMEOUT_MS.
// 0 (or a negative value) = disabled (not recommended).
const DEFAULT_IDLE_TIMEOUT_MS = 1200000;

/**
 * Parses the raw ARIA_IDLE_TIMEOUT_MS value into milliseconds.
 *
 * An unset or empty value yields the default. A value that is not a number
 * also yields the default (with a warning) instead of NaN, so that a typo in
 * the environment cannot silently switch the watchdog off.
 *
 * @param {string|undefined|null} raw - raw environment value
 * @returns {number} idle timeout in milliseconds; <= 0 means "disabled"
 */
function _parseIdleTimeoutMs(raw) {
  if (raw === undefined || raw === null || raw === "") {
    return DEFAULT_IDLE_TIMEOUT_MS;
  }
  const parsed = Number.parseInt(raw, 10);
  if (Number.isNaN(parsed)) {
    console.warn(
      `[aria-idle] invalid ARIA_IDLE_TIMEOUT_MS=${JSON.stringify(raw)}, using default ${DEFAULT_IDLE_TIMEOUT_MS}ms`,
    );
    return DEFAULT_IDLE_TIMEOUT_MS;
  }
  return parsed;
}

const IDLE_TIMEOUT_MS = _parseIdleTimeoutMs(process.env.ARIA_IDLE_TIMEOUT_MS);
|
||||
|
||||
/**
|
||||
* Generic Fire-and-forget POST an die Bridge. Keine Awaits, keine Fehler
|
||||
* nach oben. Eingesetzt fuer Tool-Hook + Stream-Hook.
|
||||
@@ -91,6 +100,50 @@ function _trackSubprocess(requestId, subprocess) {
|
||||
subprocess.on("error", cleanup);
|
||||
}
|
||||
|
||||
/**
 * Idle watchdog: kills the subprocess when no "message" / "content_delta"
 * event has arrived for IDLE_TIMEOUT_MS. The timer is armed on attach,
 * re-armed on every such event, and stopped for good on
 * "result" / "close" / "error".
 *
 * On timeout the watchdog calls subprocess.kill() and emits an "end" stream
 * event with reason "idle_timeout".
 * NOTE(review): the request handler's own "close" listener emits an "end"
 * event as well, so consumers probably see two "end" events after an idle
 * kill — confirm the bridge tolerates that.
 *
 * @param {object} subprocess - event emitter exposing on(event, listener) and kill()
 * @param {*} requestId - identifier used in the log line and passed to _emitStreamEvent
 * @returns {void}
 */
function _attachIdleWatchdog(subprocess, requestId) {
  // 0, negative or NaN disables the watchdog entirely.
  if (!IDLE_TIMEOUT_MS || IDLE_TIMEOUT_MS <= 0) return;

  // Node's setTimeout falls back to a 1 ms delay for values above 2^31-1,
  // which would kill the subprocess almost immediately. Clamp instead.
  const MAX_TIMER_DELAY_MS = 2147483647;
  const idleMs = Math.min(IDLE_TIMEOUT_MS, MAX_TIMER_DELAY_MS);

  let timer = null;
  // Set once the watchdog has fired or the subprocess has finished; after
  // that no event may re-arm the timer.
  let finished = false;

  function _clearTimer() {
    if (timer !== null) {
      clearTimeout(timer);
      timer = null;
    }
  }

  function _kill() {
    if (finished) return;
    finished = true;
    timer = null;
    const mins = Math.round(idleMs / 60000);
    console.warn(`[aria-idle] killing subprocess ${requestId} after ${mins}min idle`);
    try {
      subprocess.kill();
    } catch (err) {
      // Best effort: the process may already be gone. Log instead of hiding it.
      console.warn(`[aria-idle] kill failed for subprocess ${requestId}:`, err);
    }
    _emitStreamEvent(requestId, "end", { reason: "idle_timeout", idleMs });
  }

  function _reset() {
    if (finished) return;
    _clearTimer();
    timer = setTimeout(_kill, idleMs);
  }

  function _stop() {
    // Final: late events after result/close/error must not re-arm the timer.
    finished = true;
    _clearTimer();
  }

  // Arm the initial timer.
  _reset();

  // Every event from the subprocess counts as a sign of life.
  subprocess.on("message", _reset);
  subprocess.on("content_delta", _reset);
  // result/close/error → stop for good.
  subprocess.on("result", _stop);
  subprocess.on("close", _stop);
  subprocess.on("error", _stop);
}
|
||||
|
||||
/**
|
||||
* Hookt assistant + user Events und pusht beides an Bridge:
|
||||
* - Alt-API: nur Tool-Namen an /internal/agent-activity (Gedanken-Stream)
|
||||
@@ -177,8 +230,11 @@ export async function handleChatCompletions(req, res) {
|
||||
const subprocess = new ClaudeSubprocess();
|
||||
// ARIA-Patch: Tool-Use-Events + voller Live-Stream an die Bridge.
|
||||
// Plus: Subprocess fuer Not-Aus tracken (Hard-Kill via /v1/cancel-all).
|
||||
// Plus: Idle-Watchdog — Subprocess darf ewig laufen solange Events
|
||||
// kommen, wird aber gekillt nach IDLE_TIMEOUT_MS Inaktivitaet.
|
||||
_attachToolHook(subprocess, requestId);
|
||||
_trackSubprocess(requestId, subprocess);
|
||||
_attachIdleWatchdog(subprocess, requestId);
|
||||
_emitStreamEvent(requestId, "start", { model: body.model || null });
|
||||
subprocess.on("result", () => _emitStreamEvent(requestId, "end", { reason: "result" }));
|
||||
subprocess.on("close", (code) => _emitStreamEvent(requestId, "end", { reason: "close", code }));
|
||||
|
||||
Reference in New Issue
Block a user