Files
ARIA-AGENT/aria-brain/proxy_client.py
T
duffyduck b2f7d6dda2 feat(brain+diagnostic): Token/Call-Metrics mit Subscription-Plan-Tracking
Stefan hat den Max 5x Plan (~$90-100/Monat), ungefaehres Limit 225 Calls pro
5h-Fenster fuer Sonnet. Damit nicht in eine Tool-Loop-Schleife laufen ohne
es zu merken: kleine Metrics-Pipeline, sichtbar in der Diagnostic.

aria-brain/metrics.py
  Append-only JSONL Logger unter /data/metrics.jsonl. Pro Claude-Call eine
  Zeile {ts, model, in, out} mit Token-Schaetzung (chars/4, Anthropic-
  Heuristik). aggregate(window) zaehlt die letzten N Sekunden.
  Auto-Rotate bei 50k Zeilen → 25k behalten (~70 KB/Monat bei 1k Calls/Tag,
  Cap also weit oben).

aria-brain/proxy_client.py
  chat_full() ruft am Ende metrics.log_call(model, messages_in, reply).
  Failed/exception-Pfade loggen nicht (sonst false positives).

aria-brain/main.py
  GET /metrics/calls → {h1, h5, h24, d30}, jedes Window mit calls,
  tokens_in, tokens_out, by_model.

diagnostic/index.html
  Neue Card "Token / Calls" im Gehirn-Tab. Plan-Dropdown
  (Pro / Max 5x / Max 20x / Custom), localStorage-persistiert. 4 Metric-
  Zellen fuer 1h/5h/24h/30d mit Calls + Tokens. Progress-Bar oben zeigt
  5h-Counter gegen Plan-Limit. Warn-Klassen: gelb bei 80%, rot bei 90%.
  Auto-Refresh alle 30s wenn Gehirn-Tab offen, plus bei Tab-Wechsel.
  Info-Modal erklaert die Limits + dass HTTP-Call != User-Frage (Tool-Use
  kann pro Frage bis zu 8 Calls verursachen).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-11 23:43:56 +02:00

150 lines
5.1 KiB
Python

"""
Claude-Aufruf ueber den lokalen Proxy.
Der Proxy (claude-max-api-proxy) bietet eine OpenAI-kompatible API
unter http://proxy:3456/v1/chat/completions. Wir nutzen non-streaming
mit einem laengeren Timeout — Claude Code spawnt pro Anfrage einen
neuen CLI-Prozess (Cold-Start), das dauert.
"""
from __future__ import annotations
import json
import logging
import os
from pathlib import Path
from typing import List, Optional
import httpx
from pydantic import BaseModel
import metrics
logger = logging.getLogger(__name__)
RUNTIME_CONFIG_FILE = Path("/shared/config/runtime.json")
ENV_MODEL = os.environ.get("BRAIN_MODEL", "claude-sonnet-4")
PROXY_URL = os.environ.get("PROXY_URL", "http://proxy:3456")
PROXY_TIMEOUT_SEC = float(os.environ.get("PROXY_TIMEOUT_SEC", "300"))
def _read_model_from_runtime() -> str:
"""Liest brainModel aus runtime.json. Fallback: ENV BRAIN_MODEL."""
try:
if RUNTIME_CONFIG_FILE.exists():
data = json.loads(RUNTIME_CONFIG_FILE.read_text(encoding="utf-8"))
m = (data.get("brainModel") or "").strip()
if m:
return m
except Exception as exc:
logger.warning("runtime.json lesen fehlgeschlagen: %s", exc)
return ENV_MODEL
DEFAULT_MODEL = _read_model_from_runtime()
class Message(BaseModel):
role: str # "system" | "user" | "assistant" | "tool"
content: Optional[str] = None
tool_calls: Optional[list] = None
tool_call_id: Optional[str] = None
name: Optional[str] = None # nur fuer role=tool
class ProxyResult(BaseModel):
content: str = ""
tool_calls: list = [] # je: {"id", "name", "arguments" (dict)}
finish_reason: str = ""
class ProxyClient:
def __init__(self, base_url: str = PROXY_URL, model: str = DEFAULT_MODEL):
self.base_url = base_url.rstrip("/")
self.model = model
# Persistente Client-Connection — vermeidet TCP-Handshake bei jedem Call
self._client = httpx.Client(timeout=PROXY_TIMEOUT_SEC)
def chat(self, messages: List[Message], model: Optional[str] = None) -> str:
"""Convenience: einfacher Chat ohne Tools. Gibt nur den Reply-String zurueck."""
result = self.chat_full(messages, tools=None, model=model)
if not result.content:
raise RuntimeError("Proxy lieferte leeren content")
return result.content
def chat_full(
self,
messages: List[Message],
tools: Optional[list] = None,
model: Optional[str] = None,
) -> ProxyResult:
"""Full chat — kann Tool-Calls liefern (wenn tools mitgegeben).
tools-Format ist OpenAI-Style:
[{"type":"function","function":{"name":..,"description":..,"parameters":{...}}}, ...]
"""
url = f"{self.base_url}/v1/chat/completions"
# Pydantic-Dumps mit exclude_none damit role=tool ohne tool_calls geht
payload = {
"model": model or self.model,
"messages": [m.model_dump(exclude_none=True) for m in messages],
}
if tools:
payload["tools"] = tools
logger.info("Proxy → %s (%d Messages, %d tools, model=%s)",
url, len(messages), len(tools or []), payload["model"])
try:
r = self._client.post(url, json=payload)
except httpx.RequestError as exc:
raise RuntimeError(f"Proxy unreachable: {exc}") from exc
if r.status_code != 200:
raise RuntimeError(f"Proxy HTTP {r.status_code}: {r.text[:300]}")
try:
data = r.json()
except Exception as exc:
raise RuntimeError(f"Proxy invalid JSON: {exc}") from exc
choices = data.get("choices") or []
if not choices:
raise RuntimeError(f"Proxy ohne choices: {str(data)[:300]}")
msg = choices[0].get("message") or {}
finish_reason = choices[0].get("finish_reason", "")
content = msg.get("content") or ""
if isinstance(content, list):
content = "".join(
part.get("text", "") for part in content if isinstance(part, dict) and part.get("type") == "text"
)
tool_calls_raw = msg.get("tool_calls") or []
tool_calls = []
import json as _json
for tc in tool_calls_raw:
fn = tc.get("function") or {}
args_raw = fn.get("arguments", "{}")
args: dict
if isinstance(args_raw, dict):
args = args_raw
else:
try:
args = _json.loads(args_raw)
except Exception:
args = {"_raw": args_raw}
tool_calls.append({
"id": tc.get("id", ""),
"name": fn.get("name", ""),
"arguments": args,
})
# Call-Metric anhaengen — Token-Schaetzung fuer Quota-Monitoring
metrics.log_call(payload["model"], messages, content or "")
return ProxyResult(content=content or "", tool_calls=tool_calls, finish_reason=finish_reason)
def close(self):
try:
self._client.close()
except Exception:
pass