""" Claude-Aufruf ueber den lokalen Proxy. Der Proxy (claude-max-api-proxy) bietet eine OpenAI-kompatible API unter http://proxy:3456/v1/chat/completions. Wir nutzen non-streaming mit einem laengeren Timeout — Claude Code spawnt pro Anfrage einen neuen CLI-Prozess (Cold-Start), das dauert. """ from __future__ import annotations import json import logging import os from pathlib import Path from typing import List, Optional import httpx from pydantic import BaseModel import metrics logger = logging.getLogger(__name__) RUNTIME_CONFIG_FILE = Path("/shared/config/runtime.json") ENV_MODEL = os.environ.get("BRAIN_MODEL", "claude-sonnet-4") PROXY_URL = os.environ.get("PROXY_URL", "http://proxy:3456") PROXY_TIMEOUT_SEC = float(os.environ.get("PROXY_TIMEOUT_SEC", "300")) def _read_model_from_runtime() -> str: """Liest brainModel aus runtime.json. Fallback: ENV BRAIN_MODEL.""" try: if RUNTIME_CONFIG_FILE.exists(): data = json.loads(RUNTIME_CONFIG_FILE.read_text(encoding="utf-8")) m = (data.get("brainModel") or "").strip() if m: return m except Exception as exc: logger.warning("runtime.json lesen fehlgeschlagen: %s", exc) return ENV_MODEL DEFAULT_MODEL = _read_model_from_runtime() class Message(BaseModel): role: str # "system" | "user" | "assistant" | "tool" content: Optional[str] = None tool_calls: Optional[list] = None tool_call_id: Optional[str] = None name: Optional[str] = None # nur fuer role=tool class ProxyResult(BaseModel): content: str = "" tool_calls: list = [] # je: {"id", "name", "arguments" (dict)} finish_reason: str = "" class ProxyClient: def __init__(self, base_url: str = PROXY_URL, model: str = DEFAULT_MODEL): self.base_url = base_url.rstrip("/") self.model = model # Persistente Client-Connection — vermeidet TCP-Handshake bei jedem Call self._client = httpx.Client(timeout=PROXY_TIMEOUT_SEC) def chat(self, messages: List[Message], model: Optional[str] = None) -> str: """Convenience: einfacher Chat ohne Tools. Gibt nur den Reply-String zurueck.""" result = self.chat_full(messages, tools=None, model=model) if not result.content: raise RuntimeError("Proxy lieferte leeren content") return result.content def chat_full( self, messages: List[Message], tools: Optional[list] = None, model: Optional[str] = None, ) -> ProxyResult: """Full chat — kann Tool-Calls liefern (wenn tools mitgegeben). tools-Format ist OpenAI-Style: [{"type":"function","function":{"name":..,"description":..,"parameters":{...}}}, ...] """ url = f"{self.base_url}/v1/chat/completions" # Pydantic-Dumps mit exclude_none damit role=tool ohne tool_calls geht payload = { "model": model or self.model, "messages": [m.model_dump(exclude_none=True) for m in messages], } if tools: payload["tools"] = tools logger.info("Proxy → %s (%d Messages, %d tools, model=%s)", url, len(messages), len(tools or []), payload["model"]) try: r = self._client.post(url, json=payload) except httpx.RequestError as exc: raise RuntimeError(f"Proxy unreachable: {exc}") from exc if r.status_code != 200: raise RuntimeError(f"Proxy HTTP {r.status_code}: {r.text[:300]}") try: data = r.json() except Exception as exc: raise RuntimeError(f"Proxy invalid JSON: {exc}") from exc choices = data.get("choices") or [] if not choices: raise RuntimeError(f"Proxy ohne choices: {str(data)[:300]}") msg = choices[0].get("message") or {} finish_reason = choices[0].get("finish_reason", "") content = msg.get("content") or "" if isinstance(content, list): content = "".join( part.get("text", "") for part in content if isinstance(part, dict) and part.get("type") == "text" ) tool_calls_raw = msg.get("tool_calls") or [] tool_calls = [] import json as _json for tc in tool_calls_raw: fn = tc.get("function") or {} args_raw = fn.get("arguments", "{}") args: dict if isinstance(args_raw, dict): args = args_raw else: try: args = _json.loads(args_raw) except Exception: args = {"_raw": args_raw} tool_calls.append({ "id": tc.get("id", ""), "name": fn.get("name", ""), "arguments": args, }) # Call-Metric anhaengen — Token-Schaetzung fuer Quota-Monitoring metrics.log_call(payload["model"], messages, content or "") return ProxyResult(content=content or "", tool_calls=tool_calls, finish_reason=finish_reason) def close(self): try: self._client.close() except Exception: pass