diff --git a/aria-brain/main.py b/aria-brain/main.py index 5d81d6d..43b58e2 100644 --- a/aria-brain/main.py +++ b/aria-brain/main.py @@ -29,6 +29,7 @@ from conversation import Conversation from proxy_client import ProxyClient from agent import Agent import skills as skills_mod +import metrics as metrics_mod logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(name)s: %(message)s") logger = logging.getLogger("aria-brain") @@ -404,6 +405,15 @@ def conversation_distill_now(): return agent().distill_old_turns() +# ─── Call-Metrics (Token / Quota-Monitoring) ──────────────────────── + +@app.get("/metrics/calls") +def metrics_calls(): + """Liefert Aggregate fuer 1h / 5h / 24h / 30d. + Jedes Window: {window_seconds, calls, tokens_in, tokens_out, by_model}.""" + return metrics_mod.stats() + + # ─── Skills ───────────────────────────────────────────────────────── class SkillCreate(BaseModel): diff --git a/aria-brain/metrics.py b/aria-brain/metrics.py new file mode 100644 index 0000000..b49b254 --- /dev/null +++ b/aria-brain/metrics.py @@ -0,0 +1,133 @@ +""" +Call-Metrics fuer den Proxy-Client. + +Pro Claude-Call wird ein Eintrag in /data/metrics.jsonl angehaengt: + + {"ts": , "model": "...", "in": , "out": } + +Tokens-Schaetzung: characters / 4 (Anthropic-Default-Heuristik). Nicht exakt +aber gut genug fuer Quota-Monitoring. Wir summieren nicht in-memory weil +der Brain-Container neugestartet werden kann — alles auf Disk. + +Auswertung via aggregate(window_seconds) — liefert {calls, tokens_in, tokens_out} +fuer die letzten N Sekunden. Lazy gelesen, keine grossen Datenmengen erwartet +(bei 1000 Calls/Tag ~70 KB pro Monat). + +Auto-Rotate: bei > 50k Zeilen werden die aeltesten 25k weggeschnitten. +""" + +from __future__ import annotations + +import json +import logging +import os +import time +from pathlib import Path +from typing import List + +logger = logging.getLogger(__name__) + +METRICS_FILE = Path(os.environ.get("METRICS_FILE", "/data/metrics.jsonl")) +ROTATE_AT = 50_000 +ROTATE_KEEP = 25_000 + + +def _estimate_tokens(text: str) -> int: + """Anthropic-Default: ~4 chars pro Token. Grob genug.""" + if not text: + return 0 + return max(1, len(text) // 4) + + +def _messages_tokens(messages: list) -> int: + total = 0 + for m in messages: + # Pydantic-Model oder dict + if hasattr(m, "content"): + total += _estimate_tokens(m.content or "") + elif isinstance(m, dict): + c = m.get("content") or "" + if isinstance(c, str): + total += _estimate_tokens(c) + return total + + +def log_call(model: str, messages_in: list, reply_text: str = "") -> None: + """Eine Call-Metric anhaengen. Robust gegen Fehler (silent fail).""" + try: + tokens_in = _messages_tokens(messages_in) + tokens_out = _estimate_tokens(reply_text) + line = json.dumps({ + "ts": int(time.time() * 1000), + "model": model, + "in": tokens_in, + "out": tokens_out, + }) + METRICS_FILE.parent.mkdir(parents=True, exist_ok=True) + with METRICS_FILE.open("a", encoding="utf-8") as f: + f.write(line + "\n") + # Sanftes Rotate ohne hohe IO-Kosten — nur alle 1000 Calls checken + if (tokens_in + tokens_out) % 1000 < 4: + _maybe_rotate() + except Exception as exc: + logger.warning("metrics.log_call: %s", exc) + + +def _maybe_rotate() -> None: + try: + if not METRICS_FILE.exists(): + return + with METRICS_FILE.open("r", encoding="utf-8") as f: + lines = f.readlines() + if len(lines) > ROTATE_AT: + keep = lines[-ROTATE_KEEP:] + METRICS_FILE.write_text("".join(keep), encoding="utf-8") + logger.info("metrics rotated: %d → %d Zeilen", len(lines), len(keep)) + except Exception as exc: + logger.warning("metrics rotate: %s", exc) + + +def aggregate(window_seconds: int) -> dict: + """Aggregiert die Calls der letzten N Sekunden.""" + now_ms = int(time.time() * 1000) + cutoff_ms = now_ms - (window_seconds * 1000) + calls = 0 + tokens_in = 0 + tokens_out = 0 + by_model: dict[str, int] = {} + if METRICS_FILE.exists(): + try: + for raw in METRICS_FILE.read_text(encoding="utf-8").splitlines(): + raw = raw.strip() + if not raw: + continue + try: + obj = json.loads(raw) + except Exception: + continue + if obj.get("ts", 0) < cutoff_ms: + continue + calls += 1 + tokens_in += int(obj.get("in") or 0) + tokens_out += int(obj.get("out") or 0) + m = obj.get("model", "?") + by_model[m] = by_model.get(m, 0) + 1 + except Exception as exc: + logger.warning("metrics aggregate: %s", exc) + return { + "window_seconds": window_seconds, + "calls": calls, + "tokens_in": tokens_in, + "tokens_out": tokens_out, + "by_model": by_model, + } + + +def stats() -> dict: + """Komplett-Snapshot mit den drei wichtigsten Fenstern.""" + return { + "h1": aggregate(3600), + "h5": aggregate(5 * 3600), + "h24": aggregate(24 * 3600), + "d30": aggregate(30 * 24 * 3600), + } diff --git a/aria-brain/proxy_client.py b/aria-brain/proxy_client.py index d747a07..fb9c95f 100644 --- a/aria-brain/proxy_client.py +++ b/aria-brain/proxy_client.py @@ -18,6 +18,8 @@ from typing import List, Optional import httpx from pydantic import BaseModel +import metrics + logger = logging.getLogger(__name__) RUNTIME_CONFIG_FILE = Path("/shared/config/runtime.json") @@ -135,6 +137,9 @@ class ProxyClient: "arguments": args, }) + # Call-Metric anhaengen — Token-Schaetzung fuer Quota-Monitoring + metrics.log_call(payload["model"], messages, content or "") + return ProxyResult(content=content or "", tool_calls=tool_calls, finish_reason=finish_reason) def close(self): diff --git a/diagnostic/index.html b/diagnostic/index.html index 16bfc57..07c56df 100644 --- a/diagnostic/index.html +++ b/diagnostic/index.html @@ -120,6 +120,14 @@ /* Settings */ .settings-section { margin-bottom:20px; } .settings-section h2 { margin-bottom:12px; } + /* Metric-Zellen im Token/Calls-Card */ + .metric-cell { background:#0D0D1A; border:1px solid #1E1E2E; border-radius:6px; padding:8px 10px; } + .metric-cell .metric-label { color:#8888AA; font-size:10px; } + .metric-cell .metric-value { color:#E0E0F0; font-size:18px; font-weight:bold; margin-top:2px; } + .metric-cell .metric-sub { color:#555570; font-size:10px; margin-top:2px; font-family:monospace; } + .metric-cell.warn { border-color:#FFD60A; background:rgba(255,214,10,0.08); } + .metric-cell.crit { border-color:#FF6B6B; background:rgba(255,107,107,0.10); } + /* Info-Button: kleines (i) neben Ueberschriften */ .info-btn { background:transparent; border:1px solid #0096FF; color:#0096FF; width:20px; height:20px; border-radius:50%; padding:0; font-size:11px; font-weight:bold; cursor:pointer; margin-left:6px; @@ -716,6 +724,37 @@ +
+

Token / Calls

+
+
+ + + + +
+
+
+
letzte 1h
+
letzte 5h (Quota-Fenster)
+
letzte 24h
+
letzte 30 Tage
+
+
+ Pro User-Frage = mind. 1 Claude-Call. Bei Tool-Use (Skills) bis zu 8 Calls. Plus 1 Destillat-Call bei langen Konversationen. + Token-Werte sind Schaetzung (chars/4) — nicht exakt, aber gut genug fuer Quota-Monitoring. +
+
+
+

Bootstrap & Migration

@@ -2620,6 +2659,7 @@ loadBrainStatus(); loadBrainMemoryList(); refreshImportFiles(); + loadMetrics(); } else if (tab === 'files') { loadFiles(); } else if (tab === 'skills') { @@ -3299,6 +3339,126 @@ if (m) m.classList.remove('open'); } + // ── Token / Calls Metrics ────────────────────────────── + // Anthropic-Subscription-Limits (Stand 2026, fuer Sonnet, "ca." weil + // Anthropic offiziell "fair use" sagt). Custom = User waehlt selbst. + const PLAN_LIMITS = { + pro: { h5: 45, label: 'Pro (~$20)' }, + max5: { h5: 225, label: 'Max 5x (~$90-100)' }, + max20: { h5: 900, label: 'Max 20x (~$200)' }, + }; + + function getActivePlanLimit() { + const v = (document.getElementById('metrics-plan') || {}).value || 'max5'; + if (v === 'custom') { + const n = parseInt((document.getElementById('metrics-custom-limit') || {}).value || '0', 10); + return { h5: n > 0 ? n : 225, label: 'Custom' }; + } + return PLAN_LIMITS[v] || PLAN_LIMITS.max5; + } + + function onMetricsPlanChange() { + const v = document.getElementById('metrics-plan').value; + const customRow = document.getElementById('metrics-custom-row'); + if (customRow) customRow.style.display = v === 'custom' ? '' : 'none'; + try { + localStorage.setItem('aria_metrics_plan', v); + if (v === 'custom') { + const n = document.getElementById('metrics-custom-limit').value; + if (n) localStorage.setItem('aria_metrics_custom_limit', n); + } + } catch {} + loadMetrics(); + } + + function restoreMetricsPlan() { + try { + const v = localStorage.getItem('aria_metrics_plan'); + if (v) document.getElementById('metrics-plan').value = v; + const n = localStorage.getItem('aria_metrics_custom_limit'); + if (n) document.getElementById('metrics-custom-limit').value = n; + onMetricsPlanChange(); + } catch {} + } + + function fmtTokens(n) { + if (n < 1000) return String(n); + if (n < 1_000_000) return (n / 1000).toFixed(1) + 'k'; + return (n / 1_000_000).toFixed(2) + 'M'; + } + + async function loadMetrics() { + try { + const r = await fetch('/api/brain/metrics/calls'); + if (!r.ok) throw new Error('HTTP ' + r.status); + const d = await r.json(); + renderMetrics(d); + } catch (e) { + const bar = document.getElementById('metrics-bar'); + if (bar) bar.innerHTML = `Metrics nicht erreichbar: ${escapeHtml(e.message)}`; + } + } + + function renderMetrics(d) { + const setCell = (id, agg) => { + const el = document.getElementById(id); + if (!el) return; + const valueEl = el.querySelector('.metric-value'); + if (valueEl) valueEl.textContent = `${agg.calls} Calls`; + // Sub-Zeile mit Tokens + let sub = el.querySelector('.metric-sub'); + if (!sub) { + sub = document.createElement('div'); + sub.className = 'metric-sub'; + el.appendChild(sub); + } + sub.textContent = `${fmtTokens(agg.tokens_in)} in · ${fmtTokens(agg.tokens_out)} out`; + }; + setCell('metrics-h1', d.h1); + setCell('metrics-h5', d.h5); + setCell('metrics-h24', d.h24); + setCell('metrics-d30', d.d30); + + // 5h-Fenster gegen Plan-Limit: Warn-Klassen + const plan = getActivePlanLimit(); + const limit = plan.h5; + const pct = limit > 0 ? Math.min(100, Math.round(d.h5.calls / limit * 100)) : 0; + const h5el = document.getElementById('metrics-h5'); + if (h5el) { + h5el.classList.remove('warn', 'crit'); + if (pct >= 90) h5el.classList.add('crit'); + else if (pct >= 80) h5el.classList.add('warn'); + } + + // Progress-Bar oben + const bar = document.getElementById('metrics-bar'); + if (bar) { + const color = pct >= 90 ? '#FF6B6B' : pct >= 80 ? '#FFD60A' : '#0096FF'; + bar.innerHTML = ` +
+ 5h-Quota (${escapeHtml(plan.label)}): ${d.h5.calls} / ${limit} + (${pct}%) +
+
+
+
+ `; + } + } + + // Periodisch refreshen (alle 30s) wenn Gehirn-Tab offen + if (!window.__metricsInterval) { + window.__metricsInterval = setInterval(() => { + const t = document.getElementById('tab-brain'); + if (t && t.classList.contains('visible')) { + try { loadMetrics(); } catch {} + } + }, 30000); + } + + // Beim ersten Brain-Tab-Open: Plan restoren + setTimeout(restoreMetricsPlan, 100); + // Vor-definierte Info-Blocks const INFO_TEXTS = { 'brain-status': { @@ -3342,6 +3502,29 @@

Such-Feld: semantische Suche via Embedder + Qdrant. Findet sinngemaess, nicht nur Stichworte.

`, }, + 'metrics': { + title: 'Token / Calls — Quota-Monitoring', + html: ` +

Anthropic gibt fuer ihre Subscriptions keine exakten Token-Limits raus, + sondern "fair use". Kursierende Schaetzungen (Stand 2026, fuer Sonnet):

+
    +
  • Pro (~$20): ca. 45 Calls pro 5h-Fenster
  • +
  • Max 5x (~$90-100): ca. 225 Calls pro 5h-Fenster
  • +
  • Max 20x (~$200): ca. 900 Calls pro 5h-Fenster
  • +
+

Wichtig: HTTP-Call ≠ User-Frage. Pro User-Frage:

+
    +
  • Einfache Antwort ohne Tool: 1 Call
  • +
  • Mit 1 Tool (Skill): 2 Calls (Tool-Entscheidung + finale Antwort)
  • +
  • Multi-Tool-Chain: bis zu 8 Calls (MAX_TOOL_ITERATIONS)
  • +
  • Bei >60 Turns Konversation: +1 Destillat-Call im Hintergrund
  • +
+

Token-Werte sind Schaetzung (chars / 4, Anthropic-Heuristik) — nicht exakt, + aber gut genug fuer Quota-Monitoring. Persistent in /data/metrics.jsonl, + Auto-Rotate bei 50k Eintraegen.

+

Warn-Schwellen: 5h-Counter wird gelb bei 80%, rot bei 90% des Plan-Limits.

+ `, + }, 'bootstrap': { title: 'Bootstrap & Migration — die drei Wege', html: `