b2f7d6dda2
Stefan hat den Max 5x Plan (~\$90-100/Monat), ungefaehres Limit 225 Calls pro
5h-Fenster fuer Sonnet. Damit wir nicht in eine Tool-Loop laufen ohne
es zu merken: kleine Metrics-Pipeline, sichtbar in der Diagnostic.
aria-brain/metrics.py
Append-only JSONL Logger unter /data/metrics.jsonl. Pro Claude-Call eine
Zeile {ts, model, in, out} mit Token-Schaetzung (chars/4, Anthropic-
Heuristik). aggregate(window) zaehlt die letzten N Sekunden.
Auto-Rotate bei 50k Zeilen → 25k behalten (~70 KB/Monat bei 1k Calls/Tag,
Cap also weit oben).
aria-brain/proxy_client.py
chat_full() ruft am Ende metrics.log_call(model, messages_in, reply).
Failed/exception-Pfade loggen nicht (sonst false positives).
aria-brain/main.py
GET /metrics/calls → {h1, h5, h24, d30}, jedes Window mit calls,
tokens_in, tokens_out, by_model.
diagnostic/index.html
Neue Card "Token / Calls" im Gehirn-Tab. Plan-Dropdown
(Pro / Max 5x / Max 20x / Custom), localStorage-persistiert. 4 Metric-
Zellen fuer 1h/5h/24h/30d mit Calls + Tokens. Progress-Bar oben zeigt
5h-Counter gegen Plan-Limit. Warn-Klassen: gelb bei 80%, rot bei 90%.
Auto-Refresh alle 30s wenn Gehirn-Tab offen, plus bei Tab-Wechsel.
Info-Modal erklaert die Limits + dass HTTP-Call != User-Frage (Tool-Use
kann pro Frage bis zu 8 Calls verursachen).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
529 lines
16 KiB
Python
529 lines
16 KiB
Python
"""
|
|
ARIA Brain — FastAPI-Einstieg.
|
|
|
|
Phase B Punkt 1: nur Skeleton.
|
|
- /health → Liveness
|
|
- /memory/list → alle Punkte (gefiltert)
|
|
- /memory/pinned → Hot Memory
|
|
- /memory/search?q=...&k=5 → semantische Suche
|
|
- /memory/save → neuen Punkt anlegen
|
|
- /memory/update/{id} → Punkt aendern (re-embed wenn content geaendert)
|
|
- /memory/delete/{id} → Punkt loeschen
|
|
- /memory/stats → Anzahl Punkte pro Type
|
|
|
|
/chat (Conversation-Loop) und /skills/* kommen in spaeteren Phasen.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
import os
|
|
from typing import List, Optional
|
|
|
|
from fastapi import FastAPI, HTTPException, BackgroundTasks, Request
|
|
from fastapi.responses import Response
|
|
from pydantic import BaseModel, Field
|
|
|
|
from memory import Embedder, VectorStore, MemoryPoint
|
|
from conversation import Conversation
|
|
from proxy_client import ProxyClient
|
|
from agent import Agent
|
|
import skills as skills_mod
|
|
import metrics as metrics_mod
|
|
|
|
# Logging, Qdrant connection settings, the FastAPI app, and the lazily
# populated singleton slots (see the accessor functions below).
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
)
logger = logging.getLogger("aria-brain")

QDRANT_HOST = os.environ.get("QDRANT_HOST", "aria-qdrant")
QDRANT_PORT = int(os.environ.get("QDRANT_PORT", "6333"))

app = FastAPI(title="ARIA Brain", version="0.1.0")

# Created on first use, then shared for the process lifetime.
_embedder: Optional[Embedder] = None
_store: Optional[VectorStore] = None
_conversation: Optional[Conversation] = None
_proxy: Optional[ProxyClient] = None
_agent: Optional[Agent] = None
|
def embedder() -> Embedder:
    """Return the process-wide Embedder, creating it on first use."""
    global _embedder
    if _embedder is None:
        _embedder = Embedder()
    return _embedder
|
def store() -> VectorStore:
    """Return the process-wide VectorStore, creating it on first use."""
    global _store
    if _store is None:
        _store = VectorStore(host=QDRANT_HOST, port=QDRANT_PORT)
    return _store
|
def conversation() -> Conversation:
    """Return the process-wide Conversation, creating it on first use."""
    global _conversation
    if _conversation is None:
        _conversation = Conversation()
    return _conversation
|
def proxy_client() -> ProxyClient:
    """Return the process-wide ProxyClient, creating it on first use."""
    global _proxy
    if _proxy is None:
        _proxy = ProxyClient()
    return _proxy
|
def agent() -> Agent:
    """Return the process-wide Agent, wiring it up on first use."""
    global _agent
    if _agent is None:
        _agent = Agent(store(), embedder(), conversation(), proxy_client())
    return _agent
|
# ─── Pydantic-Schemas ─────────────────────────────────────────────────
|
|
|
|
class MemoryIn(BaseModel):
    """Request body for creating a memory point via /memory/save."""

    type: str = Field(..., description="identity|rule|preference|tool|skill|fact|conversation|reminder")
    title: str
    content: str
    pinned: bool = False
    category: str = ""
    source: str = "manual"
    tags: List[str] = Field(default_factory=list)
    conversation_id: Optional[str] = None
|
class MemoryUpdate(BaseModel):
    """Partial update for a memory point; fields left as None are untouched."""

    title: Optional[str] = None
    content: Optional[str] = None
    pinned: Optional[bool] = None
    category: Optional[str] = None
    tags: Optional[List[str]] = None
|
class MemoryOut(BaseModel):
    """API representation of a stored memory point."""

    id: str
    type: str
    title: str
    content: str
    pinned: bool
    category: str
    source: str
    tags: List[str]
    created_at: str
    updated_at: str
    conversation_id: Optional[str] = None
    # Similarity score — only populated by /memory/search results.
    score: Optional[float] = None

    @classmethod
    def from_point(cls, p: MemoryPoint) -> "MemoryOut":
        """Build a MemoryOut from a MemoryPoint's attribute dict."""
        return cls(**vars(p))
|
# ─── Health ───────────────────────────────────────────────────────────
|
|
|
|
@app.get("/health")
def health():
    """Liveness probe: 'ok' with a point count, or 'degraded' when Qdrant fails."""
    qdrant = f"{QDRANT_HOST}:{QDRANT_PORT}"
    try:
        return {"status": "ok", "memory_count": store().count(), "qdrant": qdrant}
    except Exception as exc:
        # Deliberately broad: any backend failure downgrades rather than 500s.
        return {"status": "degraded", "error": str(exc), "qdrant": qdrant}
|
# ─── Memory-Endpoints ─────────────────────────────────────────────────
|
|
|
|
@app.get("/memory/stats")
def memory_stats():
    """Point counts: total, number pinned, and a per-type breakdown."""
    points = store().list_all()
    by_type: dict = {}
    for p in points:
        by_type[p.type] = by_type.get(p.type, 0) + 1
    pinned_count = sum(1 for p in points if p.pinned)
    return {"total": len(points), "pinned": pinned_count, "by_type": by_type}
|
@app.get("/memory/list", response_model=List[MemoryOut])
def memory_list(type: Optional[str] = None, limit: int = 200):
    """List points, optionally restricted to a single type."""
    s = store()
    if type:
        points = s.list_by_type(type, limit=limit)
    else:
        points = s.list_all(limit=limit)
    return [MemoryOut.from_point(p) for p in points]
|
@app.get("/memory/pinned", response_model=List[MemoryOut])
def memory_pinned():
    """All pinned points — the hot-memory set."""
    pinned = store().list_pinned()
    return [MemoryOut.from_point(p) for p in pinned]
|
@app.get("/memory/search", response_model=List[MemoryOut])
def memory_search(q: str, k: int = 5, type: Optional[str] = None, include_pinned: bool = False):
    """Semantic search; pinned points are excluded unless explicitly requested."""
    query_vec = embedder().embed(q)
    hits = store().search(query_vec, k=k, type_filter=type, exclude_pinned=not include_pinned)
    return [MemoryOut.from_point(p) for p in hits]
|
@app.post("/memory/save", response_model=MemoryOut)
def memory_save(body: MemoryIn):
    """Create a new memory point; the embedding is computed from its content."""
    s = store()
    point = MemoryPoint(
        id="",
        type=body.type,
        title=body.title,
        content=body.content,
        pinned=body.pinned,
        category=body.category,
        source=body.source,
        tags=body.tags,
        conversation_id=body.conversation_id,
    )
    pid = s.upsert(point, embedder().embed(body.content))
    return MemoryOut.from_point(s.get(pid))
|
@app.patch("/memory/update/{point_id}", response_model=MemoryOut)
def memory_update(point_id: str, body: MemoryUpdate):
    """Partially update a point; re-embeds only when the content changed.

    Raises:
        HTTPException(404): when no point with `point_id` exists.
    """
    from datetime import datetime, timezone

    s = store()
    existing = s.get(point_id)
    if not existing:
        raise HTTPException(404, f"Memory {point_id} nicht gefunden")

    # Decide about re-embedding BEFORE mutating `existing`.
    content_changed = body.content is not None and body.content != existing.content
    if body.title is not None:
        existing.title = body.title
    if body.content is not None:
        existing.content = body.content
    if body.pinned is not None:
        existing.pinned = body.pinned
    if body.category is not None:
        existing.category = body.category
    if body.tags is not None:
        existing.tags = body.tags

    if content_changed:
        # Content changed → fresh embedding, full upsert.
        s.upsert(existing, embedder().embed(existing.content))
    else:
        # Vector unchanged — rewrite only the payload (cheaper than re-embedding).
        from memory.vector_store import COLLECTION

        s.client.set_payload(
            collection_name=COLLECTION,
            payload=existing.to_payload()
            | {"updated_at": datetime.now(timezone.utc).isoformat()},
            points=[point_id],
        )
    return MemoryOut.from_point(s.get(point_id))
|
@app.delete("/memory/delete/{point_id}")
def memory_delete(point_id: str):
    """Delete a point; 404 when it does not exist."""
    s = store()
    if not s.get(point_id):
        raise HTTPException(404, f"Memory {point_id} nicht gefunden")
    s.delete(point_id)
    return {"deleted": point_id}
|
# ─── Migration aus brain-import/ ──────────────────────────────────────
|
|
|
|
# Directory mounted with brain-import/*.md files (consumed by /memory/migrate).
IMPORT_DIR = os.environ.get("IMPORT_DIR", "/import")
|
@app.post("/memory/migrate")
def memory_migrate():
    """Read /import/*.md and write atomic memory points into the DB.

    Idempotent: on re-run, points with the same migration_key are replaced.
    """
    from pathlib import Path

    from migration import run_migration

    return run_migration(Path(IMPORT_DIR), store(), embedder())
|
@app.get("/memory/import-files")
def memory_import_files():
    """List what is under /import/ — for the diagnostic UI.

    Returns the directory path, whether it exists, and a (name, size) entry
    per regular file. Files whose metadata cannot be read are skipped.
    """
    from contextlib import suppress
    from pathlib import Path

    d = Path(IMPORT_DIR)
    if not d.exists():
        return {"import_dir": str(d), "exists": False, "files": []}
    out = []
    for p in sorted(d.iterdir()):
        if p.is_file():
            # Best effort: a file may vanish or be unreadable between
            # iterdir() and stat() — skip it instead of failing the listing.
            with suppress(OSError):
                out.append({"name": p.name, "size": p.stat().st_size})
    return {"import_dir": str(d), "exists": True, "files": out}
|
# ─── Bootstrap-Snapshot ───────────────────────────────────────────────
|
|
# "Bootstrap" = alle pinned Memories. Export/Import zum schnellen
|
|
# Wiederherstellen einer schlanken ARIA nach Wipe.
|
|
|
|
@app.get("/memory/export-bootstrap")
def memory_export_bootstrap():
    """Return all pinned memories as JSON — for a browser download.

    The bundle format matches what /memory/import-bootstrap accepts.
    """
    from datetime import datetime, timezone

    pinned = store().list_pinned()
    return {
        "version": 1,
        # Timezone-aware UTC timestamp, ISO 8601.
        "exported_at": datetime.now(timezone.utc).isoformat(),
        "count": len(pinned),
        "memories": [
            {
                "type": p.type,
                "title": p.title,
                "content": p.content,
                "pinned": True,
                "category": p.category,
                "source": p.source,
                "tags": p.tags,
            }
            for p in pinned
        ],
    }
|
class BootstrapBundle(BaseModel):
    """Payload for /memory/import-bootstrap: a versioned list of memories."""

    version: int = 1
    memories: List[dict]
|
@app.post("/memory/import-bootstrap")
def memory_import_bootstrap(body: BootstrapBundle):
    """Delete every pinned memory and import the ones from the bundle.

    Cold memory (unpinned) is left untouched. An empty bundle is rejected:
    delete-only is NOT allowed — the caller should export first, then import.
    """
    if not body.memories:
        raise HTTPException(400, "Bundle hat keine memories — Abbruch zur Sicherheit")

    s = store()
    e = embedder()

    # Drop every currently pinned point.
    from qdrant_client.http import models as qm
    from memory.vector_store import COLLECTION

    pinned_filter = qm.Filter(
        must=[qm.FieldCondition(key="pinned", match=qm.MatchValue(value=True))]
    )
    s.client.delete(
        collection_name=COLLECTION,
        points_selector=qm.FilterSelector(filter=pinned_filter),
    )

    # Feed in the replacement points; entries without content are skipped.
    created = 0
    for m in body.memories:
        content = (m.get("content") or "").strip()
        if not content:
            continue
        replacement = MemoryPoint(
            id="",
            type=m.get("type", "fact"),
            title=m.get("title", "(ohne Titel)"),
            content=content,
            pinned=True,
            category=m.get("category", ""),
            source=m.get("source", "bootstrap-import"),
            tags=list(m.get("tags", [])),
        )
        s.upsert(replacement, e.embed(content))
        created += 1

    return {"created": created, "deleted_previous_pinned": True}
|
# ─── Conversation-Loop ──────────────────────────────────────────────
|
|
|
|
class ChatIn(BaseModel):
    """Request body for /chat."""

    message: str
    # Optional origin tag: "app" / "diagnostic" / "stt".
    source: str = ""
|
class ChatOut(BaseModel):
    """Response body for /chat."""

    reply: str
    turns: int
    # True when a background distillation pass was scheduled for this turn.
    distilling: bool
    events: list = Field(default_factory=list)
|
@app.post("/chat", response_model=ChatOut)
def chat(body: ChatIn, background: BackgroundTasks):
    """Main path. The reply is produced synchronously; the memory distillate
    runs in the background after the response has gone out."""
    ag = agent()
    try:
        reply = ag.chat(body.message, source=body.source)
    except ValueError as exc:
        # Bad input from the caller.
        raise HTTPException(400, str(exc))
    except RuntimeError as exc:
        # Upstream/agent failure.
        logger.error("chat fehlgeschlagen: %s", exc)
        raise HTTPException(502, str(exc))

    needs_distill = ag.conversation.needs_distill()
    if needs_distill:
        background.add_task(ag.distill_old_turns)
    return ChatOut(
        reply=reply,
        turns=len(ag.conversation.turns),
        distilling=needs_distill,
        events=ag.pop_events(),
    )
|
@app.get("/conversation/stats")
def conversation_stats():
    """Rolling-window statistics for the current conversation."""
    return conversation().stats()
|
@app.post("/conversation/reset")
def conversation_reset():
    """Hard reset — the rolling-window history is wiped completely.
    Distilled facts remain in the DB."""
    conversation().reset()
    return {"ok": True, "turns": 0}
|
@app.post("/conversation/distill")
def conversation_distill_now():
    """Manual distillation trigger — for tests, or before a deliberate reset."""
    return agent().distill_old_turns()
|
# ─── Call-Metrics (Token / Quota-Monitoring) ────────────────────────
|
|
|
|
@app.get("/metrics/calls")
def metrics_calls():
    """Aggregates for 1h / 5h / 24h / 30d.
    Each window: {window_seconds, calls, tokens_in, tokens_out, by_model}."""
    return metrics_mod.stats()
|
# ─── Skills ─────────────────────────────────────────────────────────
|
|
|
|
class SkillCreate(BaseModel):
    """Request body for /skills/create."""

    name: str
    description: str
    # One of: local-venv | local-bin | bash
    execution: str
    entry_code: str
    readme: str = ""
    args: list = Field(default_factory=list)
    requires: dict = Field(default_factory=dict)
    pip_packages: list = Field(default_factory=list)
    author: str = "stefan"
|
class SkillRun(BaseModel):
    """Request body for /skills/run."""

    name: str
    args: dict = Field(default_factory=dict)
    timeout_sec: int = 300
|
class SkillPatch(BaseModel):
    """Partial update for a skill; fields left as None are ignored."""

    description: str | None = None
    active: bool | None = None
    args: list | None = None
|
@app.get("/skills/list")
def skills_list(active_only: bool = False):
    """List all skills, optionally only the active ones."""
    return {"skills": skills_mod.list_skills(active_only=active_only)}
|
@app.get("/skills/{name}")
def skills_get(name: str):
    """Manifest plus README for a single skill; 404 when unknown."""
    manifest = skills_mod.read_manifest(name)
    if manifest is None:
        raise HTTPException(404, f"Skill '{name}' nicht gefunden")
    return {"manifest": manifest, "readme": skills_mod.read_readme(name)}
|
@app.post("/skills/create")
def skills_create(body: SkillCreate):
    """Create a skill from the posted definition; 400 on validation errors."""
    try:
        return skills_mod.create_skill(
            name=body.name,
            description=body.description,
            execution=body.execution,
            entry_code=body.entry_code,
            readme=body.readme,
            args=body.args,
            requires=body.requires,
            pip_packages=body.pip_packages,
            author=body.author,
        )
    except ValueError as exc:
        raise HTTPException(400, str(exc))
|
@app.post("/skills/run")
def skills_run(body: SkillRun):
    """Execute a skill; 400 when the name or arguments are rejected."""
    try:
        return skills_mod.run_skill(body.name, args=body.args, timeout_sec=body.timeout_sec)
    except ValueError as exc:
        raise HTTPException(400, str(exc))
|
@app.patch("/skills/{name}")
def skills_patch(name: str, body: SkillPatch):
    """Apply the non-None fields of the patch; 404 when the skill is unknown."""
    changes = {k: v for k, v in body.model_dump().items() if v is not None}
    try:
        return skills_mod.update_skill(name, changes)
    except ValueError as exc:
        raise HTTPException(404, str(exc))
|
@app.delete("/skills/{name}")
def skills_delete(name: str):
    """Remove a skill; 404 when it does not exist."""
    try:
        skills_mod.delete_skill(name)
    except ValueError as exc:
        raise HTTPException(404, str(exc))
    return {"deleted": name}
|
@app.get("/skills/{name}/logs")
def skills_logs(name: str, limit: int = 50):
    """Most recent run logs for a skill."""
    return {"logs": skills_mod.list_logs(name, limit=limit)}
|
@app.get("/skills/{name}/export")
def skills_export(name: str):
    """Download a skill as a .tar.gz attachment; 404 when unknown."""
    try:
        data = skills_mod.export_skill(name)
    except ValueError as exc:
        raise HTTPException(404, str(exc))
    headers = {"Content-Disposition": f'attachment; filename="skill-{name}.tar.gz"'}
    return Response(content=data, media_type="application/gzip", headers=headers)
|
@app.post("/skills/import")
async def skills_import(request: Request, overwrite: bool = False):
    """Import a skill archive from the raw request body; 400 on bad input."""
    data = await request.body()
    if not data:
        raise HTTPException(400, "Leerer Body")
    try:
        manifest = skills_mod.import_skill(data, overwrite=overwrite)
    except ValueError as exc:
        raise HTTPException(400, str(exc))
    return {"imported": manifest}