feat(brain): Anhaenge an Memory-Eintraege (Stufe A — Backend)

Pro Memory koennen jetzt Dateien (Bilder, PDFs, Sound, ...) angehaengt
werden. Use-Case: Stefan sagt "ich hab eine Cessna 172" und pinnt
gleich ein Foto dran — ARIA sieht spaeter neben dem Memory auch die
visuelle Referenz (Stufe E = Multi-Modal-Pipeline).

Stufe A baut nur den Backend-Layer; UI kommt in Stufe B (Diagnostic)
und C (App). Anhaenge werden in Stufe A nur via HTTP-API gepflegt
(curl), ARIA selbst kann sie noch nicht hochladen — sinnvoll erst
wenn die Vision-Pipeline (Stufe E) steht.

Komponenten:

- memory_attachments.py: neuer Storage-Helper. Layout
  /shared/memory-attachments/<memory-id>/<safe-filename>.
  Filename-Sanitization (kein Path-Traversal), Limit 20 MB
  konfigurierbar, save/list/delete/read_bytes + delete_all fuer
  Cleanup beim Memory-Delete.

- vector_store.py: MemoryPoint.attachments (List[dict]) — Metadaten
  {name, mime, size, path} im Qdrant-Payload damit Suche/Anzeige
  sie ohne Filesystem-Lookup kennt.

- main.py:
  - MemoryIn akzeptiert attachments-Liste (fuer Restore-Faelle)
  - MemoryOut liefert attachments
  - GET    /memory/{id}/attachments              → Liste vom FS
  - POST   /memory/{id}/attachments              → Base64-Upload,
            schreibt FS + updated Payload-Liste
  - DELETE /memory/{id}/attachments/{filename}   → FS + Payload-Eintrag weg
  - GET    /memory/{id}/attachments/{filename}   → Bytes mit MIME serve
  - /memory/delete cleanup: ruft attachments.delete_all damit kein
    Verzeichnis verwaist

Smoke-Test nach Brain-Rebuild (Stefan auf VM):
  # Memory-ID rauspicken
  ID=$(curl -s "$ARIA_BRAIN_URL/memory/list?type=fact" | python3 -c "import sys,json;print(json.load(sys.stdin)[0]['id'])")
  # Bild als Base64 hochladen
  B64=$(base64 -w0 /pfad/zu/foto.jpg)
  curl -s -X POST "$ARIA_BRAIN_URL/memory/$ID/attachments" \
    -H 'Content-Type: application/json' \
    -d "{\"name\":\"foto.jpg\",\"data_base64\":\"$B64\"}" | jq
  # Liste anzeigen
  curl -s "$ARIA_BRAIN_URL/memory/$ID/attachments" | jq
  # Datei wieder laden
  curl -s "$ARIA_BRAIN_URL/memory/$ID/attachments/foto.jpg" -o /tmp/back.jpg

Stufe B (Diagnostic-UI) folgt sobald A getestet ist.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-13 02:27:02 +02:00
parent 331c1437be
commit c677cfed24
3 changed files with 250 additions and 0 deletions
+106
View File
@@ -114,6 +114,10 @@ class MemoryIn(BaseModel):
source: str = "manual"
tags: List[str] = Field(default_factory=list)
conversation_id: Optional[str] = None
# Vorhandene Anhang-Metadaten beim Save mitgeben (i.d.R. werden Anhaenge
# nach dem Save via /memory/{id}/attachments hinzugefuegt — hier eher fuer
# Bootstrap-Import/Restore-Faelle relevant).
attachments: List[dict] = Field(default_factory=list)
class MemoryUpdate(BaseModel):
@@ -137,12 +141,19 @@ class MemoryOut(BaseModel):
updated_at: str
conversation_id: Optional[str] = None
score: Optional[float] = None
attachments: List[dict] = Field(default_factory=list)
@classmethod
def from_point(cls, p: MemoryPoint) -> "MemoryOut":
return cls(**p.__dict__)
class AttachmentUploadBody(BaseModel):
"""Base64-Upload via JSON — Diagnostic schickt Files so."""
name: str
data_base64: str
# ─── Health ───────────────────────────────────────────────────────────
@app.get("/health")
@@ -231,6 +242,7 @@ def memory_save(body: MemoryIn):
source=body.source,
tags=body.tags,
conversation_id=body.conversation_id,
attachments=body.attachments or [],
)
pid = s.upsert(point, vec)
saved = s.get(pid)
@@ -279,9 +291,103 @@ def memory_delete(point_id: str):
if not s.get(point_id):
raise HTTPException(404, f"Memory {point_id} nicht gefunden")
s.delete(point_id)
# Anhaenge mit-loeschen damit nichts verwaist
try:
import memory_attachments as mem_att
n = mem_att.delete_all(point_id)
if n:
logger.info("Memory %s + %d Anhaenge geloescht", point_id, n)
except Exception as exc:
logger.warning("Anhang-Cleanup fuer %s fehlgeschlagen: %s", point_id, exc)
return {"deleted": point_id}
# ─── Memory-Anhaenge ──────────────────────────────────────────────────
@app.get("/memory/{point_id}/attachments")
def memory_attachments_list(point_id: str):
"""Liste der Anhaenge zum Memory. Source-of-Truth ist das Payload
in der DB, aber wir mergen vorsichtshalber mit dem Filesystem-Stand
(falls ein Upload-Restart zwischendrin schiefging)."""
import memory_attachments as mem_att
s = store()
m = s.get(point_id)
if not m:
raise HTTPException(404, f"Memory {point_id} nicht gefunden")
return {"memory_id": point_id, "attachments": mem_att.list_attachments(point_id)}
@app.post("/memory/{point_id}/attachments", response_model=MemoryOut)
def memory_attachments_add(point_id: str, body: AttachmentUploadBody):
"""Anhang als Base64 hochladen + im Memory-Payload eintragen."""
import memory_attachments as mem_att
s = store()
m = s.get(point_id)
if not m:
raise HTTPException(404, f"Memory {point_id} nicht gefunden")
try:
meta = mem_att.save_from_base64(point_id, body.name, body.data_base64)
except ValueError as exc:
raise HTTPException(400, str(exc))
# Payload aktualisieren — neuer Anhang ans Ende, Duplikate (gleicher
# Filename) werden ersetzt damit die Liste nicht zweimal denselben
# Eintrag hat
atts = [a for a in (m.attachments or []) if a.get("name") != meta["name"]]
atts.append(meta)
m.attachments = atts
from qdrant_client.http import models as qm
from memory.vector_store import COLLECTION
import datetime as _dt
m.updated_at = _dt.datetime.now(_dt.timezone.utc).isoformat()
s.client.set_payload(
collection_name=COLLECTION,
payload=m.to_payload() | {"updated_at": m.updated_at},
points=[point_id],
)
return MemoryOut.from_point(s.get(point_id))
@app.delete("/memory/{point_id}/attachments/{filename}", response_model=MemoryOut)
def memory_attachments_delete(point_id: str, filename: str):
"""Einzelnen Anhang loeschen (FS + Payload-Eintrag)."""
import memory_attachments as mem_att
s = store()
m = s.get(point_id)
if not m:
raise HTTPException(404, f"Memory {point_id} nicht gefunden")
removed_fs = mem_att.delete_attachment(point_id, filename)
safe = filename # Cleanup synchron mit FS — Payload-Match per name
atts = [a for a in (m.attachments or []) if a.get("name") not in (filename, safe)]
m.attachments = atts
from qdrant_client.http import models as qm
from memory.vector_store import COLLECTION
import datetime as _dt
m.updated_at = _dt.datetime.now(_dt.timezone.utc).isoformat()
s.client.set_payload(
collection_name=COLLECTION,
payload=m.to_payload() | {"updated_at": m.updated_at},
points=[point_id],
)
if not removed_fs and not atts:
# weder im FS noch im Payload war was — Anhang existierte nicht
raise HTTPException(404, f"Anhang {filename} nicht gefunden")
return MemoryOut.from_point(s.get(point_id))
@app.get("/memory/{point_id}/attachments/{filename}")
def memory_attachments_get(point_id: str, filename: str):
"""Liefert die Bytes eines Anhangs. Diagnostic-Server kann das
durchproxien zur Vorschau/Download in der UI."""
import memory_attachments as mem_att
import mimetypes as _mt
data = mem_att.read_bytes(point_id, filename)
if data is None:
raise HTTPException(404, f"Anhang {filename} nicht gefunden")
mime = _mt.guess_type(filename)[0] or "application/octet-stream"
return Response(content=data, media_type=mime)
# ─── Migration aus brain-import/ ──────────────────────────────────────
IMPORT_DIR = os.environ.get("IMPORT_DIR", "/import")