fix(brain): Memory-Suche filtert jetzt Rauschen — score_threshold + kleineres k
Bug: Bei kleiner DB (31 Eintraege) lieferte die Suche fuer JEDES Wort fast alles als Treffer zurueck — k=20 Top-N ohne Threshold sorgte dafuer, dass auch "banane" zehn vermeintliche Treffer mit Scores 0.09-0.22 (= Rauschen) zurueckgab.

Fix:
- vector_store.search() bekommt optional score_threshold (an Qdrant durchgereicht, das nimmt's nativ)
- /memory/search endpoint hat score_threshold-Query-Param (default 0.30)
- Diagnostic schickt k=10 + score_threshold=0.30 statt k=20 ohne Threshold
- "Keine Treffer"-Info-Box, wenn alle Treffer < Threshold

MiniLM-multilingual liefert typischerweise:
- >0.50 → starker Treffer
- 0.30-0.50 → relevant
- 0.20-0.30 → grenzwertig
- <0.20 → Rauschen

Mit score_threshold=0 (oder None) bleibt die alte Top-N-Semantik fuer Aufrufer, die Rauschen explizit wollen.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
+14
-2
@@ -182,9 +182,21 @@ def memory_pinned():
|
|||||||
|
|
||||||
|
|
||||||
@app.get("/memory/search", response_model=List[MemoryOut])
|
@app.get("/memory/search", response_model=List[MemoryOut])
|
||||||
def memory_search(q: str, k: int = 5, type: Optional[str] = None, include_pinned: bool = False):
|
def memory_search(
|
||||||
|
q: str,
|
||||||
|
k: int = 5,
|
||||||
|
type: Optional[str] = None,
|
||||||
|
include_pinned: bool = False,
|
||||||
|
score_threshold: Optional[float] = 0.30,
|
||||||
|
):
|
||||||
|
"""Semantische Suche. score_threshold filtert schwache Treffer raus
|
||||||
|
(Default 0.30 — MiniLM-multilingual liefert <0.25 fuer Rauschen).
|
||||||
|
Mit score_threshold=0 wird komplett Top-k zurueckgegeben."""
|
||||||
vec = embedder().embed(q)
|
vec = embedder().embed(q)
|
||||||
points = store().search(vec, k=k, type_filter=type, exclude_pinned=not include_pinned)
|
points = store().search(
|
||||||
|
vec, k=k, type_filter=type, exclude_pinned=not include_pinned,
|
||||||
|
score_threshold=score_threshold if score_threshold and score_threshold > 0 else None,
|
||||||
|
)
|
||||||
return [MemoryOut.from_point(p) for p in points]
|
return [MemoryOut.from_point(p) for p in points]
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -184,9 +184,14 @@ class VectorStore:
|
|||||||
k: int = 5,
|
k: int = 5,
|
||||||
type_filter: Optional[str] = None,
|
type_filter: Optional[str] = None,
|
||||||
exclude_pinned: bool = True,
|
exclude_pinned: bool = True,
|
||||||
|
score_threshold: Optional[float] = None,
|
||||||
) -> List[MemoryPoint]:
|
) -> List[MemoryPoint]:
|
||||||
"""Semantische Search. Standard: pinned-Punkte ausgeschlossen
|
"""Semantische Search. Standard: pinned-Punkte ausgeschlossen
|
||||||
(die kommen separat via list_pinned in den Prompt)."""
|
(die kommen separat via list_pinned in den Prompt).
|
||||||
|
|
||||||
|
score_threshold: nur Treffer mit Cosine-Similarity >= Schwelle
|
||||||
|
zurueckgeben. None = keine Filterung. MiniLM-multilingual liefert
|
||||||
|
typischerweise 0.3-0.6 fuer relevante Treffer; <0.25 ist Rauschen."""
|
||||||
must = []
|
must = []
|
||||||
must_not = []
|
must_not = []
|
||||||
if type_filter:
|
if type_filter:
|
||||||
@@ -202,6 +207,7 @@ class VectorStore:
|
|||||||
query_filter=flt if (must or must_not) else None,
|
query_filter=flt if (must or must_not) else None,
|
||||||
limit=k,
|
limit=k,
|
||||||
with_payload=True,
|
with_payload=True,
|
||||||
|
score_threshold=score_threshold,
|
||||||
)
|
)
|
||||||
return [MemoryPoint.from_qdrant(p) for p in results]
|
return [MemoryPoint.from_qdrant(p) for p in results]
|
||||||
|
|
||||||
|
|||||||
+13
-4
@@ -3410,7 +3410,10 @@
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
const typeFilter = document.getElementById('brain-filter-type').value;
|
const typeFilter = document.getElementById('brain-filter-type').value;
|
||||||
const params = new URLSearchParams({ q, k: '20', include_pinned: 'true' });
|
// k=10 + Score-Threshold im Backend (0.30) → nur relevante Treffer.
|
||||||
|
// Frueher k=20 ohne Threshold: bei kleiner DB landete fast alles
|
||||||
|
// als "Treffer", egal wie unaehnlich.
|
||||||
|
const params = new URLSearchParams({ q, k: '10', include_pinned: 'true', score_threshold: '0.30' });
|
||||||
if (typeFilter) params.set('type', typeFilter);
|
if (typeFilter) params.set('type', typeFilter);
|
||||||
try {
|
try {
|
||||||
const r = await fetch('/api/brain/memory/search?' + params.toString());
|
const r = await fetch('/api/brain/memory/search?' + params.toString());
|
||||||
@@ -3420,9 +3423,15 @@
|
|||||||
brainSearchIds = hits.map(m => m.id);
|
brainSearchIds = hits.map(m => m.id);
|
||||||
if (info) {
|
if (info) {
|
||||||
info.style.display = 'block';
|
info.style.display = 'block';
|
||||||
info.innerHTML = `🔍 ${hits.length} Treffer für "${escapeHtml(q)}"` +
|
if (hits.length === 0) {
|
||||||
(typeFilter ? ` · Typ=${escapeHtml(typeFilter)}` : '') +
|
info.innerHTML = `🔍 Keine relevanten Treffer für "${escapeHtml(q)}"` +
|
||||||
` · sortiert nach Aehnlichkeit`;
|
(typeFilter ? ` · Typ=${escapeHtml(typeFilter)}` : '') +
|
||||||
|
` (Score < 0.30). Versuche andere Begriffe oder klicke das ✕ rechts um die Suche zu schliessen.`;
|
||||||
|
} else {
|
||||||
|
info.innerHTML = `🔍 ${hits.length} Treffer für "${escapeHtml(q)}"` +
|
||||||
|
(typeFilter ? ` · Typ=${escapeHtml(typeFilter)}` : '') +
|
||||||
|
` · sortiert nach Aehnlichkeit (Score ≥ 0.30)`;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
renderBrainList(hits, true);
|
renderBrainList(hits, true);
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
|
|||||||
Reference in New Issue
Block a user