feat(brain): Volltext-Suche zusaetzlich zu Semantic — Default ist jetzt Wortlich
Stefan wollte ne richtige Suche statt nur "klingt aehnlich". Beide Modi sind jetzt verfuegbar, Default ist Volltext:
- 📝 Wortlich (Substring, case-insensitive ueber Title + Content + Category + Tags) — neuer Endpoint /memory/search-text. Full-Scan via Qdrant scroll, k=50. Findet "cessna" exakt im Content. Bei kleiner DB (<1000 Eintraege) unkritisch performant.
- 🧠 Semantisch (Embedder + score_threshold 0.30) — bestehender /memory/search Endpoint. Findet konzeptuell verwandte Eintraege.
Diagnostic UI: Dropdown neben dem Suchfeld zum Modus-Wechsel. Info-Banner zeigt klar welcher Modus aktiv ist.
Warum Wortlich Default: bei kleiner DB liefert Semantic gern False Positives mit Score 0.30-0.45 fuer komplett unverwandte Begriffe (z.B. "cessna" matched "Tageslog fuehren" mit 0.43). Wortlich ist deterministisch und vermeidet das Rauschen.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -181,6 +181,23 @@ def memory_pinned():
|
|||||||
return [MemoryOut.from_point(p) for p in store().list_pinned()]
|
return [MemoryOut.from_point(p) for p in store().list_pinned()]
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/memory/search-text", response_model=List[MemoryOut])
def memory_search_text(
    q: str,
    k: int = 50,
    type: Optional[str] = None,
    include_pinned: bool = True,
):
    """Full-text substring search (case-insensitive) over title, content,
    category and tags.

    Finds literal terms, e.g. 'cessna' matches 'Cessna 172'. Unlike
    /memory/search (semantic) it returns no "sounds similar" hits.

    Args:
        q: Search term.
        k: Maximum number of hits, forwarded to the store.
        type: Optional type filter, forwarded to the store as ``type_filter``.
        include_pinned: When False, the store is asked to exclude pinned
            entries.

    Returns:
        The store's hits, each converted with ``MemoryOut.from_point``.
    """
    # The store expresses the pinned option as an exclusion flag, so invert it.
    skip_pinned = not include_pinned
    hits = store().search_text(
        q,
        k=k,
        type_filter=type,
        exclude_pinned=skip_pinned,
    )
    return list(map(MemoryOut.from_point, hits))
|
||||||
|
|
||||||
|
|
||||||
@app.get("/memory/search", response_model=List[MemoryOut])
|
@app.get("/memory/search", response_model=List[MemoryOut])
|
||||||
def memory_search(
|
def memory_search(
|
||||||
q: str,
|
q: str,
|
||||||
|
|||||||
@@ -213,3 +213,56 @@ class VectorStore:
|
|||||||
|
|
||||||
def count(self) -> int:
|
def count(self) -> int:
|
||||||
return self.client.count(collection_name=COLLECTION, exact=True).count
|
return self.client.count(collection_name=COLLECTION, exact=True).count
|
||||||
|
|
||||||
|
def search_text(
    self,
    query: str,
    k: int = 20,
    type_filter: Optional[str] = None,
    exclude_pinned: bool = False,
) -> List[MemoryPoint]:
    """Full-text substring search (case-insensitive) over title, content,
    category and tags.

    Unlike search(), this is NOT a semantic match: a point is a hit only
    if the lowercased query occurs literally in its text fields.

    Implemented as a full scan (paged ``scroll``) over all points that
    pass the optional filters. Uncritical at the expected collection size
    (< 1000 points).

    Args:
        query: Search term. Surrounding whitespace is ignored; an empty or
            whitespace-only query yields no results.
        k: Maximum number of matches to return. Values <= 0 yield an
            empty list.
        type_filter: If given, only points whose ``type`` payload field
            equals this value are scanned.
        exclude_pinned: If True, points with ``pinned == True`` are
            skipped.

    Returns:
        Up to ``k`` matching points in scroll order (no relevance ranking).
    """
    needle = (query or "").strip().lower()
    # Guard k as well: the limit check below only runs after an append, so
    # without this a non-positive k would still return one match.
    if not needle or k <= 0:
        return []

    must = []
    must_not = []
    if type_filter:
        must.append(qm.FieldCondition(key="type", match=qm.MatchValue(value=type_filter)))
    if exclude_pinned:
        must_not.append(qm.FieldCondition(key="pinned", match=qm.MatchValue(value=True)))
    flt = qm.Filter(must=must or None, must_not=must_not or None) if (must or must_not) else None

    text_keys = ("title", "content", "category")
    matches: List[MemoryPoint] = []
    offset = None
    while True:
        points, offset = self.client.scroll(
            collection_name=COLLECTION,
            scroll_filter=flt,
            limit=200,
            offset=offset,
            with_payload=True,
            with_vectors=False,
        )
        for p in points:
            payload = p.payload or {}
            # A stored None must become "" rather than the text "None",
            # otherwise queries such as "none" or "on" match every point
            # with a null field.
            parts = [
                "" if payload.get(key) is None else str(payload.get(key))
                for key in text_keys
            ]
            tags = payload.get("tags")
            if isinstance(tags, list):
                # Coerce each tag so a non-string tag cannot raise in join().
                parts.append(" ".join(str(t) for t in tags if t is not None))
            else:
                parts.append("")
            haystack = " ".join(parts).lower()
            if needle in haystack:
                matches.append(MemoryPoint.from_qdrant(p))
                if len(matches) >= k:
                    return matches
        # Stop only on the explicit None sentinel, not on any falsy offset.
        if offset is None:
            break
    return matches
|
||||||
|
|||||||
+24
-8
@@ -824,9 +824,15 @@
|
|||||||
</div>
|
</div>
|
||||||
<div class="card" style="margin-bottom:8px;">
|
<div class="card" style="margin-bottom:8px;">
|
||||||
<div style="display:flex;gap:8px;flex-wrap:wrap;align-items:center;">
|
<div style="display:flex;gap:8px;flex-wrap:wrap;align-items:center;">
|
||||||
<input type="text" id="brain-search" placeholder="Semantische Suche (z.B. 'Stefan Persönlichkeit')..."
|
<input type="text" id="brain-search" placeholder="Suche (z.B. 'cessna' oder 'Stefan Persönlichkeit')..."
|
||||||
style="flex:1;min-width:200px;background:#080810;color:#E0E0F0;border:1px solid #1E1E2E;padding:6px 8px;border-radius:4px;font-family:inherit;font-size:12px;"
|
style="flex:1;min-width:200px;background:#080810;color:#E0E0F0;border:1px solid #1E1E2E;padding:6px 8px;border-radius:4px;font-family:inherit;font-size:12px;"
|
||||||
onkeydown="if(event.key==='Enter') runBrainSearch()">
|
onkeydown="if(event.key==='Enter') runBrainSearch()">
|
||||||
|
<select id="brain-search-mode" onchange="if(document.getElementById('brain-search').value.trim()) runBrainSearch()"
|
||||||
|
title="Wortlich = exakter Substring-Match. Semantisch = 'klingt aehnlich' via Embeddings."
|
||||||
|
style="background:#080810;color:#E0E0F0;border:1px solid #1E1E2E;padding:6px;border-radius:4px;font-family:inherit;font-size:11px;">
|
||||||
|
<option value="text" selected>📝 Wortlich</option>
|
||||||
|
<option value="semantic">🧠 Semantisch</option>
|
||||||
|
</select>
|
||||||
<button class="btn secondary" onclick="runBrainSearch()" style="padding:4px 12px;font-size:11px;">Suchen</button>
|
<button class="btn secondary" onclick="runBrainSearch()" style="padding:4px 12px;font-size:11px;">Suchen</button>
|
||||||
<select id="brain-filter-type" onchange="loadBrainMemoryList()"
|
<select id="brain-filter-type" onchange="loadBrainMemoryList()"
|
||||||
style="background:#080810;color:#E0E0F0;border:1px solid #1E1E2E;padding:6px;border-radius:4px;font-family:inherit;font-size:11px;">
|
style="background:#080810;color:#E0E0F0;border:1px solid #1E1E2E;padding:6px;border-radius:4px;font-family:inherit;font-size:11px;">
|
||||||
@@ -3457,13 +3463,23 @@
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
const typeFilter = document.getElementById('brain-filter-type').value;
|
const typeFilter = document.getElementById('brain-filter-type').value;
|
||||||
// k=10 + Score-Threshold im Backend (0.30) → nur relevante Treffer.
|
const mode = (document.getElementById('brain-search-mode')?.value) || 'text';
|
||||||
// Frueher k=20 ohne Threshold: bei kleiner DB landete fast alles
|
let url, modeLabel;
|
||||||
// als "Treffer", egal wie unaehnlich.
|
if (mode === 'semantic') {
|
||||||
|
// Embedder-basiert, mit Score-Threshold gegen Rauschen
|
||||||
const params = new URLSearchParams({ q, k: '10', include_pinned: 'true', score_threshold: '0.30' });
|
const params = new URLSearchParams({ q, k: '10', include_pinned: 'true', score_threshold: '0.30' });
|
||||||
if (typeFilter) params.set('type', typeFilter);
|
if (typeFilter) params.set('type', typeFilter);
|
||||||
|
url = '/api/brain/memory/search?' + params.toString();
|
||||||
|
modeLabel = '🧠 semantisch (Score ≥ 0.30)';
|
||||||
|
} else {
|
||||||
|
// Volltext-Substring (case-insensitive) — findet exakte Begriffe
|
||||||
|
const params = new URLSearchParams({ q, k: '50', include_pinned: 'true' });
|
||||||
|
if (typeFilter) params.set('type', typeFilter);
|
||||||
|
url = '/api/brain/memory/search-text?' + params.toString();
|
||||||
|
modeLabel = '📝 wortlich (Substring)';
|
||||||
|
}
|
||||||
try {
|
try {
|
||||||
const r = await fetch('/api/brain/memory/search?' + params.toString());
|
const r = await fetch(url);
|
||||||
if (!r.ok) throw new Error('HTTP ' + r.status);
|
if (!r.ok) throw new Error('HTTP ' + r.status);
|
||||||
const hits = await r.json();
|
const hits = await r.json();
|
||||||
hits.forEach(m => { brainMemoryCache[m.id] = m; });
|
hits.forEach(m => { brainMemoryCache[m.id] = m; });
|
||||||
@@ -3471,13 +3487,13 @@
|
|||||||
if (info) {
|
if (info) {
|
||||||
info.style.display = 'block';
|
info.style.display = 'block';
|
||||||
if (hits.length === 0) {
|
if (hits.length === 0) {
|
||||||
info.innerHTML = `🔍 Keine relevanten Treffer für "${escapeHtml(q)}"` +
|
info.innerHTML = `🔍 Keine Treffer für "${escapeHtml(q)}"` +
|
||||||
(typeFilter ? ` · Typ=${escapeHtml(typeFilter)}` : '') +
|
(typeFilter ? ` · Typ=${escapeHtml(typeFilter)}` : '') +
|
||||||
` (Score < 0.30). Versuche andere Begriffe oder klicke das ✕ rechts um die Suche zu schliessen.`;
|
` · ${modeLabel}. Anderen Begriff probieren oder ✕ rechts um Suche zu schliessen.`;
|
||||||
} else {
|
} else {
|
||||||
info.innerHTML = `🔍 ${hits.length} Treffer für "${escapeHtml(q)}"` +
|
info.innerHTML = `🔍 ${hits.length} Treffer für "${escapeHtml(q)}"` +
|
||||||
(typeFilter ? ` · Typ=${escapeHtml(typeFilter)}` : '') +
|
(typeFilter ? ` · Typ=${escapeHtml(typeFilter)}` : '') +
|
||||||
` · sortiert nach Aehnlichkeit (Score ≥ 0.30)`;
|
` · ${modeLabel}`;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
renderBrainList(hits, true);
|
renderBrainList(hits, true);
|
||||||
|
|||||||
Reference in New Issue
Block a user