"""Local embedder for memory texts.

Uses sentence-transformers (paraphrase-multilingual-MiniLM-L12-v2):

- German + English
- 384-dimensional vectors
- Runs on CPU, ~30 ms per short text
- The model is cached in /data/_models on first use

NOTE(review): this module passes no cache folder to ``SentenceTransformer``,
so the cache location stated above must be configured elsewhere (presumably
through the library's environment settings) -- confirm.
"""

from __future__ import annotations

import logging
from typing import List

logger = logging.getLogger(__name__)

# Default model identifier handed to SentenceTransformer.
MODEL_NAME = "paraphrase-multilingual-MiniLM-L12-v2"
# Vector size of the default model per the module docstring; not enforced here.
VECTOR_DIM = 384


class Embedder:
    """Lazy wrapper around a sentence-transformers model.

    The model is not loaded on construction; it is instantiated the first
    time an embedding is actually requested and then reused.
    """

    def __init__(self, model_name: str = MODEL_NAME):
        """Remember which model to use without loading it yet.

        Args:
            model_name: Identifier passed to ``SentenceTransformer`` on first use.
        """
        self.model_name = model_name
        # Filled in by _load(); None means "not loaded yet".
        self._model = None

    def _load(self):
        """Instantiate the model on first call; later calls are no-ops."""
        if self._model is not None:
            return

        logger.info("Lade Embedding-Modell %s ...", self.model_name)
        # Imported here rather than at module level so importing this module
        # does not pull in sentence-transformers until a model is needed.
        from sentence_transformers import SentenceTransformer

        self._model = SentenceTransformer(self.model_name)
        logger.info("Embedding-Modell geladen.")

    def embed(self, text: str) -> List[float]:
        """Embed a single text.

        Args:
            text: The text to encode.

        Returns:
            The embedding as a plain list of floats; normalization is
            requested from the model via ``normalize_embeddings=True``.
        """
        self._load()
        vector = self._model.encode(
            text,
            convert_to_numpy=True,
            normalize_embeddings=True,
        )
        return vector.tolist()

    def embed_batch(self, texts: List[str]) -> List[List[float]]:
        """Embed several texts in one model call.

        Args:
            texts: The texts to encode.

        Returns:
            One embedding (list of floats) per input text, in the order the
            model returns them. An empty list/tuple yields ``[]`` without
            loading the model.
        """
        # Nothing to encode: skip the model load and the encode call entirely.
        # Restricted to list/tuple so other sequence types keep their
        # original code path unchanged.
        if isinstance(texts, (list, tuple)) and not texts:
            return []

        self._load()
        vectors = self._model.encode(
            texts,
            convert_to_numpy=True,
            normalize_embeddings=True,
        )
        return vectors.tolist()