|
from sentence_transformers import SentenceTransformer |
|
import faiss |
|
|
|
def load_faiss_index(file_name="vector_db.index"):
    """Read a FAISS index from disk and return it.

    Args:
        file_name: Path of the serialized index file to read.

    Returns:
        The index object produced by ``faiss.read_index``.
    """
    return faiss.read_index(file_name)
|
|
|
def normalize_embeddings(embeddings):
    """L2-normalize ``embeddings`` with FAISS and return the same object.

    Args:
        embeddings: Matrix of vectors handed straight to
            ``faiss.normalize_L2``.
            NOTE(review): FAISS documents this helper as operating on
            float32 NumPy arrays -- confirm callers pass that.

    Returns:
        The ``embeddings`` object that was passed in (not a copy).
    """
    # The call's return value is not used; the argument itself is handed
    # back, so the normalization is expected to happen in place.
    faiss.normalize_L2(embeddings)
    return embeddings
|
|
|
|
|
def train_model(model_name='dumitrescustefan/bert-base-romanian-cased-v1'):
    """Instantiate and return a ``SentenceTransformer`` for ``model_name``.

    Despite the function's name, no training step is run here: the body
    only constructs the model object from the given identifier.

    Args:
        model_name: Model identifier passed to ``SentenceTransformer``.

    Returns:
        The constructed ``SentenceTransformer`` instance.
    """
    return SentenceTransformer(model_name)
|
|
|
def get_embeddings(model, texts):
    """Encode ``texts`` with ``model`` and return the result.

    Args:
        model: Object exposing ``encode(texts, convert_to_tensor=...)``.
        texts: Input forwarded unchanged to ``model.encode``.

    Returns:
        Whatever ``model.encode`` returns when called with
        ``convert_to_tensor=True``.
    """
    return model.encode(texts, convert_to_tensor=True)
|
|
|
def save_embeddings(embeddings, file_name="vector_db.index"):
    """Build a flat L2 FAISS index from ``embeddings`` and write it to disk.

    The vectors are L2-normalized before being added to the index.

    Args:
        embeddings: 2-D collection of vectors, one per row. Either a torch
            tensor on any device (e.g. the result of
            ``model.encode(..., convert_to_tensor=True)``) or a NumPy array.
            The caller's object is not modified; a float32 copy is used.
        file_name: Destination path for the serialized index.

    Returns:
        The populated ``faiss.IndexFlatL2`` that was written to
        ``file_name``.
    """
    # FAISS's Python helpers operate on NumPy arrays, so a torch tensor has
    # to be moved to host memory and converted *before* normalize_L2 is
    # called on it (the previous code normalized the tensor itself).
    if hasattr(embeddings, "detach"):
        embeddings = embeddings.detach().cpu().numpy()

    # FAISS requires float32, C-contiguous data. astype() copies by default,
    # so the in-place normalization below never touches the caller's buffer.
    vectors = embeddings.astype("float32", order="C")
    faiss.normalize_L2(vectors)

    index = faiss.IndexFlatL2(vectors.shape[1])
    index.add(vectors)
    faiss.write_index(index, file_name)
    return index