# georgeek's picture
# Transfer
# 5ecde30
# raw
# history blame contribute delete
# 817 Bytes
from sentence_transformers import SentenceTransformer
import faiss
def load_faiss_index(file_name="vector_db.index"):
    """Read a FAISS index from disk.

    Args:
        file_name: Path handed to ``faiss.read_index``.

    Returns:
        The index object produced by ``faiss.read_index``.
    """
    return faiss.read_index(file_name)
def normalize_embeddings(embeddings):
    """L2-normalise each row of an embedding matrix with FAISS.

    ``faiss.normalize_L2`` works in place on a float32, C-contiguous NumPy
    matrix. Inputs that are not already in that form (for example the tensors
    produced by ``get_embeddings``, or float64 / list data) are converted
    first, so the call no longer fails on them.

    Args:
        embeddings: 2-D array-like of vectors, or a tensor exposing
            ``detach().cpu().numpy()``.

    Returns:
        The normalised float32 NumPy matrix. When ``embeddings`` is already a
        float32 C-contiguous NumPy array, that same object is normalised in
        place and returned; otherwise a converted array is returned (it may
        share memory with the input when no copy was required).
    """
    import numpy as np

    # Tensor-like inputs must be moved to host memory and exposed as NumPy
    # before FAISS can take a pointer to their data.
    if hasattr(embeddings, "detach"):
        embeddings = embeddings.detach().cpu().numpy()

    # No copy is made when dtype and layout already match, which preserves the
    # original in-place behaviour for well-formed inputs.
    vectors = np.ascontiguousarray(embeddings, dtype=np.float32)
    faiss.normalize_L2(vectors)
    return vectors
def train_model(model_name='dumitrescustefan/bert-base-romanian-cased-v1'):
    """Build a ``SentenceTransformer`` for the given model name.

    No training step happens in this function; it only constructs the model
    object from ``model_name``.

    Args:
        model_name: Identifier passed straight to ``SentenceTransformer``.

    Returns:
        The constructed ``SentenceTransformer`` instance.
    """
    return SentenceTransformer(model_name)
def get_embeddings(model, texts):
    """Encode ``texts`` with ``model`` and return the result as tensors.

    Args:
        model: Object exposing ``encode(texts, convert_to_tensor=...)``.
        texts: The inputs forwarded unchanged to ``model.encode``.

    Returns:
        Whatever ``model.encode`` returns when called with
        ``convert_to_tensor=True``.
    """
    return model.encode(texts, convert_to_tensor=True)
def save_embeddings(embeddings, file_name="vector_db.index"):
    """Normalise embeddings, store them in a flat L2 FAISS index, and save it.

    The original code passed the tensor itself to ``faiss.normalize_L2`` and
    only converted to NumPy afterwards; FAISS needs a float32 NumPy matrix for
    both steps, so the conversion now happens once, up front.

    Args:
        embeddings: 2-D matrix of vectors, either a tensor exposing
            ``detach().cpu().numpy()`` or a NumPy-compatible array-like.
        file_name: Destination path passed to ``faiss.write_index``.

    Returns:
        The ``faiss.IndexFlatL2`` that was populated and written to disk.

    Note:
        Normalisation is applied in place on the converted matrix. If the
        input is already a float32 C-contiguous NumPy array (or a CPU tensor
        sharing its memory), the caller's data is normalised as a side effect.
    """
    # Function-scope imports keep this block self-contained.
    import faiss
    import numpy as np

    # Bring tensor-like inputs to host memory as a NumPy array.
    if hasattr(embeddings, "detach"):
        embeddings = embeddings.detach().cpu().numpy()
    vectors = np.ascontiguousarray(embeddings, dtype=np.float32)

    # Rows are scaled to unit length before being added to the L2 index.
    faiss.normalize_L2(vectors)

    index = faiss.IndexFlatL2(vectors.shape[1])
    index.add(vectors)
    faiss.write_index(index, file_name)
    return index