Spaces:
Sleeping
Sleeping
from sentence_transformers import SentenceTransformer | |
import chromadb | |
def split_list(list_,chunk_size): | |
return [list_[i:i+chunk_size] for i in range(0,len(list_),chunk_size)] | |
def create_database(txt): | |
class EmbeddingFn: | |
def __init__(self,model_name): | |
self.model = SentenceTransformer(model_name) | |
def __call__(self,input): | |
return self.model.encode(input).tolist() | |
embedding_fn = EmbeddingFn("sentence-transformers/all-mpnet-base-v2") | |
ids = [str(i) for i in range(len(txt))] | |
chromadb.api.client.SharedSystemClient.clear_system_cache() | |
chroma_cli = chromadb.Client() | |
existing_collections = [collection.name for collection in chroma_cli.list_collections()] | |
if "chat-with-docs" in existing_collections: | |
chroma_cli.delete_collection(name="chat-with-docs") | |
collection = chroma_cli.create_collection("chat-with-docs",embedding_function=embedding_fn) | |
txt = split_list(txt,5000) | |
ids = split_list(ids,5000) | |
for txt_chunk,ids_chunk in zip(txt,ids): | |
collection.add(documents=txt_chunk,ids=ids_chunk) | |
return collection |