Spaces:
Sleeping
Sleeping
""" | |
Vector store helpers for Mindify Chat, backed by ChromaDB.
""" | |
import chromadb | |
from chromadb import Collection | |
def set_up_chromadb(collection_name: str) -> "Collection":
    """
    Return the ChromaDB collection called ``collection_name``, creating it if needed.

    A new ``chromadb.Client()`` is constructed on every call and the
    collection is looked up (or created) on that client.

    Args:
        collection_name: str
            The name of the collection to create or retrieve.

    Returns:
        ChromaDB Collection
            The ChromaDB collection object.
    """
    chroma_client = chromadb.Client()
    # One call covers both "already exists" and "needs creating". The previous
    # get-then-create pattern relied on a bare ``except`` that treated *any*
    # failure (including KeyboardInterrupt) as "collection missing".
    return chroma_client.get_or_create_collection(name=collection_name)
def index_vector_store_chroma(collection_name, files: list):
    """
    Index documents in the ChromaDB collection called ``collection_name``.

    Args:
        collection_name:
            Name of the collection to store the documents in; it is passed
            to ``set_up_chromadb`` to create or retrieve the collection.
        files: list
            A list whose first element is the list of document strings to
            index. Only ``files[0]`` is read.

    Returns:
        The ChromaDB collection if the data is stored successfully,
        ``False`` if ``files`` is empty or storing the data fails.
    """
    # Guard the ``files[0]`` access below: an empty (or missing) list used to
    # raise IndexError/TypeError instead of reporting a failure.
    if not files:
        print("Error storing data in ChromaDB: no files provided")
        return False

    # Set up collection
    collection = set_up_chromadb(collection_name)

    print("Indexing files...")
    documents = files[0]
    # Ids are the documents' positions rendered as strings.
    # NOTE(review): ids restart at "0" on every call, so indexing twice into
    # the same collection reuses ids - confirm how ChromaDB treats duplicates.
    ids = [str(position) for position in range(len(documents))]

    print("Storing GitHub data in ChromaDB...")
    try:
        collection.add(ids=ids, documents=documents)
    except Exception as exc:
        # Keep the best-effort contract (return False), but surface the cause
        # and stop swallowing KeyboardInterrupt/SystemExit.
        print(f"Error storing data in ChromaDB: {exc}")
        return False

    print("Data stored successfully!")
    return collection
def query_vector_store_chroma(collection: "Collection", query: str):
    """
    Query the ChromaDB collection for the document most similar to ``query``.

    Args:
        collection: ChromaDB Collection
            The collection to search.
        query: str
            The query text; it is sent as the single entry of ``query_texts``.

    Returns:
        The first document returned for the query, or ``None`` if the query
        fails or returns no documents.
    """
    print("Querying ChromaDB...")
    try:
        results = collection.query(
            query_texts=[query],
            n_results=2,
        )
        # ``documents`` holds one list of matches per query text; we sent
        # exactly one query text, so only index 0 is relevant.
        matches = results["documents"]
        if not matches or not matches[0]:
            # Previously this case printed "Query successful!" and then fell
            # into the error path through an IndexError.
            print("No matching documents found in ChromaDB")
            return None
        print("Query successful!")
        # Two results are requested, but only the top one is returned.
        return matches[0][0]
    except Exception as exc:
        # Best-effort contract is kept (return None); the cause is reported
        # and KeyboardInterrupt/SystemExit are no longer swallowed.
        print(f"Error querying ChromaDB: {exc}")
        return None