# mindify-chat-api-demo / cura / vector_store.py
# MarkChenX's picture
# Update cura/vector_store.py
# 4632499 verified
"""
Vector Store for Mindify Chat
"""
import chromadb
from chromadb import Collection
def set_up_chromadb(collection_name: str):
    """
    Set up a ChromaDB collection for storing vectors.

    Args:
        collection_name: str
            The name of the collection to create or retrieve.

    Returns:
        ChromaDB Collection
            The existing collection with that name, or a newly created one
            if no such collection exists yet.
    """
    chroma_client = chromadb.Client()
    # get_or_create_collection does the lookup-then-create in a single call,
    # so no blanket exception handler is needed to detect a missing
    # collection, and unrelated client errors are no longer masked.
    return chroma_client.get_or_create_collection(name=collection_name)
def index_vector_store_chroma(collection_name: str, files: list):
    """
    Index documents in a ChromaDB collection.

    Args:
        collection_name: str
            The name of the collection to create or retrieve.
        files: list
            A sequence whose first element is the list of document strings
            to store. Only ``files[0]`` is read; any further elements are
            ignored.

    Returns:
        ChromaDB Collection or bool
            The collection the documents were added to on success,
            ``False`` if storing the documents failed.

    Raises:
        IndexError: If ``files`` is empty.
    """
    # Set up collection
    collection = set_up_chromadb(collection_name)
    print("Indexing files...")
    documents = files[0]
    # Each document's ID is its position in the list, as a string.
    ids = [str(position) for position in range(len(documents))]
    print("Storing GitHub data in ChromaDB...")
    try:
        collection.add(ids=ids, documents=documents)
    except Exception as exc:
        # Keep the best-effort contract (report and return False), but do
        # not swallow KeyboardInterrupt/SystemExit and do surface the cause.
        print(f"Error storing data in ChromaDB: {exc}")
        return False
    print("Data stored successfully!")
    return collection
def query_vector_store_chroma(collection: Collection, query: str):
    """
    Query the ChromaDB collection for the document most similar to the query.

    Args:
        collection: ChromaDB Collection
            The collection to search.
        query: str
            The query text.

    Returns:
        The first document listed for the query
        (``results["documents"][0][0]``), or ``None`` if the query fails or
        yields no documents.
    """
    print("Querying ChromaDB...")
    try:
        # Two results are requested, but only the first one is returned.
        results = collection.query(
            query_texts=[query],
            n_results=2,
        )
        print("Query successful!")
        return results["documents"][0][0]
    except Exception as exc:
        # Best-effort lookup: report the failure and return None, without
        # intercepting KeyboardInterrupt/SystemExit as a bare except would.
        print(f"Error querying ChromaDB: {exc}")
        return None