|
""" |
|
Improved vector store module - optimized Milvus configuration.
|
""" |
|
from typing import List, Dict, Any, Optional |
|
import uuid |
|
from langchain.schema import Document |
|
|
|
|
|
try: |
|
|
|
from langchain_milvus import Milvus |
|
from langchain_community.vectorstores import FAISS |
|
from langchain_huggingface import HuggingFaceEmbeddings |
|
MODERN_IMPORTS = True |
|
print("์ต์ langchain ํจํค์ง ์ํฌํธ ์ฑ๊ณต") |
|
except ImportError: |
|
|
|
from langchain_community.vectorstores import Milvus, FAISS |
|
from langchain_community.embeddings import HuggingFaceEmbeddings |
|
MODERN_IMPORTS = False |
|
print("๋ ๊ฑฐ์ langchain_community ํจํค์ง ์ฌ์ฉ") |
|
|
|
from config import MILVUS_HOST, MILVUS_PORT, MILVUS_COLLECTION, EMBEDDING_MODEL |
|
|
|
class VectorStore:
    """Vector store facade that prefers Milvus and falls back to FAISS.

    One embedding model is loaded per instance and shared by whichever
    backend ends up holding the documents.
    """

    # Id of the throw-away entry ``init_faiss`` inserts (and removes again)
    # so that FAISS can size an otherwise empty index.
    _FAISS_PLACEHOLDER_ID = "__vector_store_placeholder__"

    def __init__(self, use_milvus: bool = True):
        """Load the embedding model; no store is opened yet.

        Args:
            use_milvus: Use Milvus when True, FAISS when False.
        """
        # Backend currently selected. create_or_load() flips this to False
        # when a Milvus operation fails and FAISS takes over.
        self.use_milvus = use_milvus

        print(f"임베딩 모델 로드 중: {EMBEDDING_MODEL}")
        self.embeddings = HuggingFaceEmbeddings(
            model_name=EMBEDDING_MODEL,
            # NOTE(review): trust_remote_code presumably lets the model
            # repository execute its own code on load - confirm that
            # EMBEDDING_MODEL always points at a trusted source.
            model_kwargs={"device": "cpu", "trust_remote_code": True},
            encode_kwargs={"normalize_embeddings": True},
        )
        # Active store; stays None until create_or_load() or load_local()
        # assigns one.
        self.vector_store = None

        print(f"임베딩 모델 초기화 완료: {EMBEDDING_MODEL}")

    @staticmethod
    def _milvus_settings() -> Dict[str, Any]:
        """Return the keyword arguments shared by every Milvus constructor call."""
        return {
            "collection_name": MILVUS_COLLECTION,
            "connection_args": {"host": MILVUS_HOST, "port": MILVUS_PORT},
            # FLAT index with the COSINE metric; no extra index parameters.
            "index_params": {
                "index_type": "FLAT",
                "metric_type": "COSINE",
                "params": {},
            },
        }

    def init_milvus(self) -> Milvus:
        """Open the configured Milvus collection without inserting documents.

        Returns:
            Milvus vector store bound to ``MILVUS_COLLECTION``.
        """
        return Milvus(
            embedding_function=self.embeddings,
            **self._milvus_settings(),
        )

    def init_faiss(self) -> FAISS:
        """Create an empty FAISS store (local substitute for Milvus).

        ``FAISS.from_documents([])`` cannot be used here: the index dimension
        is taken from the first embedding vector, so an empty input raises
        ``IndexError``.  Instead a single placeholder text is indexed to fix
        the dimension and then deleted again, leaving an empty store.

        Returns:
            FAISS vector store containing no documents.
        """
        store = FAISS.from_texts(
            ["placeholder"],
            self.embeddings,
            ids=[self._FAISS_PLACEHOLDER_ID],
        )
        store.delete([self._FAISS_PLACEHOLDER_ID])
        return store

    def create_or_load(self, documents: Optional[List[Document]] = None) -> Any:
        """Create a store from ``documents`` or open/initialise an empty one.

        With Milvus selected and documents given, the existing collection is
        dropped and rebuilt.  Any Milvus failure switches the instance to
        FAISS (``use_milvus`` becomes False) and builds the store there.

        Args:
            documents: Documents to index; None or empty opens the existing
                Milvus collection, or creates an empty FAISS index.

        Returns:
            The vector store instance, also kept in ``self.vector_store``.
        """
        if not self.use_milvus:
            if documents:
                print(f"FAISS 인덱스 생성: {len(documents)}개 문서")
                self.vector_store = FAISS.from_documents(documents, self.embeddings)
                print("FAISS 인덱스 생성 완료")
            else:
                self.vector_store = self.init_faiss()
                print("빈 FAISS 인덱스 초기화 완료")
            return self.vector_store

        try:
            if documents:
                print(f"Milvus 컬렉션 생성: {MILVUS_COLLECTION} (기존 컬렉션 삭제)")
                self.vector_store = Milvus.from_documents(
                    documents=documents,
                    embedding=self.embeddings,
                    # Rebuild from scratch: the previous collection is dropped.
                    drop_old=True,
                    **self._milvus_settings(),
                )
                print(f"Milvus 컬렉션 생성 완료: {len(documents)}개 문서 인덱싱됨")
            else:
                self.vector_store = self.init_milvus()
        except Exception as e:
            # Deliberately broad: whatever goes wrong on the Milvus side, the
            # caller still gets a usable (FAISS) store.
            if documents:
                print(f"Milvus 컬렉션 생성 실패: {e}")
            else:
                print(f"Milvus 컬렉션 로드 실패: {e}")
            print("대체 방식으로 FAISS 사용")
            self.use_milvus = False
            if documents:
                self.vector_store = FAISS.from_documents(documents, self.embeddings)
            else:
                self.vector_store = self.init_faiss()

        return self.vector_store

    def add_documents(self, documents: List[Document]) -> None:
        """Add documents to the store, creating the store on first use.

        Args:
            documents: Documents to add.
        """
        if self.vector_store is None:
            self.create_or_load(documents)
            return
        if not documents:
            # Nothing to index; skip the backend call for an empty batch.
            return
        # Both backends expose the same add_documents() method.
        self.vector_store.add_documents(documents)

    def similarity_search(self, query: str, k: int = 5) -> List[Document]:
        """Run a vector similarity search.

        Args:
            query: Search query.
            k: Number of results to return.

        Returns:
            Documents returned by the backend's ``similarity_search``.

        Raises:
            ValueError: If no store has been created or loaded yet.
        """
        if self.vector_store is None:
            raise ValueError("벡터 스토어가 초기화되지 않았습니다.")

        print(f"검색 쿼리: '{query}', 상위 {k}개 결과 요청")
        results = self.vector_store.similarity_search(query, k=k)
        print(f"검색 완료: {len(results)}개 결과 찾음")

        return results

    def save_local(self, path: str = "faiss_index") -> None:
        """Save the FAISS index to disk.

        Does nothing when Milvus is the active backend or no store exists.

        Args:
            path: Destination path.
        """
        if not self.use_milvus and self.vector_store is not None:
            self.vector_store.save_local(path)
            print(f"FAISS 인덱스 로컬 저장 완료: {path}")

    def load_local(self, path: str = "faiss_index") -> None:
        """Load a FAISS index from disk.

        Does nothing when Milvus is the active backend.  If loading fails the
        error is printed with its traceback and a new empty FAISS index is
        used instead.

        Args:
            path: Path of the index to load.
        """
        if self.use_milvus:
            return

        try:
            print(f"FAISS 인덱스 로드 중: {path}")
            self.vector_store = FAISS.load_local(
                path,
                self.embeddings,
                # SECURITY: this flag permits unpickling the stored index;
                # only load paths written by this application or another
                # trusted source.
                allow_dangerous_deserialization=True,
            )
            print(f"FAISS 인덱스 로드 완료: {path}")
        except Exception as e:
            # Best effort: report the failure and continue with an empty index.
            print(f"FAISS 인덱스 로드 실패: {e}")

            import traceback

            traceback.print_exc()

            self.vector_store = self.init_faiss()
            print("새 FAISS 인덱스 초기화됨")