liuhua liuhua committed on
Commit
5b9e61c
·
1 Parent(s): 7af2206

Fix some issues in API and test (#3001)

Browse files

### What problem does this PR solve?

Fix some issues in API and test

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

Co-authored-by: liuhua <10215101452@stu.ecun.edu.cn>

api/apps/sdk/chat.py CHANGED
@@ -30,9 +30,9 @@ from api.utils.api_utils import get_result
30
  @token_required
31
  def create(tenant_id):
32
  req=request.json
33
- ids= req.get("datasets")
34
  if not ids:
35
- return get_error_data_result(retmsg="`datasets` is required")
36
  for kb_id in ids:
37
  kbs = KnowledgebaseService.query(id=kb_id,tenant_id=tenant_id)
38
  if not kbs:
@@ -138,7 +138,7 @@ def create(tenant_id):
138
  res["llm"] = res.pop("llm_setting")
139
  res["llm"]["model_name"] = res.pop("llm_id")
140
  del res["kb_ids"]
141
- res["datasets"] = req["datasets"]
142
  res["avatar"] = res.pop("icon")
143
  return get_result(data=res)
144
 
@@ -148,8 +148,8 @@ def update(tenant_id,chat_id):
148
  if not DialogService.query(tenant_id=tenant_id, id=chat_id, status=StatusEnum.VALID.value):
149
  return get_error_data_result(retmsg='You do not own the chat')
150
  req =request.json
151
- ids = req.get("datasets")
152
- if "datasets" in req:
153
  if not ids:
154
  return get_error_data_result("`datasets` can't be empty")
155
  if ids:
@@ -214,8 +214,8 @@ def update(tenant_id,chat_id):
214
  # avatar
215
  if "avatar" in req:
216
  req["icon"] = req.pop("avatar")
217
- if "datasets" in req:
218
- req.pop("datasets")
219
  if not DialogService.update_by_id(chat_id, req):
220
  return get_error_data_result(retmsg="Chat not found!")
221
  return get_result()
 
30
  @token_required
31
  def create(tenant_id):
32
  req=request.json
33
+ ids= req.get("dataset_ids")
34
  if not ids:
35
+ return get_error_data_result(retmsg="`dataset_ids` is required")
36
  for kb_id in ids:
37
  kbs = KnowledgebaseService.query(id=kb_id,tenant_id=tenant_id)
38
  if not kbs:
 
138
  res["llm"] = res.pop("llm_setting")
139
  res["llm"]["model_name"] = res.pop("llm_id")
140
  del res["kb_ids"]
141
+ res["dataset_ids"] = req["dataset_ids"]
142
  res["avatar"] = res.pop("icon")
143
  return get_result(data=res)
144
 
 
148
  if not DialogService.query(tenant_id=tenant_id, id=chat_id, status=StatusEnum.VALID.value):
149
  return get_error_data_result(retmsg='You do not own the chat')
150
  req =request.json
151
+ ids = req.get("dataset_ids")
152
+ if "dataset_ids" in req:
153
  if not ids:
154
  return get_error_data_result("`datasets` can't be empty")
155
  if ids:
 
214
  # avatar
215
  if "avatar" in req:
216
  req["icon"] = req.pop("avatar")
217
+ if "dataset_ids" in req:
218
+ req.pop("dataset_ids")
219
  if not DialogService.update_by_id(chat_id, req):
220
  return get_error_data_result(retmsg="Chat not found!")
221
  return get_result()
api/apps/sdk/doc.py CHANGED
@@ -550,33 +550,32 @@ def update_chunk(tenant_id,dataset_id,document_id,chunk_id):
550
  @token_required
551
  def retrieval_test(tenant_id):
552
  req = request.json
553
- if not req.get("datasets"):
554
  return get_error_data_result("`datasets` is required.")
555
- kb_ids = req["datasets"]
556
  if not isinstance(kb_ids,list):
557
  return get_error_data_result("`datasets` should be a list")
558
  kbs = KnowledgebaseService.get_by_ids(kb_ids)
 
 
 
559
  embd_nms = list(set([kb.embd_id for kb in kbs]))
560
  if len(embd_nms) != 1:
561
  return get_result(
562
- retmsg='Knowledge bases use different embedding models or does not exist."',
563
  retcode=RetCode.AUTHENTICATION_ERROR)
564
- if isinstance(kb_ids, str): kb_ids = [kb_ids]
565
- for id in kb_ids:
566
- if not KnowledgebaseService.query(id=id,tenant_id=tenant_id):
567
- return get_error_data_result(f"You don't own the dataset {id}.")
568
  if "question" not in req:
569
  return get_error_data_result("`question` is required.")
570
  page = int(req.get("offset", 1))
571
  size = int(req.get("limit", 1024))
572
  question = req["question"]
573
- doc_ids = req.get("documents", [])
574
- if not isinstance(req.get("documents"),list):
575
  return get_error_data_result("`documents` should be a list")
576
  doc_ids_list=KnowledgebaseService.list_documents_by_ids(kb_ids)
577
  for doc_id in doc_ids:
578
  if doc_id not in doc_ids_list:
579
- return get_error_data_result(f"You don't own the document {doc_id}")
580
  similarity_threshold = float(req.get("similarity_threshold", 0.2))
581
  vector_similarity_weight = float(req.get("vector_similarity_weight", 0.3))
582
  top = int(req.get("top_k", 1024))
 
550
  @token_required
551
  def retrieval_test(tenant_id):
552
  req = request.json
553
+ if not req.get("dataset_ids"):
554
  return get_error_data_result("`datasets` is required.")
555
+ kb_ids = req["dataset_ids"]
556
  if not isinstance(kb_ids,list):
557
  return get_error_data_result("`datasets` should be a list")
558
  kbs = KnowledgebaseService.get_by_ids(kb_ids)
559
+ for id in kb_ids:
560
+ if not KnowledgebaseService.query(id=id,tenant_id=tenant_id):
561
+ return get_error_data_result(f"You don't own the dataset {id}.")
562
  embd_nms = list(set([kb.embd_id for kb in kbs]))
563
  if len(embd_nms) != 1:
564
  return get_result(
565
+ retmsg='Datasets use different embedding models."',
566
  retcode=RetCode.AUTHENTICATION_ERROR)
 
 
 
 
567
  if "question" not in req:
568
  return get_error_data_result("`question` is required.")
569
  page = int(req.get("offset", 1))
570
  size = int(req.get("limit", 1024))
571
  question = req["question"]
572
+ doc_ids = req.get("document_ids", [])
573
+ if not isinstance(doc_ids,list):
574
  return get_error_data_result("`documents` should be a list")
575
  doc_ids_list=KnowledgebaseService.list_documents_by_ids(kb_ids)
576
  for doc_id in doc_ids:
577
  if doc_id not in doc_ids_list:
578
+ return get_error_data_result(f"The datasets don't own the document {doc_id}")
579
  similarity_threshold = float(req.get("similarity_threshold", 0.2))
580
  vector_similarity_weight = float(req.get("vector_similarity_weight", 0.3))
581
  top = int(req.get("top_k", 1024))
sdk/python/ragflow/modules/chat.py CHANGED
@@ -9,7 +9,7 @@ class Chat(Base):
9
  self.id = ""
10
  self.name = "assistant"
11
  self.avatar = "path/to/avatar"
12
- self.datasets = ["kb1"]
13
  self.llm = Chat.LLM(rag, {})
14
  self.prompt = Chat.Prompt(rag, {})
15
  super().__init__(rag, res_dict)
 
9
  self.id = ""
10
  self.name = "assistant"
11
  self.avatar = "path/to/avatar"
12
+ self.dataset_ids = ["kb1"]
13
  self.llm = Chat.LLM(rag, {})
14
  self.prompt = Chat.Prompt(rag, {})
15
  super().__init__(rag, res_dict)
sdk/python/ragflow/ragflow.py CHANGED
@@ -64,8 +64,8 @@ class RAGFlow:
64
  return DataSet(self, res["data"])
65
  raise Exception(res["message"])
66
 
67
- def delete_datasets(self, ids: List[str] = None, names: List[str] = None):
68
- res = self.delete("/dataset",{"ids": ids, "names": names})
69
  res=res.json()
70
  if res.get("code") != 0:
71
  raise Exception(res["message"])
@@ -89,11 +89,11 @@ class RAGFlow:
89
  return result_list
90
  raise Exception(res["message"])
91
 
92
- def create_chat(self, name: str, avatar: str = "", datasets: List[DataSet] = [],
93
  llm: Chat.LLM = None, prompt: Chat.Prompt = None) -> Chat:
94
  dataset_list = []
95
- for dataset in datasets:
96
- dataset_list.append(dataset.id)
97
 
98
  if llm is None:
99
  llm = Chat.LLM(self, {"model_name": None,
@@ -126,7 +126,7 @@ class RAGFlow:
126
 
127
  temp_dict = {"name": name,
128
  "avatar": avatar,
129
- "datasets": dataset_list,
130
  "llm": llm.to_json(),
131
  "prompt": prompt.to_json()}
132
  res = self.post("/chat", temp_dict)
@@ -154,7 +154,9 @@ class RAGFlow:
154
  raise Exception(res["message"])
155
 
156
 
157
- def retrieve(self, datasets,documents,question="", offset=1, limit=1024, similarity_threshold=0.2,vector_similarity_weight=0.3,top_k=1024,rerank_id:str=None,keyword:bool=False,):
 
 
158
  data_json ={
159
  "offset": offset,
160
  "limit": limit,
@@ -164,10 +166,9 @@ class RAGFlow:
164
  "rerank_id": rerank_id,
165
  "keyword": keyword,
166
  "question": question,
167
- "datasets": datasets,
168
- "documents": documents
169
  }
170
-
171
  # Send a POST request to the backend service (using requests library as an example, actual implementation may vary)
172
  res = self.post(f'/retrieval',json=data_json)
173
  res = res.json()
 
64
  return DataSet(self, res["data"])
65
  raise Exception(res["message"])
66
 
67
+ def delete_datasets(self, ids: List[str]):
68
+ res = self.delete("/dataset",{"ids": ids})
69
  res=res.json()
70
  if res.get("code") != 0:
71
  raise Exception(res["message"])
 
89
  return result_list
90
  raise Exception(res["message"])
91
 
92
+ def create_chat(self, name: str, avatar: str = "", dataset_ids: List[str] = [],
93
  llm: Chat.LLM = None, prompt: Chat.Prompt = None) -> Chat:
94
  dataset_list = []
95
+ for id in dataset_ids:
96
+ dataset_list.append(id)
97
 
98
  if llm is None:
99
  llm = Chat.LLM(self, {"model_name": None,
 
126
 
127
  temp_dict = {"name": name,
128
  "avatar": avatar,
129
+ "dataset_ids": dataset_list,
130
  "llm": llm.to_json(),
131
  "prompt": prompt.to_json()}
132
  res = self.post("/chat", temp_dict)
 
154
  raise Exception(res["message"])
155
 
156
 
157
+ def retrieve(self, dataset_ids, document_ids=None, question="", offset=1, limit=1024, similarity_threshold=0.2, vector_similarity_weight=0.3, top_k=1024, rerank_id:str=None, keyword:bool=False, ):
158
+ if document_ids is None:
159
+ document_ids = []
160
  data_json ={
161
  "offset": offset,
162
  "limit": limit,
 
166
  "rerank_id": rerank_id,
167
  "keyword": keyword,
168
  "question": question,
169
+ "datasets": dataset_ids,
170
+ "documents": document_ids
171
  }
 
172
  # Send a POST request to the backend service (using requests library as an example, actual implementation may vary)
173
  res = self.post(f'/retrieval',json=data_json)
174
  res = res.json()
sdk/python/test/t_chat.py CHANGED
@@ -1,5 +1,4 @@
1
  from ragflow import RAGFlow, Chat
2
- import time
3
  HOST_ADDRESS = 'http://127.0.0.1:9380'
4
 
5
  def test_create_chat_with_name(get_api_key_fixture):
@@ -12,13 +11,10 @@ def test_create_chat_with_name(get_api_key_fixture):
12
  document = {"displayed_name":displayed_name,"blob":blob}
13
  documents = []
14
  documents.append(document)
15
- doc_ids = []
16
  docs= kb.upload_documents(documents)
17
  for doc in docs:
18
- doc_ids.append(doc.id)
19
- kb.async_parse_documents(doc_ids)
20
- time.sleep(60)
21
- rag.create_chat("test_create", datasets=[kb])
22
 
23
 
24
  def test_update_chat_with_name(get_api_key_fixture):
@@ -31,13 +27,10 @@ def test_update_chat_with_name(get_api_key_fixture):
31
  document = {"displayed_name": displayed_name, "blob": blob}
32
  documents = []
33
  documents.append(document)
34
- doc_ids = []
35
  docs = kb.upload_documents(documents)
36
  for doc in docs:
37
- doc_ids.append(doc.id)
38
- kb.async_parse_documents(doc_ids)
39
- time.sleep(60)
40
- chat = rag.create_chat("test_update", datasets=[kb])
41
  chat.update({"name": "new_chat"})
42
 
43
 
@@ -51,17 +44,27 @@ def test_delete_chats_with_success(get_api_key_fixture):
51
  document = {"displayed_name": displayed_name, "blob": blob}
52
  documents = []
53
  documents.append(document)
54
- doc_ids = []
55
  docs = kb.upload_documents(documents)
56
  for doc in docs:
57
- doc_ids.append(doc.id)
58
- kb.async_parse_documents(doc_ids)
59
- time.sleep(60)
60
- chat = rag.create_chat("test_delete", datasets=[kb])
61
  rag.delete_chats(ids=[chat.id])
62
 
 
63
  API_KEY = get_api_key_fixture
64
  rag = RAGFlow(API_KEY, HOST_ADDRESS)
 
 
 
 
 
 
 
 
 
 
 
 
65
  rag.list_chats()
66
 
67
 
 
1
  from ragflow import RAGFlow, Chat
 
2
  HOST_ADDRESS = 'http://127.0.0.1:9380'
3
 
4
  def test_create_chat_with_name(get_api_key_fixture):
 
11
  document = {"displayed_name":displayed_name,"blob":blob}
12
  documents = []
13
  documents.append(document)
 
14
  docs= kb.upload_documents(documents)
15
  for doc in docs:
16
+ doc.add_chunk("This is a test to add chunk")
17
+ rag.create_chat("test_create", dataset_ids=[kb.id])
 
 
18
 
19
 
20
  def test_update_chat_with_name(get_api_key_fixture):
 
27
  document = {"displayed_name": displayed_name, "blob": blob}
28
  documents = []
29
  documents.append(document)
 
30
  docs = kb.upload_documents(documents)
31
  for doc in docs:
32
+ doc.add_chunk("This is a test to add chunk")
33
+ chat = rag.create_chat("test_update", dataset_ids=[kb.id])
 
 
34
  chat.update({"name": "new_chat"})
35
 
36
 
 
44
  document = {"displayed_name": displayed_name, "blob": blob}
45
  documents = []
46
  documents.append(document)
 
47
  docs = kb.upload_documents(documents)
48
  for doc in docs:
49
+ doc.add_chunk("This is a test to add chunk")
50
+ chat = rag.create_chat("test_delete", dataset_ids=[kb.id])
 
 
51
  rag.delete_chats(ids=[chat.id])
52
 
53
+ def test_list_chats_with_success(get_api_key_fixture):
54
  API_KEY = get_api_key_fixture
55
  rag = RAGFlow(API_KEY, HOST_ADDRESS)
56
+ kb = rag.create_dataset(name="test_delete_chat")
57
+ displayed_name = "ragflow.txt"
58
+ with open("./ragflow.txt", "rb") as file:
59
+ blob = file.read()
60
+ document = {"displayed_name": displayed_name, "blob": blob}
61
+ documents = []
62
+ documents.append(document)
63
+ docs = kb.upload_documents(documents)
64
+ for doc in docs:
65
+ doc.add_chunk("This is a test to add chunk")
66
+ rag.create_chat("test_list_1", dataset_ids=[kb.id])
67
+ rag.create_chat("test_list_2", dataset_ids=[kb.id])
68
  rag.list_chats()
69
 
70
 
sdk/python/test/t_session.py CHANGED
@@ -10,16 +10,13 @@ def test_create_session_with_success(get_api_key_fixture):
10
  displayed_name = "ragflow.txt"
11
  with open("./ragflow.txt", "rb") as file:
12
  blob = file.read()
13
- document = {"displayed_name": displayed_name, "blob": blob}
14
  documents = []
15
  documents.append(document)
16
- doc_ids = []
17
- docs = kb.upload_documents(documents)
18
  for doc in docs:
19
- doc_ids.append(doc.id)
20
- kb.async_parse_documents(doc_ids)
21
- time.sleep(60)
22
- assistant = rag.create_chat(name="test_create_session", datasets=[kb])
23
  assistant.create_session()
24
 
25
 
@@ -30,16 +27,13 @@ def test_create_conversation_with_success(get_api_key_fixture):
30
  displayed_name = "ragflow.txt"
31
  with open("./ragflow.txt","rb") as file:
32
  blob = file.read()
33
- document = {"displayed_name":displayed_name,"blob":blob}
34
  documents = []
35
  documents.append(document)
36
- doc_ids = []
37
- docs= kb.upload_documents(documents)
38
  for doc in docs:
39
- doc_ids.append(doc.id)
40
- kb.async_parse_documents(doc_ids)
41
- time.sleep(60)
42
- assistant = rag.create_chat(name="test_create_conversation", datasets=[kb])
43
  session = assistant.create_session()
44
  question = "What is AI"
45
  for ans in session.ask(question, stream=True):
@@ -57,13 +51,10 @@ def test_delete_sessions_with_success(get_api_key_fixture):
57
  document = {"displayed_name":displayed_name,"blob":blob}
58
  documents = []
59
  documents.append(document)
60
- doc_ids = []
61
  docs= kb.upload_documents(documents)
62
  for doc in docs:
63
- doc_ids.append(doc.id)
64
- kb.async_parse_documents(doc_ids)
65
- time.sleep(60)
66
- assistant = rag.create_chat(name="test_delete_session", datasets=[kb])
67
  session = assistant.create_session()
68
  assistant.delete_sessions(ids=[session.id])
69
 
@@ -74,16 +65,13 @@ def test_update_session_with_name(get_api_key_fixture):
74
  displayed_name = "ragflow.txt"
75
  with open("./ragflow.txt","rb") as file:
76
  blob = file.read()
77
- document = {"displayed_name":displayed_name,"blob":blob}
78
  documents = []
79
  documents.append(document)
80
- doc_ids = []
81
- docs= kb.upload_documents(documents)
82
  for doc in docs:
83
- doc_ids.append(doc.id)
84
- kb.async_parse_documents(doc_ids)
85
- time.sleep(60)
86
- assistant = rag.create_chat(name="test_update_session", datasets=[kb])
87
  session = assistant.create_session(name="old session")
88
  session.update({"name": "new session"})
89
 
@@ -98,13 +86,10 @@ def test_list_sessions_with_success(get_api_key_fixture):
98
  document = {"displayed_name":displayed_name,"blob":blob}
99
  documents = []
100
  documents.append(document)
101
- doc_ids = []
102
  docs= kb.upload_documents(documents)
103
  for doc in docs:
104
- doc_ids.append(doc.id)
105
- kb.async_parse_documents(doc_ids)
106
- time.sleep(60)
107
- assistant = rag.create_chat(name="test_list_session", datasets=[kb])
108
  assistant.create_session("test_1")
109
  assistant.create_session("test_2")
110
  assistant.list_sessions()
 
10
  displayed_name = "ragflow.txt"
11
  with open("./ragflow.txt", "rb") as file:
12
  blob = file.read()
13
+ document = {"displayed_name":displayed_name,"blob":blob}
14
  documents = []
15
  documents.append(document)
16
+ docs= kb.upload_documents(documents)
 
17
  for doc in docs:
18
+ doc.add_chunk("This is a test to add chunk")
19
+ assistant=rag.create_chat("test_create", dataset_ids=[kb.id])
 
 
20
  assistant.create_session()
21
 
22
 
 
27
  displayed_name = "ragflow.txt"
28
  with open("./ragflow.txt","rb") as file:
29
  blob = file.read()
30
+ document = {"displayed_name": displayed_name, "blob": blob}
31
  documents = []
32
  documents.append(document)
33
+ docs = kb.upload_documents(documents)
 
34
  for doc in docs:
35
+ doc.add_chunk("This is a test to add chunk")
36
+ assistant = rag.create_chat("test_create", dataset_ids=[kb.id])
 
 
37
  session = assistant.create_session()
38
  question = "What is AI"
39
  for ans in session.ask(question, stream=True):
 
51
  document = {"displayed_name":displayed_name,"blob":blob}
52
  documents = []
53
  documents.append(document)
 
54
  docs= kb.upload_documents(documents)
55
  for doc in docs:
56
+ doc.add_chunk("This is a test to add chunk")
57
+ assistant=rag.create_chat("test_create", dataset_ids=[kb.id])
 
 
58
  session = assistant.create_session()
59
  assistant.delete_sessions(ids=[session.id])
60
 
 
65
  displayed_name = "ragflow.txt"
66
  with open("./ragflow.txt","rb") as file:
67
  blob = file.read()
68
+ document = {"displayed_name": displayed_name, "blob": blob}
69
  documents = []
70
  documents.append(document)
71
+ docs = kb.upload_documents(documents)
 
72
  for doc in docs:
73
+ doc.add_chunk("This is a test to add chunk")
74
+ assistant = rag.create_chat("test_create", dataset_ids=[kb.id])
 
 
75
  session = assistant.create_session(name="old session")
76
  session.update({"name": "new session"})
77
 
 
86
  document = {"displayed_name":displayed_name,"blob":blob}
87
  documents = []
88
  documents.append(document)
 
89
  docs= kb.upload_documents(documents)
90
  for doc in docs:
91
+ doc.add_chunk("This is a test to add chunk")
92
+ assistant=rag.create_chat("test_create", dataset_ids=[kb.id])
 
 
93
  assistant.create_session("test_1")
94
  assistant.create_session("test_2")
95
  assistant.list_sessions()