liuhua liuhua committed on
Commit
5b9e61c
·
1 Parent(s): 7af2206

Fix some issues in API and test (#3001)

Browse files

### What problem does this PR solve?

Fix some issues in API and test

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

Co-authored-by: liuhua <10215101452@stu.ecun.edu.cn>

api/apps/sdk/chat.py CHANGED
@@ -30,9 +30,9 @@ from api.utils.api_utils import get_result
30
  @token_required
31
  def create(tenant_id):
32
  req=request.json
33
- ids= req.get("datasets")
34
  if not ids:
35
- return get_error_data_result(retmsg="`datasets` is required")
36
  for kb_id in ids:
37
  kbs = KnowledgebaseService.query(id=kb_id,tenant_id=tenant_id)
38
  if not kbs:
@@ -138,7 +138,7 @@ def create(tenant_id):
138
  res["llm"] = res.pop("llm_setting")
139
  res["llm"]["model_name"] = res.pop("llm_id")
140
  del res["kb_ids"]
141
- res["datasets"] = req["datasets"]
142
  res["avatar"] = res.pop("icon")
143
  return get_result(data=res)
144
 
@@ -148,8 +148,8 @@ def update(tenant_id,chat_id):
148
  if not DialogService.query(tenant_id=tenant_id, id=chat_id, status=StatusEnum.VALID.value):
149
  return get_error_data_result(retmsg='You do not own the chat')
150
  req =request.json
151
- ids = req.get("datasets")
152
- if "datasets" in req:
153
  if not ids:
154
  return get_error_data_result("`datasets` can't be empty")
155
  if ids:
@@ -214,8 +214,8 @@ def update(tenant_id,chat_id):
214
  # avatar
215
  if "avatar" in req:
216
  req["icon"] = req.pop("avatar")
217
- if "datasets" in req:
218
- req.pop("datasets")
219
  if not DialogService.update_by_id(chat_id, req):
220
  return get_error_data_result(retmsg="Chat not found!")
221
  return get_result()
 
30
  @token_required
31
  def create(tenant_id):
32
  req=request.json
33
+ ids= req.get("dataset_ids")
34
  if not ids:
35
+ return get_error_data_result(retmsg="`dataset_ids` is required")
36
  for kb_id in ids:
37
  kbs = KnowledgebaseService.query(id=kb_id,tenant_id=tenant_id)
38
  if not kbs:
 
138
  res["llm"] = res.pop("llm_setting")
139
  res["llm"]["model_name"] = res.pop("llm_id")
140
  del res["kb_ids"]
141
+ res["dataset_ids"] = req["dataset_ids"]
142
  res["avatar"] = res.pop("icon")
143
  return get_result(data=res)
144
 
 
148
  if not DialogService.query(tenant_id=tenant_id, id=chat_id, status=StatusEnum.VALID.value):
149
  return get_error_data_result(retmsg='You do not own the chat')
150
  req =request.json
151
+ ids = req.get("dataset_ids")
152
+ if "dataset_ids" in req:
153
  if not ids:
154
  return get_error_data_result("`datasets` can't be empty")
155
  if ids:
 
214
  # avatar
215
  if "avatar" in req:
216
  req["icon"] = req.pop("avatar")
217
+ if "dataset_ids" in req:
218
+ req.pop("dataset_ids")
219
  if not DialogService.update_by_id(chat_id, req):
220
  return get_error_data_result(retmsg="Chat not found!")
221
  return get_result()
api/apps/sdk/doc.py CHANGED
@@ -550,33 +550,32 @@ def update_chunk(tenant_id,dataset_id,document_id,chunk_id):
550
  @token_required
551
  def retrieval_test(tenant_id):
552
  req = request.json
553
- if not req.get("datasets"):
554
  return get_error_data_result("`datasets` is required.")
555
- kb_ids = req["datasets"]
556
  if not isinstance(kb_ids,list):
557
  return get_error_data_result("`datasets` should be a list")
558
  kbs = KnowledgebaseService.get_by_ids(kb_ids)
 
 
 
559
  embd_nms = list(set([kb.embd_id for kb in kbs]))
560
  if len(embd_nms) != 1:
561
  return get_result(
562
- retmsg='Knowledge bases use different embedding models or does not exist."',
563
  retcode=RetCode.AUTHENTICATION_ERROR)
564
- if isinstance(kb_ids, str): kb_ids = [kb_ids]
565
- for id in kb_ids:
566
- if not KnowledgebaseService.query(id=id,tenant_id=tenant_id):
567
- return get_error_data_result(f"You don't own the dataset {id}.")
568
  if "question" not in req:
569
  return get_error_data_result("`question` is required.")
570
  page = int(req.get("offset", 1))
571
  size = int(req.get("limit", 1024))
572
  question = req["question"]
573
- doc_ids = req.get("documents", [])
574
- if not isinstance(req.get("documents"),list):
575
  return get_error_data_result("`documents` should be a list")
576
  doc_ids_list=KnowledgebaseService.list_documents_by_ids(kb_ids)
577
  for doc_id in doc_ids:
578
  if doc_id not in doc_ids_list:
579
- return get_error_data_result(f"You don't own the document {doc_id}")
580
  similarity_threshold = float(req.get("similarity_threshold", 0.2))
581
  vector_similarity_weight = float(req.get("vector_similarity_weight", 0.3))
582
  top = int(req.get("top_k", 1024))
 
550
  @token_required
551
  def retrieval_test(tenant_id):
552
  req = request.json
553
+ if not req.get("dataset_ids"):
554
  return get_error_data_result("`datasets` is required.")
555
+ kb_ids = req["dataset_ids"]
556
  if not isinstance(kb_ids,list):
557
  return get_error_data_result("`datasets` should be a list")
558
  kbs = KnowledgebaseService.get_by_ids(kb_ids)
559
+ for id in kb_ids:
560
+ if not KnowledgebaseService.query(id=id,tenant_id=tenant_id):
561
+ return get_error_data_result(f"You don't own the dataset {id}.")
562
  embd_nms = list(set([kb.embd_id for kb in kbs]))
563
  if len(embd_nms) != 1:
564
  return get_result(
565
+ retmsg='Datasets use different embedding models."',
566
  retcode=RetCode.AUTHENTICATION_ERROR)
 
 
 
 
567
  if "question" not in req:
568
  return get_error_data_result("`question` is required.")
569
  page = int(req.get("offset", 1))
570
  size = int(req.get("limit", 1024))
571
  question = req["question"]
572
+ doc_ids = req.get("document_ids", [])
573
+ if not isinstance(doc_ids,list):
574
  return get_error_data_result("`documents` should be a list")
575
  doc_ids_list=KnowledgebaseService.list_documents_by_ids(kb_ids)
576
  for doc_id in doc_ids:
577
  if doc_id not in doc_ids_list:
578
+ return get_error_data_result(f"The datasets don't own the document {doc_id}")
579
  similarity_threshold = float(req.get("similarity_threshold", 0.2))
580
  vector_similarity_weight = float(req.get("vector_similarity_weight", 0.3))
581
  top = int(req.get("top_k", 1024))
sdk/python/ragflow/modules/chat.py CHANGED
@@ -9,7 +9,7 @@ class Chat(Base):
9
  self.id = ""
10
  self.name = "assistant"
11
  self.avatar = "path/to/avatar"
12
- self.datasets = ["kb1"]
13
  self.llm = Chat.LLM(rag, {})
14
  self.prompt = Chat.Prompt(rag, {})
15
  super().__init__(rag, res_dict)
 
9
  self.id = ""
10
  self.name = "assistant"
11
  self.avatar = "path/to/avatar"
12
+ self.dataset_ids = ["kb1"]
13
  self.llm = Chat.LLM(rag, {})
14
  self.prompt = Chat.Prompt(rag, {})
15
  super().__init__(rag, res_dict)
sdk/python/ragflow/ragflow.py CHANGED
@@ -64,8 +64,8 @@ class RAGFlow:
64
  return DataSet(self, res["data"])
65
  raise Exception(res["message"])
66
 
67
- def delete_datasets(self, ids: List[str] = None, names: List[str] = None):
68
- res = self.delete("/dataset",{"ids": ids, "names": names})
69
  res=res.json()
70
  if res.get("code") != 0:
71
  raise Exception(res["message"])
@@ -89,11 +89,11 @@ class RAGFlow:
89
  return result_list
90
  raise Exception(res["message"])
91
 
92
- def create_chat(self, name: str, avatar: str = "", datasets: List[DataSet] = [],
93
  llm: Chat.LLM = None, prompt: Chat.Prompt = None) -> Chat:
94
  dataset_list = []
95
- for dataset in datasets:
96
- dataset_list.append(dataset.id)
97
 
98
  if llm is None:
99
  llm = Chat.LLM(self, {"model_name": None,
@@ -126,7 +126,7 @@ class RAGFlow:
126
 
127
  temp_dict = {"name": name,
128
  "avatar": avatar,
129
- "datasets": dataset_list,
130
  "llm": llm.to_json(),
131
  "prompt": prompt.to_json()}
132
  res = self.post("/chat", temp_dict)
@@ -154,7 +154,9 @@ class RAGFlow:
154
  raise Exception(res["message"])
155
 
156
 
157
- def retrieve(self, datasets,documents,question="", offset=1, limit=1024, similarity_threshold=0.2,vector_similarity_weight=0.3,top_k=1024,rerank_id:str=None,keyword:bool=False,):
 
 
158
  data_json ={
159
  "offset": offset,
160
  "limit": limit,
@@ -164,10 +166,9 @@ class RAGFlow:
164
  "rerank_id": rerank_id,
165
  "keyword": keyword,
166
  "question": question,
167
- "datasets": datasets,
168
- "documents": documents
169
  }
170
-
171
  # Send a POST request to the backend service (using requests library as an example, actual implementation may vary)
172
  res = self.post(f'/retrieval',json=data_json)
173
  res = res.json()
 
64
  return DataSet(self, res["data"])
65
  raise Exception(res["message"])
66
 
67
+ def delete_datasets(self, ids: List[str]):
68
+ res = self.delete("/dataset",{"ids": ids})
69
  res=res.json()
70
  if res.get("code") != 0:
71
  raise Exception(res["message"])
 
89
  return result_list
90
  raise Exception(res["message"])
91
 
92
+ def create_chat(self, name: str, avatar: str = "", dataset_ids: List[str] = [],
93
  llm: Chat.LLM = None, prompt: Chat.Prompt = None) -> Chat:
94
  dataset_list = []
95
+ for id in dataset_ids:
96
+ dataset_list.append(id)
97
 
98
  if llm is None:
99
  llm = Chat.LLM(self, {"model_name": None,
 
126
 
127
  temp_dict = {"name": name,
128
  "avatar": avatar,
129
+ "dataset_ids": dataset_list,
130
  "llm": llm.to_json(),
131
  "prompt": prompt.to_json()}
132
  res = self.post("/chat", temp_dict)
 
154
  raise Exception(res["message"])
155
 
156
 
157
+ def retrieve(self, dataset_ids, document_ids=None, question="", offset=1, limit=1024, similarity_threshold=0.2, vector_similarity_weight=0.3, top_k=1024, rerank_id:str=None, keyword:bool=False, ):
158
+ if document_ids is None:
159
+ document_ids = []
160
  data_json ={
161
  "offset": offset,
162
  "limit": limit,
 
166
  "rerank_id": rerank_id,
167
  "keyword": keyword,
168
  "question": question,
169
+ "datasets": dataset_ids,
170
+ "documents": document_ids
171
  }
 
172
  # Send a POST request to the backend service (using requests library as an example, actual implementation may vary)
173
  res = self.post(f'/retrieval',json=data_json)
174
  res = res.json()
sdk/python/test/t_chat.py CHANGED
@@ -1,5 +1,4 @@
1
  from ragflow import RAGFlow, Chat
2
- import time
3
  HOST_ADDRESS = 'http://127.0.0.1:9380'
4
 
5
  def test_create_chat_with_name(get_api_key_fixture):
@@ -12,13 +11,10 @@ def test_create_chat_with_name(get_api_key_fixture):
12
  document = {"displayed_name":displayed_name,"blob":blob}
13
  documents = []
14
  documents.append(document)
15
- doc_ids = []
16
  docs= kb.upload_documents(documents)
17
  for doc in docs:
18
- doc_ids.append(doc.id)
19
- kb.async_parse_documents(doc_ids)
20
- time.sleep(60)
21
- rag.create_chat("test_create", datasets=[kb])
22
 
23
 
24
  def test_update_chat_with_name(get_api_key_fixture):
@@ -31,13 +27,10 @@ def test_update_chat_with_name(get_api_key_fixture):
31
  document = {"displayed_name": displayed_name, "blob": blob}
32
  documents = []
33
  documents.append(document)
34
- doc_ids = []
35
  docs = kb.upload_documents(documents)
36
  for doc in docs:
37
- doc_ids.append(doc.id)
38
- kb.async_parse_documents(doc_ids)
39
- time.sleep(60)
40
- chat = rag.create_chat("test_update", datasets=[kb])
41
  chat.update({"name": "new_chat"})
42
 
43
 
@@ -51,17 +44,27 @@ def test_delete_chats_with_success(get_api_key_fixture):
51
  document = {"displayed_name": displayed_name, "blob": blob}
52
  documents = []
53
  documents.append(document)
54
- doc_ids = []
55
  docs = kb.upload_documents(documents)
56
  for doc in docs:
57
- doc_ids.append(doc.id)
58
- kb.async_parse_documents(doc_ids)
59
- time.sleep(60)
60
- chat = rag.create_chat("test_delete", datasets=[kb])
61
  rag.delete_chats(ids=[chat.id])
62
 
 
63
  API_KEY = get_api_key_fixture
64
  rag = RAGFlow(API_KEY, HOST_ADDRESS)
 
 
 
 
 
 
 
 
 
 
 
 
65
  rag.list_chats()
66
 
67
 
 
1
  from ragflow import RAGFlow, Chat
 
2
  HOST_ADDRESS = 'http://127.0.0.1:9380'
3
 
4
  def test_create_chat_with_name(get_api_key_fixture):
 
11
  document = {"displayed_name":displayed_name,"blob":blob}
12
  documents = []
13
  documents.append(document)
 
14
  docs= kb.upload_documents(documents)
15
  for doc in docs:
16
+ doc.add_chunk("This is a test to add chunk")
17
+ rag.create_chat("test_create", dataset_ids=[kb.id])
 
 
18
 
19
 
20
  def test_update_chat_with_name(get_api_key_fixture):
 
27
  document = {"displayed_name": displayed_name, "blob": blob}
28
  documents = []
29
  documents.append(document)
 
30
  docs = kb.upload_documents(documents)
31
  for doc in docs:
32
+ doc.add_chunk("This is a test to add chunk")
33
+ chat = rag.create_chat("test_update", dataset_ids=[kb.id])
 
 
34
  chat.update({"name": "new_chat"})
35
 
36
 
 
44
  document = {"displayed_name": displayed_name, "blob": blob}
45
  documents = []
46
  documents.append(document)
 
47
  docs = kb.upload_documents(documents)
48
  for doc in docs:
49
+ doc.add_chunk("This is a test to add chunk")
50
+ chat = rag.create_chat("test_delete", dataset_ids=[kb.id])
 
 
51
  rag.delete_chats(ids=[chat.id])
52
 
53
+ def test_list_chats_with_success(get_api_key_fixture):
54
  API_KEY = get_api_key_fixture
55
  rag = RAGFlow(API_KEY, HOST_ADDRESS)
56
+ kb = rag.create_dataset(name="test_delete_chat")
57
+ displayed_name = "ragflow.txt"
58
+ with open("./ragflow.txt", "rb") as file:
59
+ blob = file.read()
60
+ document = {"displayed_name": displayed_name, "blob": blob}
61
+ documents = []
62
+ documents.append(document)
63
+ docs = kb.upload_documents(documents)
64
+ for doc in docs:
65
+ doc.add_chunk("This is a test to add chunk")
66
+ rag.create_chat("test_list_1", dataset_ids=[kb.id])
67
+ rag.create_chat("test_list_2", dataset_ids=[kb.id])
68
  rag.list_chats()
69
 
70
 
sdk/python/test/t_session.py CHANGED
@@ -10,16 +10,13 @@ def test_create_session_with_success(get_api_key_fixture):
10
  displayed_name = "ragflow.txt"
11
  with open("./ragflow.txt", "rb") as file:
12
  blob = file.read()
13
- document = {"displayed_name": displayed_name, "blob": blob}
14
  documents = []
15
  documents.append(document)
16
- doc_ids = []
17
- docs = kb.upload_documents(documents)
18
  for doc in docs:
19
- doc_ids.append(doc.id)
20
- kb.async_parse_documents(doc_ids)
21
- time.sleep(60)
22
- assistant = rag.create_chat(name="test_create_session", datasets=[kb])
23
  assistant.create_session()
24
 
25
 
@@ -30,16 +27,13 @@ def test_create_conversation_with_success(get_api_key_fixture):
30
  displayed_name = "ragflow.txt"
31
  with open("./ragflow.txt","rb") as file:
32
  blob = file.read()
33
- document = {"displayed_name":displayed_name,"blob":blob}
34
  documents = []
35
  documents.append(document)
36
- doc_ids = []
37
- docs= kb.upload_documents(documents)
38
  for doc in docs:
39
- doc_ids.append(doc.id)
40
- kb.async_parse_documents(doc_ids)
41
- time.sleep(60)
42
- assistant = rag.create_chat(name="test_create_conversation", datasets=[kb])
43
  session = assistant.create_session()
44
  question = "What is AI"
45
  for ans in session.ask(question, stream=True):
@@ -57,13 +51,10 @@ def test_delete_sessions_with_success(get_api_key_fixture):
57
  document = {"displayed_name":displayed_name,"blob":blob}
58
  documents = []
59
  documents.append(document)
60
- doc_ids = []
61
  docs= kb.upload_documents(documents)
62
  for doc in docs:
63
- doc_ids.append(doc.id)
64
- kb.async_parse_documents(doc_ids)
65
- time.sleep(60)
66
- assistant = rag.create_chat(name="test_delete_session", datasets=[kb])
67
  session = assistant.create_session()
68
  assistant.delete_sessions(ids=[session.id])
69
 
@@ -74,16 +65,13 @@ def test_update_session_with_name(get_api_key_fixture):
74
  displayed_name = "ragflow.txt"
75
  with open("./ragflow.txt","rb") as file:
76
  blob = file.read()
77
- document = {"displayed_name":displayed_name,"blob":blob}
78
  documents = []
79
  documents.append(document)
80
- doc_ids = []
81
- docs= kb.upload_documents(documents)
82
  for doc in docs:
83
- doc_ids.append(doc.id)
84
- kb.async_parse_documents(doc_ids)
85
- time.sleep(60)
86
- assistant = rag.create_chat(name="test_update_session", datasets=[kb])
87
  session = assistant.create_session(name="old session")
88
  session.update({"name": "new session"})
89
 
@@ -98,13 +86,10 @@ def test_list_sessions_with_success(get_api_key_fixture):
98
  document = {"displayed_name":displayed_name,"blob":blob}
99
  documents = []
100
  documents.append(document)
101
- doc_ids = []
102
  docs= kb.upload_documents(documents)
103
  for doc in docs:
104
- doc_ids.append(doc.id)
105
- kb.async_parse_documents(doc_ids)
106
- time.sleep(60)
107
- assistant = rag.create_chat(name="test_list_session", datasets=[kb])
108
  assistant.create_session("test_1")
109
  assistant.create_session("test_2")
110
  assistant.list_sessions()
 
10
  displayed_name = "ragflow.txt"
11
  with open("./ragflow.txt", "rb") as file:
12
  blob = file.read()
13
+ document = {"displayed_name":displayed_name,"blob":blob}
14
  documents = []
15
  documents.append(document)
16
+ docs= kb.upload_documents(documents)
 
17
  for doc in docs:
18
+ doc.add_chunk("This is a test to add chunk")
19
+ assistant=rag.create_chat("test_create", dataset_ids=[kb.id])
 
 
20
  assistant.create_session()
21
 
22
 
 
27
  displayed_name = "ragflow.txt"
28
  with open("./ragflow.txt","rb") as file:
29
  blob = file.read()
30
+ document = {"displayed_name": displayed_name, "blob": blob}
31
  documents = []
32
  documents.append(document)
33
+ docs = kb.upload_documents(documents)
 
34
  for doc in docs:
35
+ doc.add_chunk("This is a test to add chunk")
36
+ assistant = rag.create_chat("test_create", dataset_ids=[kb.id])
 
 
37
  session = assistant.create_session()
38
  question = "What is AI"
39
  for ans in session.ask(question, stream=True):
 
51
  document = {"displayed_name":displayed_name,"blob":blob}
52
  documents = []
53
  documents.append(document)
 
54
  docs= kb.upload_documents(documents)
55
  for doc in docs:
56
+ doc.add_chunk("This is a test to add chunk")
57
+ assistant=rag.create_chat("test_create", dataset_ids=[kb.id])
 
 
58
  session = assistant.create_session()
59
  assistant.delete_sessions(ids=[session.id])
60
 
 
65
  displayed_name = "ragflow.txt"
66
  with open("./ragflow.txt","rb") as file:
67
  blob = file.read()
68
+ document = {"displayed_name": displayed_name, "blob": blob}
69
  documents = []
70
  documents.append(document)
71
+ docs = kb.upload_documents(documents)
 
72
  for doc in docs:
73
+ doc.add_chunk("This is a test to add chunk")
74
+ assistant = rag.create_chat("test_create", dataset_ids=[kb.id])
 
 
75
  session = assistant.create_session(name="old session")
76
  session.update({"name": "new session"})
77
 
 
86
  document = {"displayed_name":displayed_name,"blob":blob}
87
  documents = []
88
  documents.append(document)
 
89
  docs= kb.upload_documents(documents)
90
  for doc in docs:
91
+ doc.add_chunk("This is a test to add chunk")
92
+ assistant=rag.create_chat("test_create", dataset_ids=[kb.id])
 
 
93
  assistant.create_session("test_1")
94
  assistant.create_session("test_2")
95
  assistant.list_sessions()