cecilia-uu committed on
Commit b2dbbc3 · 1 Parent(s): 58e43fa

Created the get_dataset and update_dataset APIs and fixed delete_dataset (#1201)


### What problem does this PR solve?

Added the get_dataset and update_dataset APIs.
Fixed delete_dataset.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
- [x] New Feature (non-breaking change which adds functionality)
- [x] Documentation Update
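
As a quick orientation for the changes below, the new SDK surface can be exercised roughly like this (a minimal sketch based on the methods added or fixed in `sdk/python/ragflow/ragflow.py`; the API key and host address are placeholders):

```python
from ragflow import RAGFlow

# Placeholders: substitute your own API key and server address.
ragflow = RAGFlow("YOUR_API_KEY", "http://127.0.0.1:9380")

# create_dataset now also returns the dataset_id alongside the dataset_name.
created = ragflow.create_dataset("kb1")
print(created["data"]["dataset_id"])

# New in this PR: fetch details and update fields, addressed by dataset name.
details = ragflow.get_dataset("kb1")
updated = ragflow.update_dataset("kb1", description="demo dataset", language="English")

# Fixed in this PR: delete_dataset returns the server's JSON result directly.
removed = ragflow.delete_dataset("kb1")
print(removed["code"], removed["message"])
```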

api/apps/dataset_api.py CHANGED
@@ -20,6 +20,7 @@ import re
 from datetime import datetime, timedelta
 from flask import request, Response
 from flask_login import login_required, current_user
+from httpx import HTTPError
 
 from api.db import FileType, ParserType, FileSource, StatusEnum
 from api.db.db_models import APIToken, API4Conversation, Task, File
@@ -45,6 +46,7 @@ from api.utils.api_utils import construct_json_result, construct_result, constru
 from api.contants import NAME_LENGTH_LIMIT
 
 # ------------------------------ create a dataset ---------------------------------------
+
 @manager.route('/', methods=['POST'])
 @login_required  # use login
 @validate_request("name")  # check name key
@@ -104,18 +106,20 @@ def create_dataset():
         request_body["id"] = get_uuid()
         request_body["tenant_id"] = tenant_id
         request_body["created_by"] = tenant_id
-        e, t = TenantService.get_by_id(tenant_id)
-        if not e:
+        exist, t = TenantService.get_by_id(tenant_id)
+        if not exist:
             return construct_result(code=RetCode.AUTHENTICATION_ERROR, message="Tenant not found.")
         request_body["embd_id"] = t.embd_id
         if not KnowledgebaseService.save(**request_body):
             # failed to create new dataset
             return construct_result()
-        return construct_json_result(data={"dataset_name": request_body["name"]})
+        return construct_json_result(code=RetCode.SUCCESS,
+                                     data={"dataset_name": request_body["name"], "dataset_id": request_body["id"]})
     except Exception as e:
         return construct_error_response(e)
 
 # -----------------------------list datasets-------------------------------------------------------
+
 @manager.route('/', methods=['GET'])
 @login_required
 def list_datasets():
@@ -125,67 +129,140 @@ def list_datasets():
     desc = request.args.get("desc", True)
     try:
         tenants = TenantService.get_joined_tenants_by_user_id(current_user.id)
-        kbs = KnowledgebaseService.get_by_tenant_ids_by_offset(
+        datasets = KnowledgebaseService.get_by_tenant_ids_by_offset(
             [m["tenant_id"] for m in tenants], current_user.id, int(offset), int(count), orderby, desc)
-        return construct_json_result(data=kbs, code=RetCode.DATA_ERROR, message=f"attempt to list datasets")
+        return construct_json_result(data=datasets, code=RetCode.SUCCESS, message=f"List datasets successfully!")
     except Exception as e:
         return construct_error_response(e)
+    except HTTPError as http_err:
+        return construct_json_result(http_err)
 
 # ---------------------------------delete a dataset ----------------------------
 
 @manager.route('/<dataset_id>', methods=['DELETE'])
 @login_required
-@validate_request("dataset_id")
 def remove_dataset(dataset_id):
-    req = request.json
     try:
-        kbs = KnowledgebaseService.query(
-            created_by=current_user.id, id=req["dataset_id"])
-        if not kbs:
-            return construct_json_result(
-                data=False, message=f'Only owner of knowledgebase authorized for this operation.',
-                code=RetCode.OPERATING_ERROR)
-
-        for doc in DocumentService.query(kb_id=req["dataset_id"]):
-            if not DocumentService.remove_document(doc, kbs[0].tenant_id):
-                return construct_json_result(
-                    message="Database error (Document removal)!")
+        datasets = KnowledgebaseService.query(created_by=current_user.id, id=dataset_id)
+
+        # according to the id, searching for the dataset
+        if not datasets:
+            return construct_json_result(message=f'The dataset cannot be found for your current account.',
+                                         code=RetCode.OPERATING_ERROR)
+
+        # Iterating the documents inside the dataset
+        for doc in DocumentService.query(kb_id=dataset_id):
+            if not DocumentService.remove_document(doc, datasets[0].tenant_id):
+                # the process of deleting failed
+                return construct_json_result(code=RetCode.DATA_ERROR,
+                                             message="There was an error during the document removal process. "
+                                                     "Please check the status of the RAGFlow server and try the removal again.")
+            # delete the other files
             f2d = File2DocumentService.get_by_document_id(doc.id)
             FileService.filter_delete([File.source_type == FileSource.KNOWLEDGEBASE, File.id == f2d[0].file_id])
             File2DocumentService.delete_by_document_id(doc.id)
 
-        if not KnowledgebaseService.delete_by_id(req["dataset_id"]):
-            return construct_json_result(
-                message="Database error (Knowledgebase removal)!")
-        return construct_json_result(code=RetCode.DATA_ERROR, message=f"attempt to remove dataset: {dataset_id}")
+        # delete the dataset
+        if not KnowledgebaseService.delete_by_id(dataset_id):
+            return construct_json_result(code=RetCode.DATA_ERROR, message="There was an error during the dataset removal process. "
+                                                                          "Please check the status of the RAGFlow server and try the removal again.")
+        # success
+        return construct_json_result(code=RetCode.SUCCESS, message=f"Remove dataset: {dataset_id} successfully")
     except Exception as e:
         return construct_error_response(e)
 
 # ------------------------------ get details of a dataset ----------------------------------------
+
 @manager.route('/<dataset_id>', methods=['GET'])
 @login_required
-@validate_request("dataset_id")
-def get_dataset():
-    dataset_id = request.args["dataset_id"]
+def get_dataset(dataset_id):
     try:
         dataset = KnowledgebaseService.get_detail(dataset_id)
         if not dataset:
-            return construct_json_result(
-                message="Can't find this knowledgebase!")
-        return construct_json_result(code=RetCode.DATA_ERROR, message=f"attempt to get detail of dataset: {dataset_id}")
+            return construct_json_result(code=RetCode.DATA_ERROR, message="Can't find this dataset!")
+        return construct_json_result(data=dataset, code=RetCode.SUCCESS)
     except Exception as e:
         return construct_json_result(e)
 
 # ------------------------------ update a dataset --------------------------------------------
+
 @manager.route('/<dataset_id>', methods=['PUT'])
 @login_required
-@validate_request("name")
 def update_dataset(dataset_id):
-    return construct_json_result(code=RetCode.DATA_ERROR, message=f"attempt to update dataset: {dataset_id}")
+    req = request.json
+    try:
+        # the request cannot be empty
+        if not req:
+            return construct_json_result(code=RetCode.DATA_ERROR, message="Please input at least one parameter that "
+                                                                          "you want to update!")
+        # check whether the dataset can be found
+        if not KnowledgebaseService.query(created_by=current_user.id, id=dataset_id):
+            return construct_json_result(message=f'Only the owner of knowledgebase is authorized for this operation!',
+                                         code=RetCode.OPERATING_ERROR)
+
+        exist, dataset = KnowledgebaseService.get_by_id(dataset_id)
+        # check whether there is this dataset
+        if not exist:
+            return construct_json_result(code=RetCode.DATA_ERROR, message="This dataset cannot be found!")
+
+        if 'name' in req:
+            name = req["name"].strip()
+            # check whether there is duplicate name
+            if name.lower() != dataset.name.lower() \
+                    and len(KnowledgebaseService.query(name=name, tenant_id=current_user.id,
+                                                       status=StatusEnum.VALID.value)) > 1:
+                return construct_json_result(code=RetCode.DATA_ERROR, message=f"The name: {name.lower()} is already used by other "
+                                                                              f"datasets. Please choose a different name.")
 
+        dataset_updating_data = {}
+        chunk_num = req.get("chunk_num")
+        # modify the value of 11 parameters
 
+        # 2 parameters: embedding id and chunk method
+        # only if chunk_num is 0, the user can update the embedding id
+        if req.get('embedding_model_id'):
+            if chunk_num == 0:
+                dataset_updating_data['embd_id'] = req['embedding_model_id']
+            else:
+                construct_json_result(code=RetCode.DATA_ERROR, message="You have already parsed the document in this "
+                                                                       "dataset, so you cannot change the embedding "
+                                                                       "model.")
+        # only if chunk_num is 0, the user can update the chunk_method
+        if req.get("chunk_method"):
+            if chunk_num == 0:
+                dataset_updating_data['parser_id'] = req["chunk_method"]
+            else:
+                construct_json_result(code=RetCode.DATA_ERROR, message="You have already parsed the document "
+                                                                       "in this dataset, so you cannot "
+                                                                       "change the chunk method.")
+        # convert the photo parameter to avatar
+        if req.get("photo"):
+            dataset_updating_data['avatar'] = req["photo"]
 
+        # layout_recognize
+        if 'layout_recognize' in req:
+            if 'parser_config' not in dataset_updating_data:
+                dataset_updating_data['parser_config'] = {}
+            dataset_updating_data['parser_config']['layout_recognize'] = req['layout_recognize']
 
+        # TODO: updating use_raptor needs to construct a class
 
+        # 6 parameters
+        for key in ['name', 'language', 'description', 'permission', 'id', 'token_num']:
+            if key in req:
+                dataset_updating_data[key] = req.get(key)
 
+        # update
+        if not KnowledgebaseService.update_by_id(dataset.id, dataset_updating_data):
+            return construct_json_result(code=RetCode.OPERATING_ERROR, message="Failed to update! "
+                                                                               "Please check the status of RAGFlow "
+                                                                               "server and try again!")
 
+        exist, dataset = KnowledgebaseService.get_by_id(dataset.id)
+        if not exist:
+            return construct_json_result(code=RetCode.DATA_ERROR, message="Failed to get the dataset "
+                                                                          "using the dataset ID.")
+
+        return construct_json_result(data=dataset.to_json(), code=RetCode.SUCCESS)
+    except Exception as e:
+        return construct_error_response(e)
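
The routes above are mounted under the dataset blueprint (`http://<host_address>/api/v1/dataset`, per the SDK's `dataset_url`), so they can also be called over plain HTTP. A hedged sketch with `requests`; the Authorization header shape is an assumption here and should mirror the SDK's `authorization_header`, and `DATASET_ID` is a placeholder:

```python
import requests

base = "http://127.0.0.1:9380/api/v1/dataset"       # assumed host; path taken from the SDK's dataset_url
headers = {"Authorization": "Bearer YOUR_API_KEY"}  # assumption: mirror the SDK's authorization_header

# GET /<dataset_id>: details of one dataset (new get_dataset route).
detail = requests.get(f"{base}/DATASET_ID", headers=headers).json()

# PUT /<dataset_id>: update selected fields (new update_dataset route).
updated = requests.put(f"{base}/DATASET_ID", headers=headers,
                       json={"description": "demo", "language": "English"}).json()

# DELETE /<dataset_id>: remove the dataset and its documents (fixed remove_dataset route).
removed = requests.delete(f"{base}/DATASET_ID", headers=headers).json()
```
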
docs/references/ragflow_api.md CHANGED
@@ -55,7 +55,8 @@ You are *required* to save the `data.id` value returned in the response data, wh
 {
     "code": 0,
     "data": {
-        "dataset_name": "kb1"
+        "dataset_name": "kb1",
+        "dataset_id": "375e8ada2d3c11ef98f93043d7ee537e"
     },
     "message": "success"
 }
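
The example response now carries both `dataset_name` and `dataset_id`, and it is the `dataset_id` that callers keep for the id-addressed endpoints. A small hedged sketch of saving it after creation (the endpoint path and header shape are assumptions that mirror the SDK):

```python
import requests

# Assumed endpoint and header shape; see sdk/python/ragflow/ragflow.py for the SDK equivalent.
response = requests.post("http://127.0.0.1:9380/api/v1/dataset",
                         headers={"Authorization": "Bearer YOUR_API_KEY"},
                         json={"name": "kb1"}).json()

dataset_id = response["data"]["dataset_id"]  # e.g. "375e8ada2d3c11ef98f93043d7ee537e"
print(dataset_id)
```
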
sdk/python/ragflow/ragflow.py CHANGED
@@ -17,11 +17,9 @@ import os
 import requests
 import json
 
-from httpx import HTTPError
-
 
 class RAGFlow:
-    def __init__(self, user_key, base_url, version = 'v1'):
+    def __init__(self, user_key, base_url, version='v1'):
         '''
         api_url: http://<host_address>/api/v1
         dataset_url: http://<host_address>/api/v1/dataset
@@ -41,14 +39,10 @@ class RAGFlow:
 
     def delete_dataset(self, dataset_name):
        dataset_id = self.find_dataset_id_by_name(dataset_name)
-        if not dataset_id:
-            return {"success": False, "message": "Dataset not found."}
 
-        res = requests.delete(f"{self.dataset_url}/{dataset_id}", headers=self.authorization_header)
-        if res.status_code == 200:
-            return {"success": True, "message": "Dataset deleted successfully!"}
-        else:
-            return {"success": False, "message": f"Other status code: {res.status_code}"}
+        endpoint = f"{self.dataset_url}/{dataset_id}"
+        res = requests.delete(endpoint, headers=self.authorization_header)
+        return res.json()
 
     def find_dataset_id_by_name(self, dataset_name):
         res = requests.get(self.dataset_url, headers=self.authorization_header)
@@ -64,42 +58,18 @@ class RAGFlow:
             "orderby": orderby,
             "desc": desc
         }
-        try:
-            response = requests.get(url=self.dataset_url, params=params, headers=self.authorization_header)
-            response.raise_for_status()  # if it is not 200
-            original_data = response.json()
-            # TODO: format the data
-            # print(original_data)
-            # # Process the original data into the desired format
-            # formatted_data = {
-            #     "datasets": [
-            #         {
-            #             "id": dataset["id"],
-            #             "created": dataset["create_time"],  # Adjust the key based on the actual response
-            #             "fileCount": dataset["doc_num"],  # Adjust the key based on the actual response
-            #             "name": dataset["name"]
-            #         }
-            #         for dataset in original_data
-            #     ]
-            # }
-            return response.status_code, original_data
-        except HTTPError as http_err:
-            print(f"HTTP error occurred: {http_err}")
-        except Exception as err:
-            print(f"An error occurred: {err}")
+        response = requests.get(url=self.dataset_url, params=params, headers=self.authorization_header)
+        return response.json()
 
-    def get_dataset(self, dataset_id):
+    def get_dataset(self, dataset_name):
+        dataset_id = self.find_dataset_id_by_name(dataset_name)
         endpoint = f"{self.dataset_url}/{dataset_id}"
-        response = requests.get(endpoint)
-        if response.status_code == 200:
-            return response.json()
-        else:
-            return None
+        response = requests.get(endpoint, headers=self.authorization_header)
+        return response.json()
+
+    def update_dataset(self, dataset_name, **params):
+        dataset_id = self.find_dataset_id_by_name(dataset_name)
 
-    def update_dataset(self, dataset_id, params):
         endpoint = f"{self.dataset_url}/{dataset_id}"
-        response = requests.put(endpoint, json=params)
-        if response.status_code == 200:
-            return True
-        else:
-            return False
+        response = requests.put(endpoint, json=params, headers=self.authorization_header)
+        return response.json()
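
Design note on the SDK changes above: `get_dataset`, `update_dataset`, and `delete_dataset` all take the dataset *name* and resolve the id internally through `find_dataset_id_by_name`, and `update_dataset` forwards its keyword arguments verbatim as the JSON body of the PUT request, so the accepted keys are exactly those handled by the server-side `update_dataset` route. A minimal usage sketch (credentials and values are placeholders):

```python
from ragflow import RAGFlow

ragflow = RAGFlow("YOUR_API_KEY", "http://127.0.0.1:9380")  # placeholder credentials

# Keyword arguments become the PUT body, e.g. {"description": ..., "language": ...}.
result = ragflow.update_dataset("kb1", description="updated via SDK", language="English")
print(result["code"], result.get("message"))
```
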
sdk/python/test/common.py CHANGED
@@ -1,4 +1,4 @@
 
 
-API_KEY = 'ImFmNWQ3YTY0Mjg5NjExZWZhNTdjMzA0M2Q3ZWU1MzdlIg.ZmldwA.9oP9pVtuEQSpg-Z18A2eOkWO-3E'
+API_KEY = 'ImFhMmJhZmUwMmQxNzExZWZhZDdmMzA0M2Q3ZWU1MzdlIg.ZnDsIQ.u-0-_qCRU6a4WICxyAPsjaafyOo'
 HOST_ADDRESS = 'http://127.0.0.1:9380'
sdk/python/test/test_dataset.py CHANGED
@@ -1,3 +1,4 @@
+from api.settings import RetCode
 from test_sdkbase import TestSdk
 from ragflow import RAGFlow
 import pytest
@@ -15,6 +16,19 @@ class TestDataset(TestSdk):
     4. update the kb
     5. delete the kb
     """
+
+    def setup_method(self):
+        """
+        Delete all the datasets.
+        """
+        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
+        listed_data = ragflow.list_dataset()
+        listed_data = listed_data['data']
+
+        listed_names = {d['name'] for d in listed_data}
+        for name in listed_names:
+            ragflow.delete_dataset(name)
+
     # -----------------------create_dataset---------------------------------
     def test_create_dataset_with_success(self):
         """
@@ -23,7 +37,7 @@ class TestDataset(TestSdk):
         ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
         # create a kb
         res = ragflow.create_dataset("kb1")
-        assert res['code'] == 0 and res['message'] == 'success'
+        assert res['code'] == RetCode.SUCCESS and res['message'] == 'success'
 
     def test_create_dataset_with_empty_name(self):
         """
@@ -31,7 +45,7 @@ class TestDataset(TestSdk):
         """
         ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
         res = ragflow.create_dataset("")
-        assert res['message'] == 'Empty dataset name' and res['code'] == 102
+        assert res['message'] == 'Empty dataset name' and res['code'] == RetCode.DATA_ERROR
 
     def test_create_dataset_with_name_exceeding_limit(self):
         """
@@ -41,7 +55,7 @@ class TestDataset(TestSdk):
         ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
         res = ragflow.create_dataset(name)
         assert (res['message'] == f"Dataset name: {name} with length {len(name)} exceeds {NAME_LENGTH_LIMIT}!"
-                and res['code'] == 102)
+                and res['code'] == RetCode.DATA_ERROR)
 
     def test_create_dataset_name_with_space_in_the_middle(self):
         """
@@ -50,7 +64,7 @@ class TestDataset(TestSdk):
         name = "k b"
         ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
         res = ragflow.create_dataset(name)
-        assert (res['code'] == 0 and res['message'] == 'success')
+        assert (res['code'] == RetCode.SUCCESS and res['message'] == 'success')
 
     def test_create_dataset_name_with_space_in_the_head(self):
         """
@@ -59,7 +73,7 @@ class TestDataset(TestSdk):
         name = " kb"
         ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
         res = ragflow.create_dataset(name)
-        assert (res['code'] == 0 and res['message'] == 'success')
+        assert (res['code'] == RetCode.SUCCESS and res['message'] == 'success')
 
     def test_create_dataset_name_with_space_in_the_tail(self):
         """
@@ -68,7 +82,7 @@ class TestDataset(TestSdk):
         name = "kb "
         ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
         res = ragflow.create_dataset(name)
-        assert (res['code'] == 0 and res['message'] == 'success')
+        assert (res['code'] == RetCode.SUCCESS and res['message'] == 'success')
 
     def test_create_dataset_name_with_space_in_the_head_and_tail_and_length_exceed_limit(self):
         """
@@ -78,7 +92,7 @@ class TestDataset(TestSdk):
         name = " " + "k" * NAME_LENGTH_LIMIT + " "
         ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
         res = ragflow.create_dataset(name)
-        assert (res['code'] == 0 and res['message'] == 'success')
+        assert (res['code'] == RetCode.SUCCESS and res['message'] == 'success')
 
     def test_create_dataset_with_two_same_name(self):
         """
@@ -86,9 +100,9 @@ class TestDataset(TestSdk):
         """
         ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
         res = ragflow.create_dataset("kb")
-        assert (res['code'] == 0 and res['message'] == 'success')
+        assert (res['code'] == RetCode.SUCCESS and res['message'] == 'success')
         res = ragflow.create_dataset("kb")
-        assert (res['code'] == 0 and res['message'] == 'success')
+        assert (res['code'] == RetCode.SUCCESS and res['message'] == 'success')
 
     def test_create_dataset_with_only_space_in_the_name(self):
         """
@@ -96,7 +110,7 @@ class TestDataset(TestSdk):
         """
         ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
         res = ragflow.create_dataset(" ")
-        assert (res['code'] == 0 and res['message'] == 'success')
+        assert (res['code'] == RetCode.SUCCESS and res['message'] == 'success')
 
     def test_create_dataset_with_space_number_exceeding_limit(self):
         """
@@ -105,7 +119,7 @@ class TestDataset(TestSdk):
         ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
         name = " " * NAME_LENGTH_LIMIT
         res = ragflow.create_dataset(name)
-        assert (res['code'] == 0 and res['message'] == 'success')
+        assert (res['code'] == RetCode.SUCCESS and res['message'] == 'success')
 
     def test_create_dataset_with_name_having_return(self):
         """
@@ -114,7 +128,7 @@ class TestDataset(TestSdk):
         ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
         name = "kb\n"
         res = ragflow.create_dataset(name)
-        assert (res['code'] == 0 and res['message'] == 'success')
+        assert (res['code'] == RetCode.SUCCESS and res['message'] == 'success')
 
     def test_create_dataset_with_name_having_the_null_character(self):
         """
@@ -123,7 +137,7 @@ class TestDataset(TestSdk):
         ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
         name = "kb\0"
         res = ragflow.create_dataset(name)
-        assert (res['code'] == 0 and res['message'] == 'success')
+        assert (res['code'] == RetCode.SUCCESS and res['message'] == 'success')
 
     # -----------------------list_dataset---------------------------------
     def test_list_dataset_success(self):
@@ -133,10 +147,7 @@ class TestDataset(TestSdk):
         ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
         # Call the list_datasets method
         response = ragflow.list_dataset()
-
-        code, datasets = response
-
-        assert code == 200
+        assert response['code'] == RetCode.SUCCESS
 
     def test_list_dataset_with_checking_size_and_name(self):
         """
@@ -152,12 +163,12 @@ class TestDataset(TestSdk):
             dataset_name = response['data']['dataset_name']
             real_name_to_create.add(dataset_name)
 
-        status_code, listed_data = ragflow.list_dataset(0, 3)
-        listed_data = listed_data['data']
+        response = ragflow.list_dataset(0, 3)
+        listed_data = response['data']
 
         listed_names = {d['name'] for d in listed_data}
         assert listed_names == real_name_to_create
-        assert status_code == 200
+        assert response['code'] == RetCode.SUCCESS
         assert len(listed_data) == len(datasets_to_create)
 
     def test_list_dataset_with_getting_empty_result(self):
@@ -174,12 +185,13 @@ class TestDataset(TestSdk):
             dataset_name = response['data']['dataset_name']
             real_name_to_create.add(dataset_name)
 
-        status_code, listed_data = ragflow.list_dataset(0, 0)
-        listed_data = listed_data['data']
+        response = ragflow.list_dataset(0, 0)
+        listed_data = response['data']
 
         listed_names = {d['name'] for d in listed_data}
+
         assert listed_names == real_name_to_create
-        assert status_code == 200
+        assert response['code'] == RetCode.SUCCESS
         assert len(listed_data) == 0
 
     def test_list_dataset_with_creating_100_knowledge_bases(self):
@@ -196,12 +208,12 @@ class TestDataset(TestSdk):
             dataset_name = response['data']['dataset_name']
             real_name_to_create.add(dataset_name)
 
-        status_code, listed_data = ragflow.list_dataset(0, 100)
-        listed_data = listed_data['data']
+        res = ragflow.list_dataset(0, 100)
+        listed_data = res['data']
 
         listed_names = {d['name'] for d in listed_data}
         assert listed_names == real_name_to_create
-        assert status_code == 200
+        assert res['code'] == RetCode.SUCCESS
         assert len(listed_data) == 100
 
     def test_list_dataset_with_showing_one_dataset(self):
@@ -210,9 +222,8 @@ class TestDataset(TestSdk):
         """
         ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
         response = ragflow.list_dataset(0, 1)
-        code, response = response
         datasets = response['data']
-        assert len(datasets) == 1
+        assert len(datasets) == 1 and response['code'] == RetCode.SUCCESS
 
     def test_list_dataset_failure(self):
         """
@@ -220,8 +231,7 @@ class TestDataset(TestSdk):
         """
         ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
         response = ragflow.list_dataset(-1, -1)
-        _, res = response
-        assert "IndexError" in res['message']
+        assert "IndexError" in response['message'] and response['code'] == RetCode.EXCEPTION_ERROR
 
     def test_list_dataset_for_empty_datasets(self):
         """
@@ -229,9 +239,8 @@ class TestDataset(TestSdk):
         """
         ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
         response = ragflow.list_dataset()
-        code, response = response
         datasets = response['data']
-        assert len(datasets) == 0
+        assert len(datasets) == 0 and response['code'] == RetCode.SUCCESS
 
     # TODO: have to set the limitation of the number of datasets
 
@@ -245,8 +254,8 @@ class TestDataset(TestSdk):
         res = ragflow.create_dataset("kb0")
         real_dataset_name = res['data']['dataset_name']
         # delete this dataset
-        result = ragflow.delete_dataset(real_dataset_name)
-        assert result["success"] is True
+        res = ragflow.delete_dataset(real_dataset_name)
+        assert res['code'] == RetCode.SUCCESS and 'successfully' in res['message']
 
     def test_delete_dataset_with_not_existing_dataset(self):
         """
@@ -254,7 +263,7 @@ class TestDataset(TestSdk):
         """
         ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
         res = ragflow.delete_dataset("weird_dataset")
-        assert res["success"] is False
+        assert res['code'] == RetCode.OPERATING_ERROR and res['message'] == 'The dataset cannot be found for your current account.'
 
     def test_delete_dataset_with_creating_100_datasets_and_deleting_100_datasets(self):
         """
@@ -273,40 +282,46 @@ class TestDataset(TestSdk):
 
         for name in real_name_to_create:
             res = ragflow.delete_dataset(name)
-            assert res["success"] is True
+            assert res['code'] == RetCode.SUCCESS and 'successfully' in res['message']
 
     def test_delete_dataset_with_space_in_the_middle_of_the_name(self):
         """
        Test deleting a dataset when its name has space in the middle.
         """
         ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
+        ragflow.create_dataset("k b")
         res = ragflow.delete_dataset("k b")
-        print(res)
-        assert res["success"] is True
+        assert res['code'] == RetCode.SUCCESS and 'successfully' in res['message']
 
     def test_delete_dataset_with_space_in_the_head_of_the_name(self):
         """
         Test deleting a dataset when its name has space in the head.
         """
         ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
+        ragflow.create_dataset(" kb")
         res = ragflow.delete_dataset(" kb")
-        assert res["success"] is False
+        assert (res['code'] == RetCode.OPERATING_ERROR
+                and res['message'] == 'The dataset cannot be found for your current account.')
 
     def test_delete_dataset_with_space_in_the_tail_of_the_name(self):
         """
         Test deleting a dataset when its name has space in the tail.
         """
         ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
+        ragflow.create_dataset("kb ")
         res = ragflow.delete_dataset("kb ")
-        assert res["success"] is False
+        assert (res['code'] == RetCode.OPERATING_ERROR
+                and res['message'] == 'The dataset cannot be found for your current account.')
 
     def test_delete_dataset_with_only_space_in_the_name(self):
         """
         Test deleting a dataset when its name only has space.
         """
         ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
+        ragflow.create_dataset(" ")
         res = ragflow.delete_dataset(" ")
-        assert res["success"] is False
+        assert (res['code'] == RetCode.OPERATING_ERROR
+                and res['message'] == 'The dataset cannot be found for your current account.')
 
     def test_delete_dataset_with_only_exceeding_limit_space_in_the_name(self):
         """
@@ -314,8 +329,10 @@ class TestDataset(TestSdk):
         """
         ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
         name = " " * (NAME_LENGTH_LIMIT + 1)
+        ragflow.create_dataset(name)
         res = ragflow.delete_dataset(name)
-        assert res["success"] is False
+        assert (res['code'] == RetCode.OPERATING_ERROR
+                and res['message'] == 'The dataset cannot be found for your current account.')
 
     def test_delete_dataset_with_name_with_space_in_the_head_and_tail_and_length_exceed_limit(self):
         """
@@ -324,10 +341,102 @@ class TestDataset(TestSdk):
         """
         ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
         name = " " + "k" * NAME_LENGTH_LIMIT + " "
+        ragflow.create_dataset(name)
         res = ragflow.delete_dataset(name)
-        assert res["success"] is False
+        assert (res['code'] == RetCode.OPERATING_ERROR
+                and res['message'] == 'The dataset cannot be found for your current account.')
+
+    # ---------------------------------get_dataset-----------------------------------------
+
+    def test_get_dataset_with_success(self):
+        """
+        Test getting a dataset which exists.
+        """
+        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
+        response = ragflow.create_dataset("test")
+        dataset_name = response['data']['dataset_name']
+        res = ragflow.get_dataset(dataset_name)
+        assert res['code'] == RetCode.SUCCESS and res['data']['name'] == dataset_name
+
+    def test_get_dataset_with_failure(self):
+        """
+        Test getting a dataset which does not exist.
+        """
+        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
+        res = ragflow.get_dataset("weird_dataset")
+        assert res['code'] == RetCode.DATA_ERROR and res['message'] == "Can't find this dataset!"
+
+    # ---------------------------------update a dataset-----------------------------------
+
+    def test_update_dataset_without_existing_dataset(self):
+        """
+        Test updating a dataset which does not exist.
+        """
+        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
+        params = {
+            'name': 'new_name3',
+            'description': 'new_description',
+            "permission": 'me',
+            "parser_id": 'naive',
+            "language": 'English'
+        }
+        res = ragflow.update_dataset("weird_dataset", **params)
+        assert (res['code'] == RetCode.OPERATING_ERROR
+                and res['message'] == 'Only the owner of knowledgebase is authorized for this operation!')
+
+    def test_update_dataset_with_updating_six_parameters(self):
+        """
+        Test updating a dataset when updating six parameters.
+        """
+        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
+        ragflow.create_dataset("new_name1")
+        params = {
+            'name': 'new_name',
+            'description': 'new_description1',
+            "permission": 'me',
+            "parser_id": 'naive',
+            "language": 'English'
+        }
+        res = ragflow.update_dataset("new_name1", **params)
+        assert res['code'] == RetCode.SUCCESS
+        assert (res['data']['description'] == 'new_description1'
+                and res['data']['name'] == 'new_name' and res['data']['permission'] == 'me'
+                and res['data']['language'] == 'English' and res['data']['parser_id'] == 'naive')
+
+    def test_update_dataset_with_updating_two_parameters(self):
+        """
+        Test updating a dataset when updating two parameters.
+        """
+        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
+        ragflow.create_dataset("new_name2")
+        params = {
+            "name": "new_name3",
+            "language": 'English'
+        }
+        res = ragflow.update_dataset("new_name2", **params)
+        assert (res['code'] == RetCode.SUCCESS and res['data']['name'] == "new_name3"
+                and res['data']['language'] == 'English')
+
+    def test_update_dataset_with_updating_layout_recognize(self):
+        """Test updating a dataset with only updating the layout_recognize"""
+        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
+        ragflow.create_dataset("test_update_dataset_with_updating_layout_recognize")
+        params = {
+            "layout_recognize": False
+        }
+        res = ragflow.update_dataset("test_update_dataset_with_updating_layout_recognize", **params)
+        assert res['code'] == RetCode.SUCCESS and res['data']['parser_config']['layout_recognize'] is False
+
+    def test_update_dataset_with_empty_parameter(self):
+        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
+        ragflow.create_dataset("test_update_dataset_with_empty_parameter")
+        params = {}
+        res = ragflow.update_dataset("test_update_dataset_with_empty_parameter", **params)
+        assert (res['code'] == RetCode.DATA_ERROR
+                and res['message'] == 'Please input at least one parameter that you want to update!')
+
+    # ---------------------------------mix the different methods--------------------------
 
-    # ---------------------------------mix the different methods--------------------
     def test_create_and_delete_dataset_together(self):
         """
         Test creating 1 dataset, and then deleting 1 dataset.
@@ -336,11 +445,11 @@ class TestDataset(TestSdk):
         # create 1 dataset
         ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
         res = ragflow.create_dataset("ddd")
-        assert res['code'] == 0 and res['message'] == 'success'
+        assert res['code'] == RetCode.SUCCESS and res['message'] == 'success'
 
         # delete 1 dataset
         res = ragflow.delete_dataset("ddd")
-        assert res["success"] is True
+        assert res["code"] == RetCode.SUCCESS
 
         # create 10 datasets
         datasets_to_create = ["dataset1"] * 10
@@ -355,5 +464,5 @@ class TestDataset(TestSdk):
         # delete 10 datasets
         for name in real_name_to_create:
             res = ragflow.delete_dataset(name)
-            assert res["success"] is True
+            assert res["code"] == RetCode.SUCCESS
 