liuhua committed · Commit b7fdabe · 1 Parent(s): 124ecb2

Delete useless files (#2889)
### What problem does this PR solve?

Delete useless files

### Type of change

- [x] Other (please describe): Delete useless files

Co-authored-by: liuhua <[email protected]>
- api/apps/dataset_api.py +0 -880
- sdk/python/test/test_basic.py +0 -48
- sdk/python/test/test_dataset.py +0 -468
- sdk/python/test/test_document.py +0 -1130
api/apps/dataset_api.py
DELETED
@@ -1,880 +0,0 @@
#
#  Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
import os
import pathlib
import re
import warnings
from functools import partial
from io import BytesIO

from elasticsearch_dsl import Q
from flask import request, send_file
from flask_login import login_required, current_user
from httpx import HTTPError

from api.contants import NAME_LENGTH_LIMIT
from api.db import FileType, ParserType, FileSource, TaskStatus
from api.db import StatusEnum
from api.db.db_models import File
from api.db.services import duplicate_name
from api.db.services.document_service import DocumentService
from api.db.services.file2document_service import File2DocumentService
from api.db.services.file_service import FileService
from api.db.services.knowledgebase_service import KnowledgebaseService
from api.db.services.user_service import TenantService
from api.settings import RetCode
from api.utils import get_uuid
from api.utils.api_utils import construct_json_result, construct_error_response
from api.utils.api_utils import construct_result, validate_request
from api.utils.file_utils import filename_type, thumbnail
from rag.app import book, laws, manual, naive, one, paper, presentation, qa, resume, table, picture, audio, email
from rag.nlp import search
from rag.utils.es_conn import ELASTICSEARCH
from rag.utils.storage_factory import STORAGE_IMPL

MAXIMUM_OF_UPLOADING_FILES = 256


# ------------------------------ create a dataset ---------------------------------------

@manager.route("/", methods=["POST"])
@login_required  # use login
@validate_request("name")  # check name key
def create_dataset():
    # Check if Authorization header is present
    authorization_token = request.headers.get("Authorization")
    if not authorization_token:
        return construct_json_result(code=RetCode.AUTHENTICATION_ERROR, message="Authorization header is missing.")

    # TODO: Login or API key
    # objs = APIToken.query(token=authorization_token)
    #
    # # Authorization error
    # if not objs:
    #     return construct_json_result(code=RetCode.AUTHENTICATION_ERROR, message="Token is invalid.")
    #
    # tenant_id = objs[0].tenant_id

    tenant_id = current_user.id
    request_body = request.json

    # In case that there's no name
    if "name" not in request_body:
        return construct_json_result(code=RetCode.DATA_ERROR, message="Expected 'name' field in request body")

    dataset_name = request_body["name"]

    # empty dataset_name
    if not dataset_name:
        return construct_json_result(code=RetCode.DATA_ERROR, message="Empty dataset name")

    # In case that there's space in the head or the tail
    dataset_name = dataset_name.strip()

    # In case that the length of the name exceeds the limit
    dataset_name_length = len(dataset_name)
    if dataset_name_length > NAME_LENGTH_LIMIT:
        return construct_json_result(
            code=RetCode.DATA_ERROR,
            message=f"Dataset name: {dataset_name} with length {dataset_name_length} exceeds {NAME_LENGTH_LIMIT}!")

    # In case that there are other fields in the data-binary
    if len(request_body.keys()) > 1:
        name_list = []
        for key_name in request_body.keys():
            if key_name != "name":
                name_list.append(key_name)
        return construct_json_result(code=RetCode.DATA_ERROR,
                                     message=f"fields: {name_list}, are not allowed in request body.")

    # If there is a duplicate name, it will modify it to make it unique
    request_body["name"] = duplicate_name(
        KnowledgebaseService.query,
        name=dataset_name,
        tenant_id=tenant_id,
        status=StatusEnum.VALID.value)
    try:
        request_body["id"] = get_uuid()
        request_body["tenant_id"] = tenant_id
        request_body["created_by"] = tenant_id
        exist, t = TenantService.get_by_id(tenant_id)
        if not exist:
            return construct_result(code=RetCode.AUTHENTICATION_ERROR, message="Tenant not found.")
        request_body["embd_id"] = t.embd_id
        if not KnowledgebaseService.save(**request_body):
            # failed to create new dataset
            return construct_result()
        return construct_json_result(code=RetCode.SUCCESS,
                                     data={"dataset_name": request_body["name"], "dataset_id": request_body["id"]})
    except Exception as e:
        return construct_error_response(e)


# -----------------------------list datasets-------------------------------------------------------

@manager.route("/", methods=["GET"])
@login_required
def list_datasets():
    offset = request.args.get("offset", 0)
    count = request.args.get("count", -1)
    orderby = request.args.get("orderby", "create_time")
    desc = request.args.get("desc", True)
    try:
        tenants = TenantService.get_joined_tenants_by_user_id(current_user.id)
        datasets = KnowledgebaseService.get_by_tenant_ids_by_offset(
            [m["tenant_id"] for m in tenants], current_user.id, int(offset), int(count), orderby, desc)
        return construct_json_result(data=datasets, code=RetCode.SUCCESS, message=f"List datasets successfully!")
    except Exception as e:
        return construct_error_response(e)
    except HTTPError as http_err:
        return construct_json_result(http_err)


# ---------------------------------delete a dataset ----------------------------

@manager.route("/<dataset_id>", methods=["DELETE"])
@login_required
def remove_dataset(dataset_id):
    try:
        datasets = KnowledgebaseService.query(created_by=current_user.id, id=dataset_id)

        # according to the id, searching for the dataset
        if not datasets:
            return construct_json_result(message=f"The dataset cannot be found for your current account.",
                                         code=RetCode.OPERATING_ERROR)

        # Iterating the documents inside the dataset
        for doc in DocumentService.query(kb_id=dataset_id):
            if not DocumentService.remove_document(doc, datasets[0].tenant_id):
                # the process of deleting failed
                return construct_json_result(code=RetCode.DATA_ERROR,
                                             message="There was an error during the document removal process. "
                                                     "Please check the status of the RAGFlow server and try the removal again.")
            # delete the other files
            f2d = File2DocumentService.get_by_document_id(doc.id)
            FileService.filter_delete([File.source_type == FileSource.KNOWLEDGEBASE, File.id == f2d[0].file_id])
            File2DocumentService.delete_by_document_id(doc.id)

        # delete the dataset
        if not KnowledgebaseService.delete_by_id(dataset_id):
            return construct_json_result(code=RetCode.DATA_ERROR,
                                         message="There was an error during the dataset removal process. "
                                                 "Please check the status of the RAGFlow server and try the removal again.")
        # success
        return construct_json_result(code=RetCode.SUCCESS, message=f"Remove dataset: {dataset_id} successfully")
    except Exception as e:
        return construct_error_response(e)


# ------------------------------ get details of a dataset ----------------------------------------

@manager.route("/<dataset_id>", methods=["GET"])
@login_required
def get_dataset(dataset_id):
    try:
        dataset = KnowledgebaseService.get_detail(dataset_id)
        if not dataset:
            return construct_json_result(code=RetCode.DATA_ERROR, message="Can't find this dataset!")
        return construct_json_result(data=dataset, code=RetCode.SUCCESS)
    except Exception as e:
        return construct_json_result(e)


# ------------------------------ update a dataset --------------------------------------------

@manager.route("/<dataset_id>", methods=["PUT"])
@login_required
def update_dataset(dataset_id):
    req = request.json
    try:
        # the request cannot be empty
        if not req:
            return construct_json_result(code=RetCode.DATA_ERROR, message="Please input at least one parameter that "
                                                                          "you want to update!")
        # check whether the dataset can be found
        if not KnowledgebaseService.query(created_by=current_user.id, id=dataset_id):
            return construct_json_result(message=f"Only the owner of knowledgebase is authorized for this operation!",
                                         code=RetCode.OPERATING_ERROR)

        exist, dataset = KnowledgebaseService.get_by_id(dataset_id)
        # check whether there is this dataset
        if not exist:
            return construct_json_result(code=RetCode.DATA_ERROR, message="This dataset cannot be found!")

        if "name" in req:
            name = req["name"].strip()
            # check whether there is duplicate name
            if name.lower() != dataset.name.lower() \
                    and len(KnowledgebaseService.query(name=name, tenant_id=current_user.id,
                                                       status=StatusEnum.VALID.value)) > 1:
                return construct_json_result(code=RetCode.DATA_ERROR,
                                             message=f"The name: {name.lower()} is already used by other "
                                                     f"datasets. Please choose a different name.")

        dataset_updating_data = {}
        chunk_num = req.get("chunk_num")
        # modify the value of 11 parameters

        # 2 parameters: embedding id and chunk method
        # only if chunk_num is 0, the user can update the embedding id
        if req.get("embedding_model_id"):
            if chunk_num == 0:
                dataset_updating_data["embd_id"] = req["embedding_model_id"]
            else:
                return construct_json_result(code=RetCode.DATA_ERROR,
                                             message="You have already parsed the document in this "
                                                     "dataset, so you cannot change the embedding "
                                                     "model.")
        # only if chunk_num is 0, the user can update the chunk_method
        if "chunk_method" in req:
            type_value = req["chunk_method"]
            if is_illegal_value_for_enum(type_value, ParserType):
                return construct_json_result(message=f"Illegal value {type_value} for 'chunk_method' field.",
                                             code=RetCode.DATA_ERROR)
            if chunk_num != 0:
                construct_json_result(code=RetCode.DATA_ERROR, message="You have already parsed the document "
                                                                       "in this dataset, so you cannot "
                                                                       "change the chunk method.")
            dataset_updating_data["parser_id"] = req["template_type"]

        # convert the photo parameter to avatar
        if req.get("photo"):
            dataset_updating_data["avatar"] = req["photo"]

        # layout_recognize
        if "layout_recognize" in req:
            if "parser_config" not in dataset_updating_data:
                dataset_updating_data['parser_config'] = {}
            dataset_updating_data['parser_config']['layout_recognize'] = req['layout_recognize']

        # TODO: updating use_raptor needs to construct a class

        # 6 parameters
        for key in ["name", "language", "description", "permission", "id", "token_num"]:
            if key in req:
                dataset_updating_data[key] = req.get(key)

        # update
        if not KnowledgebaseService.update_by_id(dataset.id, dataset_updating_data):
            return construct_json_result(code=RetCode.OPERATING_ERROR, message="Failed to update! "
                                                                               "Please check the status of RAGFlow "
                                                                               "server and try again!")

        exist, dataset = KnowledgebaseService.get_by_id(dataset.id)
        if not exist:
            return construct_json_result(code=RetCode.DATA_ERROR, message="Failed to get the dataset "
                                                                          "using the dataset ID.")

        return construct_json_result(data=dataset.to_json(), code=RetCode.SUCCESS)
    except Exception as e:
        return construct_error_response(e)


# --------------------------------content management ----------------------------------------------

# ----------------------------upload files-----------------------------------------------------
@manager.route("/<dataset_id>/documents/", methods=["POST"])
@login_required
def upload_documents(dataset_id):
    # no files
    if not request.files:
        return construct_json_result(
            message="There is no file!", code=RetCode.ARGUMENT_ERROR)

    # the number of uploading files exceeds the limit
    file_objs = request.files.getlist("file")
    num_file_objs = len(file_objs)

    if num_file_objs > MAXIMUM_OF_UPLOADING_FILES:
        return construct_json_result(code=RetCode.DATA_ERROR, message=f"You try to upload {num_file_objs} files, "
                                                                      f"which exceeds the maximum number of uploading files: {MAXIMUM_OF_UPLOADING_FILES}")

    # no dataset
    exist, dataset = KnowledgebaseService.get_by_id(dataset_id)
    if not exist:
        return construct_json_result(message="Can't find this dataset", code=RetCode.DATA_ERROR)

    for file_obj in file_objs:
        file_name = file_obj.filename
        # no name
        if not file_name:
            return construct_json_result(
                message="There is a file without name!", code=RetCode.ARGUMENT_ERROR)

        # TODO: support the remote files
        if 'http' in file_name:
            return construct_json_result(code=RetCode.ARGUMENT_ERROR, message="Remote files have not unsupported.")

    # get the root_folder
    root_folder = FileService.get_root_folder(current_user.id)
    # get the id of the root_folder
    parent_file_id = root_folder["id"]  # document id
    # this is for the new user, create '.knowledgebase' file
    FileService.init_knowledgebase_docs(parent_file_id, current_user.id)
    # go inside this folder, get the kb_root_folder
    kb_root_folder = FileService.get_kb_folder(current_user.id)
    # link the file management to the kb_folder
    kb_folder = FileService.new_a_file_from_kb(dataset.tenant_id, dataset.name, kb_root_folder["id"])

    # grab all the errs
    err = []
    MAX_FILE_NUM_PER_USER = int(os.environ.get("MAX_FILE_NUM_PER_USER", 0))
    uploaded_docs_json = []
    for file in file_objs:
        try:
            # TODO: get this value from the database as some tenants have this limit while others don't
            if MAX_FILE_NUM_PER_USER > 0 and DocumentService.get_doc_count(dataset.tenant_id) >= MAX_FILE_NUM_PER_USER:
                return construct_json_result(code=RetCode.DATA_ERROR,
                                             message="Exceed the maximum file number of a free user!")
            # deal with the duplicate name
            filename = duplicate_name(
                DocumentService.query,
                name=file.filename,
                kb_id=dataset.id)

            # deal with the unsupported type
            filetype = filename_type(filename)
            if filetype == FileType.OTHER.value:
                return construct_json_result(code=RetCode.DATA_ERROR,
                                             message="This type of file has not been supported yet!")

            # upload to the minio
            location = filename
            while STORAGE_IMPL.obj_exist(dataset_id, location):
                location += "_"

            blob = file.read()

            # the content is empty, raising a warning
            if blob == b'':
                warnings.warn(f"[WARNING]: The content of the file {filename} is empty.")

            STORAGE_IMPL.put(dataset_id, location, blob)

            doc = {
                "id": get_uuid(),
                "kb_id": dataset.id,
                "parser_id": dataset.parser_id,
                "parser_config": dataset.parser_config,
                "created_by": current_user.id,
                "type": filetype,
                "name": filename,
                "location": location,
                "size": len(blob),
                "thumbnail": thumbnail(filename, blob)
            }
            if doc["type"] == FileType.VISUAL:
                doc["parser_id"] = ParserType.PICTURE.value
            if doc["type"] == FileType.AURAL:
                doc["parser_id"] = ParserType.AUDIO.value
            if re.search(r"\.(ppt|pptx|pages)$", filename):
                doc["parser_id"] = ParserType.PRESENTATION.value
            if re.search(r"\.(eml)$", filename):
                doc["parser_id"] = ParserType.EMAIL.value
            DocumentService.insert(doc)

            FileService.add_file_from_kb(doc, kb_folder["id"], dataset.tenant_id)
            uploaded_docs_json.append(doc)
        except Exception as e:
            err.append(file.filename + ": " + str(e))

    if err:
        # return all the errors
        return construct_json_result(message="\n".join(err), code=RetCode.SERVER_ERROR)
    # success
    return construct_json_result(data=uploaded_docs_json, code=RetCode.SUCCESS)


# ----------------------------delete a file-----------------------------------------------------
@manager.route("/<dataset_id>/documents/<document_id>", methods=["DELETE"])
@login_required
def delete_document(document_id, dataset_id):  # string
    # get the root folder
    root_folder = FileService.get_root_folder(current_user.id)
    # parent file's id
    parent_file_id = root_folder["id"]
    # consider the new user
    FileService.init_knowledgebase_docs(parent_file_id, current_user.id)
    # store all the errors that may have
    errors = ""
    try:
        # whether there is this document
        exist, doc = DocumentService.get_by_id(document_id)
        if not exist:
            return construct_json_result(message=f"Document {document_id} not found!", code=RetCode.DATA_ERROR)
        # whether this doc is authorized by this tenant
        tenant_id = DocumentService.get_tenant_id(document_id)
        if not tenant_id:
            return construct_json_result(
                message=f"You cannot delete this document {document_id} due to the authorization"
                        f" reason!", code=RetCode.AUTHENTICATION_ERROR)

        # get the doc's id and location
        real_dataset_id, location = File2DocumentService.get_storage_address(doc_id=document_id)

        if real_dataset_id != dataset_id:
            return construct_json_result(message=f"The document {document_id} is not in the dataset: {dataset_id}, "
                                                 f"but in the dataset: {real_dataset_id}.", code=RetCode.ARGUMENT_ERROR)

        # there is an issue when removing
        if not DocumentService.remove_document(doc, tenant_id):
            return construct_json_result(
                message="There was an error during the document removal process. Please check the status of the "
                        "RAGFlow server and try the removal again.", code=RetCode.OPERATING_ERROR)

        # fetch the File2Document record associated with the provided document ID.
        file_to_doc = File2DocumentService.get_by_document_id(document_id)
        # delete the associated File record.
        FileService.filter_delete([File.source_type == FileSource.KNOWLEDGEBASE, File.id == file_to_doc[0].file_id])
        # delete the File2Document record itself using the document ID. This removes the
        # association between the document and the file after the File record has been deleted.
        File2DocumentService.delete_by_document_id(document_id)

        # delete it from minio
        STORAGE_IMPL.rm(dataset_id, location)
    except Exception as e:
        errors += str(e)
    if errors:
        return construct_json_result(data=False, message=errors, code=RetCode.SERVER_ERROR)

    return construct_json_result(data=True, code=RetCode.SUCCESS)


# ----------------------------list files-----------------------------------------------------
@manager.route('/<dataset_id>/documents/', methods=['GET'])
@login_required
def list_documents(dataset_id):
    if not dataset_id:
        return construct_json_result(
            data=False, message="Lack of 'dataset_id'", code=RetCode.ARGUMENT_ERROR)

    # searching keywords
    keywords = request.args.get("keywords", "")

    offset = request.args.get("offset", 0)
    count = request.args.get("count", -1)
    order_by = request.args.get("order_by", "create_time")
    descend = request.args.get("descend", True)
    try:
        docs, total = DocumentService.list_documents_in_dataset(dataset_id, int(offset), int(count), order_by,
                                                                descend, keywords)

        return construct_json_result(data={"total": total, "docs": docs}, message=RetCode.SUCCESS)
    except Exception as e:
        return construct_error_response(e)


# ----------------------------update: enable rename-----------------------------------------------------
@manager.route("/<dataset_id>/documents/<document_id>", methods=["PUT"])
@login_required
def update_document(dataset_id, document_id):
    req = request.json
    try:
        legal_parameters = set()
        legal_parameters.add("name")
        legal_parameters.add("enable")
        legal_parameters.add("template_type")

        for key in req.keys():
            if key not in legal_parameters:
                return construct_json_result(code=RetCode.ARGUMENT_ERROR, message=f"{key} is an illegal parameter.")

        # The request body cannot be empty
        if not req:
            return construct_json_result(
                code=RetCode.DATA_ERROR,
                message="Please input at least one parameter that you want to update!")

        # Check whether there is this dataset
        exist, dataset = KnowledgebaseService.get_by_id(dataset_id)
        if not exist:
            return construct_json_result(code=RetCode.DATA_ERROR, message=f"This dataset {dataset_id} cannot be found!")

        # The document does not exist
        exist, document = DocumentService.get_by_id(document_id)
        if not exist:
            return construct_json_result(message=f"This document {document_id} cannot be found!",
                                         code=RetCode.ARGUMENT_ERROR)

        # Deal with the different keys
        updating_data = {}
        if "name" in req:
            new_name = req["name"]
            updating_data["name"] = new_name
            # Check whether the new_name is suitable
            # 1. no name value
            if not new_name:
                return construct_json_result(code=RetCode.DATA_ERROR, message="There is no new name.")

            # 2. In case that there's space in the head or the tail
            new_name = new_name.strip()

            # 3. Check whether the new_name has the same extension of file as before
            if pathlib.Path(new_name.lower()).suffix != pathlib.Path(
                    document.name.lower()).suffix:
                return construct_json_result(
                    data=False,
                    message="The extension of file cannot be changed",
                    code=RetCode.ARGUMENT_ERROR)

            # 4. Check whether the new name has already been occupied by other file
            for d in DocumentService.query(name=new_name, kb_id=document.kb_id):
                if d.name == new_name:
                    return construct_json_result(
                        message="Duplicated document name in the same dataset.",
                        code=RetCode.ARGUMENT_ERROR)

        if "enable" in req:
            enable_value = req["enable"]
            if is_illegal_value_for_enum(enable_value, StatusEnum):
                return construct_json_result(message=f"Illegal value {enable_value} for 'enable' field.",
                                             code=RetCode.DATA_ERROR)
            updating_data["status"] = enable_value

        # TODO: Chunk-method - update parameters inside the json object parser_config
        if "template_type" in req:
            type_value = req["template_type"]
            if is_illegal_value_for_enum(type_value, ParserType):
                return construct_json_result(message=f"Illegal value {type_value} for 'template_type' field.",
                                             code=RetCode.DATA_ERROR)
            updating_data["parser_id"] = req["template_type"]

        # The process of updating
        if not DocumentService.update_by_id(document_id, updating_data):
            return construct_json_result(
                code=RetCode.OPERATING_ERROR,
                message="Failed to update document in the database! "
                        "Please check the status of RAGFlow server and try again!")

        # name part: file service
        if "name" in req:
            # Get file by document id
            file_information = File2DocumentService.get_by_document_id(document_id)
            if file_information:
                exist, file = FileService.get_by_id(file_information[0].file_id)
                FileService.update_by_id(file.id, {"name": req["name"]})

        exist, document = DocumentService.get_by_id(document_id)

        # Success
        return construct_json_result(data=document.to_json(), message="Success", code=RetCode.SUCCESS)
    except Exception as e:
        return construct_error_response(e)


# Helper method to judge whether it's an illegal value
def is_illegal_value_for_enum(value, enum_class):
    return value not in enum_class.__members__.values()


# ----------------------------download a file-----------------------------------------------------
@manager.route("/<dataset_id>/documents/<document_id>", methods=["GET"])
@login_required
def download_document(dataset_id, document_id):
    try:
        # Check whether there is this dataset
        exist, _ = KnowledgebaseService.get_by_id(dataset_id)
        if not exist:
            return construct_json_result(code=RetCode.DATA_ERROR,
                                         message=f"This dataset '{dataset_id}' cannot be found!")

        # Check whether there is this document
        exist, document = DocumentService.get_by_id(document_id)
        if not exist:
            return construct_json_result(message=f"This document '{document_id}' cannot be found!",
                                         code=RetCode.ARGUMENT_ERROR)

        # The process of downloading
        doc_id, doc_location = File2DocumentService.get_storage_address(doc_id=document_id)  # minio address
        file_stream = STORAGE_IMPL.get(doc_id, doc_location)
        if not file_stream:
            return construct_json_result(message="This file is empty.", code=RetCode.DATA_ERROR)

        file = BytesIO(file_stream)

        # Use send_file with a proper filename and MIME type
        return send_file(
            file,
            as_attachment=True,
            download_name=document.name,
            mimetype='application/octet-stream'  # Set a default MIME type
        )

    # Error
    except Exception as e:
        return construct_error_response(e)


# ----------------------------start parsing a document-----------------------------------------------------
# helper method for parsing
# callback method
def doc_parse_callback(doc_id, prog=None, msg=""):
    cancel = DocumentService.do_cancel(doc_id)
    if cancel:
        raise Exception("The parsing process has been cancelled!")

"""
def doc_parse(binary, doc_name, parser_name, tenant_id, doc_id):
    match parser_name:
        case "book":
            book.chunk(doc_name, binary=binary, callback=partial(doc_parse_callback, doc_id))
        case "laws":
            laws.chunk(doc_name, binary=binary, callback=partial(doc_parse_callback, doc_id))
        case "manual":
            manual.chunk(doc_name, binary=binary, callback=partial(doc_parse_callback, doc_id))
        case "naive":
            # It's the mode by default, which is general in the front-end
            naive.chunk(doc_name, binary=binary, callback=partial(doc_parse_callback, doc_id))
        case "one":
            one.chunk(doc_name, binary=binary, callback=partial(doc_parse_callback, doc_id))
        case "paper":
            paper.chunk(doc_name, binary=binary, callback=partial(doc_parse_callback, doc_id))
        case "picture":
            picture.chunk(doc_name, binary=binary, tenant_id=tenant_id, lang="Chinese",
                          callback=partial(doc_parse_callback, doc_id))
        case "presentation":
            presentation.chunk(doc_name, binary=binary, callback=partial(doc_parse_callback, doc_id))
        case "qa":
            qa.chunk(doc_name, binary=binary, callback=partial(doc_parse_callback, doc_id))
        case "resume":
            resume.chunk(doc_name, binary=binary, callback=partial(doc_parse_callback, doc_id))
        case "table":
            table.chunk(doc_name, binary=binary, callback=partial(doc_parse_callback, doc_id))
        case "audio":
            audio.chunk(doc_name, binary=binary, callback=partial(doc_parse_callback, doc_id))
        case "email":
            email.chunk(doc_name, binary=binary, callback=partial(doc_parse_callback, doc_id))
        case _:
            return False

    return True
"""


@manager.route("/<dataset_id>/documents/<document_id>/status", methods=["POST"])
@login_required
def parse_document(dataset_id, document_id):
    try:
        # valid dataset
        exist, _ = KnowledgebaseService.get_by_id(dataset_id)
        if not exist:
            return construct_json_result(code=RetCode.DATA_ERROR,
                                         message=f"This dataset '{dataset_id}' cannot be found!")

        return parsing_document_internal(document_id)

    except Exception as e:
        return construct_error_response(e)


# ----------------------------start parsing documents-----------------------------------------------------
@manager.route("/<dataset_id>/documents/status", methods=["POST"])
@login_required
def parse_documents(dataset_id):
    doc_ids = request.json["doc_ids"]
    try:
        exist, _ = KnowledgebaseService.get_by_id(dataset_id)
        if not exist:
            return construct_json_result(code=RetCode.DATA_ERROR,
                                         message=f"This dataset '{dataset_id}' cannot be found!")
        # two conditions
        if not doc_ids:
            # documents inside the dataset
            docs, total = DocumentService.list_documents_in_dataset(dataset_id, 0, -1, "create_time",
                                                                    True, "")
            doc_ids = [doc["id"] for doc in docs]

        message = ""
        # for loop
        for id in doc_ids:
            res = parsing_document_internal(id)
            res_body = res.json
            if res_body["code"] == RetCode.SUCCESS:
                message += res_body["message"]
            else:
                return res
        return construct_json_result(data=True, code=RetCode.SUCCESS, message=message)

    except Exception as e:
        return construct_error_response(e)


# helper method for parsing the document
def parsing_document_internal(id):
    message = ""
    try:
        # Check whether there is this document
        exist, document = DocumentService.get_by_id(id)
        if not exist:
            return construct_json_result(message=f"This document '{id}' cannot be found!",
                                         code=RetCode.ARGUMENT_ERROR)

        tenant_id = DocumentService.get_tenant_id(id)
        if not tenant_id:
            return construct_json_result(message="Tenant not found!", code=RetCode.AUTHENTICATION_ERROR)

        info = {"run": "1", "progress": 0}
        info["progress_msg"] = ""
        info["chunk_num"] = 0
        info["token_num"] = 0

        DocumentService.update_by_id(id, info)

        ELASTICSEARCH.deleteByQuery(Q("match", doc_id=id), idxnm=search.index_name(tenant_id))

        _, doc_attributes = DocumentService.get_by_id(id)
        doc_attributes = doc_attributes.to_dict()
        doc_id = doc_attributes["id"]

        bucket, doc_name = File2DocumentService.get_storage_address(doc_id=doc_id)
        binary = STORAGE_IMPL.get(bucket, doc_name)
        parser_name = doc_attributes["parser_id"]
        if binary:
            res = doc_parse(binary, doc_name, parser_name, tenant_id, doc_id)
            if res is False:
                message += f"The parser id: {parser_name} of the document {doc_id} is not supported; "
        else:
            message += f"Empty data in the document: {doc_name}; "
        # failed in parsing
        if doc_attributes["status"] == TaskStatus.FAIL.value:
            message += f"Failed in parsing the document: {doc_id}; "
        return construct_json_result(code=RetCode.SUCCESS, message=message)
    except Exception as e:
        return construct_error_response(e)


# ----------------------------stop parsing a doc-----------------------------------------------------
@manager.route("<dataset_id>/documents/<document_id>/status", methods=["DELETE"])
@login_required
def stop_parsing_document(dataset_id, document_id):
    try:
        # valid dataset
        exist, _ = KnowledgebaseService.get_by_id(dataset_id)
        if not exist:
            return construct_json_result(code=RetCode.DATA_ERROR,
                                         message=f"This dataset '{dataset_id}' cannot be found!")

        return stop_parsing_document_internal(document_id)

    except Exception as e:
        return construct_error_response(e)


# ----------------------------stop parsing docs-----------------------------------------------------
@manager.route("<dataset_id>/documents/status", methods=["DELETE"])
@login_required
def stop_parsing_documents(dataset_id):
    doc_ids = request.json["doc_ids"]
    try:
        # valid dataset?
        exist, _ = KnowledgebaseService.get_by_id(dataset_id)
        if not exist:
            return construct_json_result(code=RetCode.DATA_ERROR,
                                         message=f"This dataset '{dataset_id}' cannot be found!")
        if not doc_ids:
            # documents inside the dataset
            docs, total = DocumentService.list_documents_in_dataset(dataset_id, 0, -1, "create_time",
                                                                    True, "")
            doc_ids = [doc["id"] for doc in docs]

        message = ""
        # for loop
        for id in doc_ids:
            res = stop_parsing_document_internal(id)
            res_body = res.json
            if res_body["code"] == RetCode.SUCCESS:
                message += res_body["message"]
            else:
                return res
        return construct_json_result(data=True, code=RetCode.SUCCESS, message=message)

    except Exception as e:
        return construct_error_response(e)


# Helper method
def stop_parsing_document_internal(document_id):
    try:
        # valid doc?
        exist, doc = DocumentService.get_by_id(document_id)
        if not exist:
            return construct_json_result(message=f"This document '{document_id}' cannot be found!",
                                         code=RetCode.ARGUMENT_ERROR)
        doc_attributes = doc.to_dict()

        # only when the status is parsing, we need to stop it
        if doc_attributes["status"] == TaskStatus.RUNNING.value:
            tenant_id = DocumentService.get_tenant_id(document_id)
            if not tenant_id:
                return construct_json_result(message="Tenant not found!", code=RetCode.AUTHENTICATION_ERROR)

            # update successfully?
            if not DocumentService.update_by_id(document_id, {"status": "2"}):  # cancel
                return construct_json_result(
                    code=RetCode.OPERATING_ERROR,
                    message="There was an error during the stopping parsing the document process. "
                            "Please check the status of the RAGFlow server and try the update again."
                )

            _, doc_attributes = DocumentService.get_by_id(document_id)
            doc_attributes = doc_attributes.to_dict()

            # failed in stop parsing
            if doc_attributes["status"] == TaskStatus.RUNNING.value:
                return construct_json_result(message=f"Failed in parsing the document: {document_id}; ", code=RetCode.SUCCESS)
        return construct_json_result(code=RetCode.SUCCESS, message="")
    except Exception as e:
        return construct_error_response(e)


# ----------------------------show the status of the file-----------------------------------------------------
@manager.route("/<dataset_id>/documents/<document_id>/status", methods=["GET"])
@login_required
def show_parsing_status(dataset_id, document_id):
    try:
        # valid dataset
        exist, _ = KnowledgebaseService.get_by_id(dataset_id)
        if not exist:
            return construct_json_result(code=RetCode.DATA_ERROR,
                                         message=f"This dataset: '{dataset_id}' cannot be found!")
        # valid document
        exist, _ = DocumentService.get_by_id(document_id)
        if not exist:
            return construct_json_result(code=RetCode.DATA_ERROR,
                                         message=f"This document: '{document_id}' is not a valid document.")

        _, doc = DocumentService.get_by_id(document_id)  # get doc object
        doc_attributes = doc.to_dict()

        return construct_json_result(
            data={"progress": doc_attributes["progress"], "status": TaskStatus(doc_attributes["status"]).name},
            code=RetCode.SUCCESS
        )
    except Exception as e:
        return construct_error_response(e)

# ----------------------------list the chunks of the file-----------------------------------------------------

# -- --------------------------delete the chunk-----------------------------------------------------

# ----------------------------edit the status of the chunk-----------------------------------------------------

# ----------------------------insert a new chunk-----------------------------------------------------

# ----------------------------upload a file-----------------------------------------------------

# ----------------------------get a specific chunk-----------------------------------------------------

# ----------------------------retrieval test-----------------------------------------------------
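For context, here is a minimal sketch (not part of this commit) of how the endpoints removed above could be exercised over HTTP. The base URL, port, token value, and prior login step are assumptions for illustration; the real routes required an authenticated Flask-Login session plus an Authorization header.

```python
# Hypothetical client sketch for the deleted dataset endpoints; the host,
# port, route prefix, and token below are assumptions, not part of the commit.
import requests

BASE_URL = "http://127.0.0.1:9380/v1/dataset"  # assumed host and route prefix

session = requests.Session()
# Assume the session has already been authenticated (the endpoints use
# @login_required) and that a token is available for the Authorization header.
headers = {"Authorization": "<your-token>"}

# Create a dataset: POST / with a JSON body that may contain only "name".
resp = session.post(f"{BASE_URL}/", json={"name": "demo_kb"}, headers=headers)
print(resp.json())  # e.g. {"code": ..., "data": {"dataset_name": ..., "dataset_id": ...}}

# List datasets: GET / with optional offset/count/orderby/desc parameters.
resp = session.get(f"{BASE_URL}/", params={"offset": 0, "count": 10}, headers=headers)
print(resp.json())
```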
sdk/python/test/test_basic.py
DELETED
@@ -1,48 +0,0 @@
from test_sdkbase import TestSdk
import ragflow
from ragflow.ragflow import RAGFLow
import pytest
from unittest.mock import MagicMock
from common import API_KEY, HOST_ADDRESS


class TestBasic(TestSdk):

    def test_version(self):
        print(ragflow.__version__)

    # def test_create_dataset(self):
    #     res = RAGFLow(API_KEY, HOST_ADDRESS).create_dataset('abc')
    #     print(res)
    #
    # def test_delete_dataset(self):
    #     assert RAGFLow('123', 'url').delete_dataset('abc') == 'abc'
    #
    # def test_list_dataset_success(self, ragflow_instance, monkeypatch):
    #     # Mocking the response of requests.get method
    #     mock_response = MagicMock()
    #     mock_response.status_code = 200
    #     mock_response.json.return_value = {'datasets': [{'id': 1, 'name': 'dataset1'}, {'id': 2, 'name': 'dataset2'}]}
    #
    #     # Patching requests.get to return the mock_response
    #     monkeypatch.setattr("requests.get", MagicMock(return_value=mock_response))
    #
    #     # Call the method under test
    #     result = ragflow_instance.list_dataset()
    #
    #     # Assertion
    #     assert result == [{'id': 1, 'name': 'dataset1'}, {'id': 2, 'name': 'dataset2'}]
    #
    # def test_list_dataset_failure(self, ragflow_instance, monkeypatch):
    #     # Mocking the response of requests.get method
    #     mock_response = MagicMock()
    #     mock_response.status_code = 404  # Simulating a failed request
    #
    #     # Patching requests.get to return the mock_response
    #     monkeypatch.setattr("requests.get", MagicMock(return_value=mock_response))
    #
    #     # Call the method under test
    #     result = ragflow_instance.list_dataset()
    #
    #     # Assertion
    #     assert result is None
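For reference, a minimal sketch of the deleted SDK surface that these test files exercise. The constructor and method names are taken directly from the tests; API_KEY and HOST_ADDRESS come from the tests' common module, and the return shape mirrors the dicts the tests assert on.

```python
# Sketch of the deleted SDK usage, mirroring the calls made in these tests.
from ragflow import RAGFlow            # client class as imported in test_dataset.py
from common import API_KEY, HOST_ADDRESS

client = RAGFlow(API_KEY, HOST_ADDRESS)

res = client.create_dataset("demo")    # returns a dict with 'code'/'message'/'data'
datasets = client.list_dataset(0, 10)  # offset and count, as in the tests
client.delete_dataset(res["data"]["dataset_name"])
```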
sdk/python/test/test_dataset.py
DELETED
@@ -1,468 +0,0 @@
from api.settings import RetCode
from test_sdkbase import TestSdk
from ragflow import RAGFlow
import pytest
from common import API_KEY, HOST_ADDRESS
from api.contants import NAME_LENGTH_LIMIT


class TestDataset(TestSdk):
    """
    This class contains a suite of tests for the dataset management functionality within the RAGFlow system.
    It ensures that the following functionalities as expected:
        1. create a kb
        2. list the kb
        3. get the detail info according to the kb id
        4. update the kb
        5. delete the kb
    """

    def setup_method(self):
        """
        Delete all the datasets.
        """
        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
        listed_data = ragflow.list_dataset()
        listed_data = listed_data['data']

        listed_names = {d['name'] for d in listed_data}
        for name in listed_names:
            ragflow.delete_dataset(name)

    # -----------------------create_dataset---------------------------------
    def test_create_dataset_with_success(self):
        """
        Test the creation of a new dataset with success.
        """
        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
        # create a kb
        res = ragflow.create_dataset("kb1")
        assert res['code'] == RetCode.SUCCESS and res['message'] == 'success'

    def test_create_dataset_with_empty_name(self):
        """
        Test the creation of a new dataset with an empty name.
        """
        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
        res = ragflow.create_dataset("")
        assert res['message'] == 'Empty dataset name' and res['code'] == RetCode.DATA_ERROR

    def test_create_dataset_with_name_exceeding_limit(self):
        """
        Test the creation of a new dataset with the length of name exceeding the limit.
        """
        name = "k" * NAME_LENGTH_LIMIT + "b"
        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
        res = ragflow.create_dataset(name)
        assert (res['message'] == f"Dataset name: {name} with length {len(name)} exceeds {NAME_LENGTH_LIMIT}!"
                and res['code'] == RetCode.DATA_ERROR)

    def test_create_dataset_name_with_space_in_the_middle(self):
        """
        Test the creation of a new dataset whose name has space in the middle.
        """
        name = "k b"
        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
        res = ragflow.create_dataset(name)
        assert (res['code'] == RetCode.SUCCESS and res['message'] == 'success')

    def test_create_dataset_name_with_space_in_the_head(self):
        """
        Test the creation of a new dataset whose name has space in the head.
        """
        name = " kb"
        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
        res = ragflow.create_dataset(name)
        assert (res['code'] == RetCode.SUCCESS and res['message'] == 'success')

    def test_create_dataset_name_with_space_in_the_tail(self):
        """
        Test the creation of a new dataset whose name has space in the tail.
        """
        name = "kb "
        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
        res = ragflow.create_dataset(name)
        assert (res['code'] == RetCode.SUCCESS and res['message'] == 'success')

    def test_create_dataset_name_with_space_in_the_head_and_tail_and_length_exceed_limit(self):
        """
        Test the creation of a new dataset whose name has space in the head and tail,
        and the length of the name exceeds the limit.
        """
        name = " " + "k" * NAME_LENGTH_LIMIT + " "
        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
        res = ragflow.create_dataset(name)
        assert (res['code'] == RetCode.SUCCESS and res['message'] == 'success')

    def test_create_dataset_with_two_same_name(self):
        """
        Test the creation of two new datasets with the same name.
        """
        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
        res = ragflow.create_dataset("kb")
        assert (res['code'] == RetCode.SUCCESS and res['message'] == 'success')
        res = ragflow.create_dataset("kb")
        assert (res['code'] == RetCode.SUCCESS and res['message'] == 'success')

    def test_create_dataset_with_only_space_in_the_name(self):
        """
        Test the creation of a dataset whose name only has space.
        """
        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
        res = ragflow.create_dataset(" ")
        assert (res['code'] == RetCode.SUCCESS and res['message'] == 'success')

    def test_create_dataset_with_space_number_exceeding_limit(self):
        """
        Test the creation of a dataset with a name that only has space exceeds the allowed limit.
        """
        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
        name = " " * NAME_LENGTH_LIMIT
        res = ragflow.create_dataset(name)
        assert (res['code'] == RetCode.SUCCESS and res['message'] == 'success')

    def test_create_dataset_with_name_having_return(self):
        """
        Test the creation of a dataset with a name that has return symbol.
        """
        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
        name = "kb\n"
        res = ragflow.create_dataset(name)
        assert (res['code'] == RetCode.SUCCESS and res['message'] == 'success')

    def test_create_dataset_with_name_having_the_null_character(self):
        """
        Test the creation of a dataset with a name that has the null character.
        """
        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
        name = "kb\0"
        res = ragflow.create_dataset(name)
        assert (res['code'] == RetCode.SUCCESS and res['message'] == 'success')

    # -----------------------list_dataset---------------------------------
    def test_list_dataset_success(self):
        """
        Test listing datasets with a successful outcome.
        """
        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
        # Call the list_datasets method
        response = ragflow.list_dataset()
        assert response['code'] == RetCode.SUCCESS

    def test_list_dataset_with_checking_size_and_name(self):
        """
        Test listing datasets and verify the size and names of the datasets.
        """
        datasets_to_create = ["dataset1", "dataset2", "dataset3"]
        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
        created_response = [ragflow.create_dataset(name) for name in datasets_to_create]

        real_name_to_create = set()
        for response in created_response:
            assert 'data' in response, "Response is missing 'data' key"
            dataset_name = response['data']['dataset_name']
            real_name_to_create.add(dataset_name)

        response = ragflow.list_dataset(0, 3)
        listed_data = response['data']

        listed_names = {d['name'] for d in listed_data}
        assert listed_names == real_name_to_create
        assert response['code'] == RetCode.SUCCESS
        assert len(listed_data) == len(datasets_to_create)

    def test_list_dataset_with_getting_empty_result(self):
        """
        Test listing datasets that should be empty.
        """
        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
        datasets_to_create = []
        created_response = [ragflow.create_dataset(name) for name in datasets_to_create]

        real_name_to_create = set()
        for response in created_response:
            assert 'data' in response, "Response is missing 'data' key"
            dataset_name = response['data']['dataset_name']
            real_name_to_create.add(dataset_name)

        response = ragflow.list_dataset(0, 0)
        listed_data = response['data']

        listed_names = {d['name'] for d in listed_data}

        assert listed_names == real_name_to_create
        assert response['code'] == RetCode.SUCCESS
        assert len(listed_data) == 0

    def test_list_dataset_with_creating_100_knowledge_bases(self):
        """
        Test listing 100 datasets and verify the size and names of these datasets.
        """
        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
        datasets_to_create = ["dataset1"] * 100
        created_response = [ragflow.create_dataset(name) for name in datasets_to_create]

        real_name_to_create = set()
        for response in created_response:
            assert 'data' in response, "Response is missing 'data' key"
            dataset_name = response['data']['dataset_name']
            real_name_to_create.add(dataset_name)

        res = ragflow.list_dataset(0, 100)
        listed_data = res['data']

        listed_names = {d['name'] for d in listed_data}
        assert listed_names == real_name_to_create
        assert res['code'] == RetCode.SUCCESS
        assert len(listed_data) == 100

    def test_list_dataset_with_showing_one_dataset(self):
        """
        Test listing one dataset and verify the size of the dataset.
        """
        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
        response = ragflow.list_dataset(0, 1)
        datasets = response['data']
        assert len(datasets) == 1 and response['code'] == RetCode.SUCCESS

    def test_list_dataset_failure(self):
        """
        Test listing datasets with IndexError.
        """
        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
        response = ragflow.list_dataset(-1, -1)
        assert "IndexError" in response['message'] and response['code'] == RetCode.EXCEPTION_ERROR

    def test_list_dataset_for_empty_datasets(self):
        """
        Test listing datasets when the datasets are empty.
        """
        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
        response = ragflow.list_dataset()
        datasets = response['data']
        assert len(datasets) == 0 and response['code'] == RetCode.SUCCESS

    # TODO: have to set the limitation of the number of datasets

    # -----------------------delete_dataset---------------------------------
    def test_delete_one_dataset_with_success(self):
        """
        Test deleting a dataset with success.
        """
        # get the real name of the created dataset
        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
        res = ragflow.create_dataset("kb0")
        real_dataset_name = res['data']['dataset_name']
        # delete this dataset
        res = ragflow.delete_dataset(real_dataset_name)
        assert res['code'] == RetCode.SUCCESS and 'successfully' in res['message']

    def test_delete_dataset_with_not_existing_dataset(self):
        """
        Test deleting a dataset that does not exist with failure.
        """
        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
        res = ragflow.delete_dataset("weird_dataset")
        assert res['code'] == RetCode.OPERATING_ERROR and res['message'] == 'The dataset cannot be found for your current account.'

    def test_delete_dataset_with_creating_100_datasets_and_deleting_100_datasets(self):
        """
        Test deleting a dataset when creating 100 datasets and deleting 100 datasets.
        """
        # create 100 datasets
        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
        datasets_to_create = ["dataset1"] * 100
        created_response = [ragflow.create_dataset(name) for name in datasets_to_create]

        real_name_to_create = set()
        for response in created_response:
            assert 'data' in response, "Response is missing 'data' key"
            dataset_name = response['data']['dataset_name']
            real_name_to_create.add(dataset_name)

        for name in real_name_to_create:
            res = ragflow.delete_dataset(name)
            assert res['code'] == RetCode.SUCCESS and 'successfully' in res['message']

    def test_delete_dataset_with_space_in_the_middle_of_the_name(self):
        """
        Test deleting a dataset when its name has space in the middle.
        """
        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
        ragflow.create_dataset("k b")
        res = ragflow.delete_dataset("k b")
        assert res['code'] == RetCode.SUCCESS and 'successfully' in res['message']

    def test_delete_dataset_with_space_in_the_head_of_the_name(self):
        """
        Test deleting a dataset when its name has space in the head.
        """
        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
        ragflow.create_dataset(" kb")
        res = ragflow.delete_dataset(" kb")
        assert (res['code'] == RetCode.OPERATING_ERROR
                and res['message'] == 'The dataset cannot be found for your current account.')

    def test_delete_dataset_with_space_in_the_tail_of_the_name(self):
        """
        Test deleting a dataset when its name has space in the tail.
        """
        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
        ragflow.create_dataset("kb ")
        res = ragflow.delete_dataset("kb ")
|
313 |
-
assert (res['code'] == RetCode.OPERATING_ERROR
|
314 |
-
and res['message'] == 'The dataset cannot be found for your current account.')
|
315 |
-
|
316 |
-
def test_delete_dataset_with_only_space_in_the_name(self):
|
317 |
-
"""
|
318 |
-
Test deleting a dataset when its name only has space.
|
319 |
-
"""
|
320 |
-
ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
|
321 |
-
ragflow.create_dataset(" ")
|
322 |
-
res = ragflow.delete_dataset(" ")
|
323 |
-
assert (res['code'] == RetCode.OPERATING_ERROR
|
324 |
-
and res['message'] == 'The dataset cannot be found for your current account.')
|
325 |
-
|
326 |
-
def test_delete_dataset_with_only_exceeding_limit_space_in_the_name(self):
|
327 |
-
"""
|
328 |
-
Test deleting a dataset when its name only has space and the number of it exceeds the limit.
|
329 |
-
"""
|
330 |
-
ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
|
331 |
-
name = " " * (NAME_LENGTH_LIMIT + 1)
|
332 |
-
ragflow.create_dataset(name)
|
333 |
-
res = ragflow.delete_dataset(name)
|
334 |
-
assert (res['code'] == RetCode.OPERATING_ERROR
|
335 |
-
and res['message'] == 'The dataset cannot be found for your current account.')
|
336 |
-
|
337 |
-
def test_delete_dataset_with_name_with_space_in_the_head_and_tail_and_length_exceed_limit(self):
|
338 |
-
"""
|
339 |
-
Test deleting a dataset whose name has space in the head and tail,
|
340 |
-
and the length of the name exceeds the limit.
|
341 |
-
"""
|
342 |
-
ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
|
343 |
-
name = " " + "k" * NAME_LENGTH_LIMIT + " "
|
344 |
-
ragflow.create_dataset(name)
|
345 |
-
res = ragflow.delete_dataset(name)
|
346 |
-
assert (res['code'] == RetCode.OPERATING_ERROR
|
347 |
-
and res['message'] == 'The dataset cannot be found for your current account.')
|
348 |
-
|
349 |
-
# ---------------------------------get_dataset-----------------------------------------
|
350 |
-
|
351 |
-
def test_get_dataset_with_success(self):
|
352 |
-
"""
|
353 |
-
Test getting a dataset which exists.
|
354 |
-
"""
|
355 |
-
ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
|
356 |
-
response = ragflow.create_dataset("test")
|
357 |
-
dataset_name = response['data']['dataset_name']
|
358 |
-
res = ragflow.get_dataset(dataset_name)
|
359 |
-
assert res['code'] == RetCode.SUCCESS and res['data']['name'] == dataset_name
|
360 |
-
|
361 |
-
def test_get_dataset_with_failure(self):
|
362 |
-
"""
|
363 |
-
Test getting a dataset which does not exist.
|
364 |
-
"""
|
365 |
-
ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
|
366 |
-
res = ragflow.get_dataset("weird_dataset")
|
367 |
-
assert res['code'] == RetCode.DATA_ERROR and res['message'] == "Can't find this dataset!"
|
368 |
-
|
369 |
-
# ---------------------------------update a dataset-----------------------------------
|
370 |
-
|
371 |
-
def test_update_dataset_without_existing_dataset(self):
|
372 |
-
"""
|
373 |
-
Test updating a dataset which does not exist.
|
374 |
-
"""
|
375 |
-
ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
|
376 |
-
params = {
|
377 |
-
'name': 'new_name3',
|
378 |
-
'description': 'new_description',
|
379 |
-
"permission": 'me',
|
380 |
-
"parser_id": 'naive',
|
381 |
-
"language": 'English'
|
382 |
-
}
|
383 |
-
res = ragflow.update_dataset("weird_dataset", **params)
|
384 |
-
assert (res['code'] == RetCode.OPERATING_ERROR
|
385 |
-
and res['message'] == 'Only the owner of knowledgebase is authorized for this operation!')
|
386 |
-
|
387 |
-
def test_update_dataset_with_updating_six_parameters(self):
|
388 |
-
"""
|
389 |
-
Test updating a dataset when updating six parameters.
|
390 |
-
"""
|
391 |
-
ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
|
392 |
-
ragflow.create_dataset("new_name1")
|
393 |
-
params = {
|
394 |
-
'name': 'new_name',
|
395 |
-
'description': 'new_description1',
|
396 |
-
"permission": 'me',
|
397 |
-
"parser_id": 'naive',
|
398 |
-
"language": 'English'
|
399 |
-
}
|
400 |
-
res = ragflow.update_dataset("new_name1", **params)
|
401 |
-
assert res['code'] == RetCode.SUCCESS
|
402 |
-
assert (res['data']['description'] == 'new_description1'
|
403 |
-
and res['data']['name'] == 'new_name' and res['data']['permission'] == 'me'
|
404 |
-
and res['data']['language'] == 'English' and res['data']['parser_id'] == 'naive')
|
405 |
-
|
406 |
-
def test_update_dataset_with_updating_two_parameters(self):
|
407 |
-
"""
|
408 |
-
Test updating a dataset when updating two parameters.
|
409 |
-
"""
|
410 |
-
ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
|
411 |
-
ragflow.create_dataset("new_name2")
|
412 |
-
params = {
|
413 |
-
"name": "new_name3",
|
414 |
-
"language": 'English'
|
415 |
-
}
|
416 |
-
res = ragflow.update_dataset("new_name2", **params)
|
417 |
-
assert (res['code'] == RetCode.SUCCESS and res['data']['name'] == "new_name3"
|
418 |
-
and res['data']['language'] == 'English')
|
419 |
-
|
420 |
-
def test_update_dataset_with_updating_layout_recognize(self):
|
421 |
-
"""Test updating a dataset with only updating the layout_recognize"""
|
422 |
-
ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
|
423 |
-
ragflow.create_dataset("test_update_dataset_with_updating_layout_recognize")
|
424 |
-
params = {
|
425 |
-
"layout_recognize": False
|
426 |
-
}
|
427 |
-
res = ragflow.update_dataset("test_update_dataset_with_updating_layout_recognize", **params)
|
428 |
-
assert res['code'] == RetCode.SUCCESS and res['data']['parser_config']['layout_recognize'] is False
|
429 |
-
|
430 |
-
def test_update_dataset_with_empty_parameter(self):
|
431 |
-
ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
|
432 |
-
ragflow.create_dataset("test_update_dataset_with_empty_parameter")
|
433 |
-
params = {}
|
434 |
-
res = ragflow.update_dataset("test_update_dataset_with_empty_parameter", **params)
|
435 |
-
assert (res['code'] == RetCode.DATA_ERROR
|
436 |
-
and res['message'] == 'Please input at least one parameter that you want to update!')
|
437 |
-
|
438 |
-
# ---------------------------------mix the different methods--------------------------
|
439 |
-
|
440 |
-
def test_create_and_delete_dataset_together(self):
|
441 |
-
"""
|
442 |
-
Test creating 1 dataset, and then deleting 1 dataset.
|
443 |
-
Test creating 10 datasets, and then deleting 10 datasets.
|
444 |
-
"""
|
445 |
-
# create 1 dataset
|
446 |
-
ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
|
447 |
-
res = ragflow.create_dataset("ddd")
|
448 |
-
assert res['code'] == RetCode.SUCCESS and res['message'] == 'success'
|
449 |
-
|
450 |
-
# delete 1 dataset
|
451 |
-
res = ragflow.delete_dataset("ddd")
|
452 |
-
assert res["code"] == RetCode.SUCCESS
|
453 |
-
|
454 |
-
# create 10 datasets
|
455 |
-
datasets_to_create = ["dataset1"] * 10
|
456 |
-
created_response = [ragflow.create_dataset(name) for name in datasets_to_create]
|
457 |
-
|
458 |
-
real_name_to_create = set()
|
459 |
-
for response in created_response:
|
460 |
-
assert 'data' in response, "Response is missing 'data' key"
|
461 |
-
dataset_name = response['data']['dataset_name']
|
462 |
-
real_name_to_create.add(dataset_name)
|
463 |
-
|
464 |
-
# delete 10 datasets
|
465 |
-
for name in real_name_to_create:
|
466 |
-
res = ragflow.delete_dataset(name)
|
467 |
-
assert res["code"] == RetCode.SUCCESS
|
468 |
-
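
Taken together, the deleted dataset tests above exercised a simple create / list / get / update / delete lifecycle. A minimal sketch of that flow, against the SDK surface this commit deletes (the `RAGFlow` client and the `API_KEY` / `HOST_ADDRESS` values from the suite's `common` module; every call returns a dict with `code`, `message` and `data` keys):

from api.settings import RetCode
from ragflow import RAGFlow
from common import API_KEY, HOST_ADDRESS

ragflow = RAGFlow(API_KEY, HOST_ADDRESS)

# create_dataset may rename on collision, so read back the real name
res = ragflow.create_dataset("demo")
real_name = res["data"]["dataset_name"]

# list_dataset(offset, count) pages through datasets; get_dataset looks one up by name
assert ragflow.list_dataset(0, 10)["code"] == RetCode.SUCCESS
assert ragflow.get_dataset(real_name)["data"]["name"] == real_name

# update_dataset requires at least one field to change
ragflow.update_dataset(real_name, description="demo dataset")

# delete_dataset only matches names owned by the current account
assert ragflow.delete_dataset(real_name)["code"] == RetCode.SUCCESS
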
sdk/python/test/test_document.py
DELETED
@@ -1,1130 +0,0 @@
-from api.settings import RetCode
-from test_sdkbase import TestSdk
-from ragflow import RAGFlow
-import pytest
-from common import API_KEY, HOST_ADDRESS
-
-
-class TestFile(TestSdk):
-    """
-    This class contains a suite of tests for the content management functionality within the dataset.
-    It ensures that the following functionalities work as expected:
-        1. upload local files
-        2. upload remote files
-        3. download a file
-        4. delete a file
-        5. enable rename
-        6. list files
-        7. start parsing
-        8. end parsing
-        9. check the status of the file
-        10. list the chunks
-        11. delete a chunk
-        12. insert a new chunk
-        13. edit the status of chunk
-        14. get the specific chunk
-        15. retrieval test
-    """
-
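
The fifteen behaviours listed in the class docstring all go through the same call convention: each SDK method returns a plain dict with `code`, `message` and `data` keys, which the assertions throughout this file pick apart. A small sketch of that convention (the `expect_success` helper is hypothetical, not part of the deleted suite):

from api.settings import RetCode
from ragflow import RAGFlow
from common import API_KEY, HOST_ADDRESS

ragflow = RAGFlow(API_KEY, HOST_ADDRESS)

def expect_success(res: dict):
    # Hypothetical helper: fail loudly with the server's message on any non-success code.
    assert res["code"] == RetCode.SUCCESS, res["message"]
    return res.get("data")
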
-    # ----------------------------upload local files-----------------------------------------------------
-    def test_upload_two_files(self):
-        """
-        Test uploading two files with success.
-        """
-        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
-        created_res = ragflow.create_dataset("test_upload_two_files")
-        dataset_id = created_res["data"]["dataset_id"]
-        file_paths = ["test_data/test.txt", "test_data/test1.txt"]
-        res = ragflow.upload_local_file(dataset_id, file_paths)
-        assert res["code"] == RetCode.SUCCESS and res["message"] == "success"
-
-    def test_upload_one_file(self):
-        """
-        Test uploading one file with success.
-        """
-        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
-        created_res = ragflow.create_dataset("test_upload_one_file")
-        dataset_id = created_res["data"]["dataset_id"]
-        file_paths = ["test_data/test.txt"]
-        res = ragflow.upload_local_file(dataset_id, file_paths)
-        assert res["code"] == RetCode.SUCCESS and res["message"] == "success"
-
-    def test_upload_nonexistent_files(self):
-        """
-        Test uploading a file which does not exist.
-        """
-        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
-        created_res = ragflow.create_dataset("test_upload_nonexistent_files")
-        dataset_id = created_res["data"]["dataset_id"]
-        file_paths = ["test_data/imagination.txt"]
-        res = ragflow.upload_local_file(dataset_id, file_paths)
-        assert res["code"] == RetCode.DATA_ERROR and "does not exist" in res["message"]
-
-    def test_upload_file_if_dataset_does_not_exist(self):
-        """
-        Test uploading files if the dataset id does not exist.
-        """
-        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
-        file_paths = ["test_data/test.txt"]
-        res = ragflow.upload_local_file("111", file_paths)
-        assert res["code"] == RetCode.DATA_ERROR and res["message"] == "Can't find this dataset"
-
-    def test_upload_file_without_name(self):
-        """
-        Test uploading files that do not have a name.
-        """
-        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
-        created_res = ragflow.create_dataset("test_upload_file_without_name")
-        dataset_id = created_res["data"]["dataset_id"]
-        file_paths = ["test_data/.txt"]
-        res = ragflow.upload_local_file(dataset_id, file_paths)
-        assert res["code"] == RetCode.SUCCESS
-
-    def test_upload_file_without_name1(self):
-        """
-        Test uploading files that do not have a name.
-        """
-        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
-        created_res = ragflow.create_dataset("test_upload_file_without_name")
-        dataset_id = created_res["data"]["dataset_id"]
-        file_paths = ["test_data/.txt", "test_data/empty.txt"]
-        res = ragflow.upload_local_file(dataset_id, file_paths)
-        assert res["code"] == RetCode.SUCCESS
-
-    def test_upload_files_exceeding_the_number_limit(self):
-        """
-        Test uploading files whose number exceeds the limit.
-        """
-        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
-        created_res = ragflow.create_dataset("test_upload_files_exceeding_the_number_limit")
-        dataset_id = created_res["data"]["dataset_id"]
-        file_paths = ["test_data/test.txt", "test_data/test1.txt"] * 256
-        res = ragflow.upload_local_file(dataset_id, file_paths)
-        assert (res["message"] ==
-                "You try to upload 512 files, which exceeds the maximum number of uploading files: 256"
-                and res["code"] == RetCode.DATA_ERROR)
-
-    def test_upload_files_without_files(self):
-        """
-        Test uploading files without files.
-        """
-        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
-        created_res = ragflow.create_dataset("test_upload_files_without_files")
-        dataset_id = created_res["data"]["dataset_id"]
-        file_paths = [None]
-        res = ragflow.upload_local_file(dataset_id, file_paths)
-        assert (res["message"] == "None is not string." and res["code"] == RetCode.ARGUMENT_ERROR)
-
-    def test_upload_files_with_two_files_with_same_name(self):
-        """
-        Test uploading files with the same name.
-        """
-        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
-        created_res = ragflow.create_dataset("test_upload_files_with_two_files_with_same_name")
-        dataset_id = created_res["data"]["dataset_id"]
-        file_paths = ["test_data/test.txt"] * 2
-        res = ragflow.upload_local_file(dataset_id, file_paths)
-        assert (res["message"] == "success" and res["code"] == RetCode.SUCCESS)
-
-    def test_upload_files_with_file_paths(self):
-        """
-        Test uploading files with only the file path's directory specified.
-        """
-        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
-        created_res = ragflow.create_dataset("test_upload_files_with_file_paths")
-        dataset_id = created_res["data"]["dataset_id"]
-        file_paths = ["test_data/"]
-        res = ragflow.upload_local_file(dataset_id, file_paths)
-        assert (res["message"] == "The file test_data/ does not exist" and res["code"] == RetCode.DATA_ERROR)
-
-    def test_upload_files_with_remote_file_path(self):
-        """
-        Test uploading files with remote files.
-        """
-        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
-        created_res = ragflow.create_dataset("test_upload_files_with_remote_file_path")
-        dataset_id = created_res["data"]["dataset_id"]
-        file_paths = ["https://github.com/genostack/ragflow"]
-        res = ragflow.upload_local_file(dataset_id, file_paths)
-        assert res["code"] == RetCode.ARGUMENT_ERROR and res["message"] == "Remote files have not unsupported."
-
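
The upload tests above pin down the boundaries of `upload_local_file`: at most 256 files per call, local paths only, and duplicate names tolerated. A minimal happy-path sketch under those constraints, assuming the `test_data` fixtures used by the suite:

from api.settings import RetCode
from ragflow import RAGFlow
from common import API_KEY, HOST_ADDRESS

ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
dataset_id = ragflow.create_dataset("upload_demo")["data"]["dataset_id"]

# Local paths only; remote URLs and missing files are rejected with ARGUMENT/DATA errors.
res = ragflow.upload_local_file(dataset_id, ["test_data/test.txt", "test_data/test1.txt"])
assert res["code"] == RetCode.SUCCESS

# The response carries one record per uploaded document, including its server-side id.
doc_ids = [doc["id"] for doc in res["data"]]
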
-    # ----------------------------delete a file-----------------------------------------------------
-    def test_delete_one_file(self):
-        """
-        Test deleting one file with success.
-        """
-        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
-        created_res = ragflow.create_dataset("test_delete_one_file")
-        dataset_id = created_res["data"]["dataset_id"]
-        file_paths = ["test_data/test.txt"]
-        res = ragflow.upload_local_file(dataset_id, file_paths)
-        # get the doc_id
-        data = res["data"][0]
-        doc_id = data["id"]
-        # delete the files
-        deleted_res = ragflow.delete_files(doc_id, dataset_id)
-        # assert value
-        assert deleted_res["code"] == RetCode.SUCCESS and deleted_res["data"] is True
-
-    def test_delete_document_with_not_existing_document(self):
-        """
-        Test deleting a document that does not exist with failure.
-        """
-        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
-        created_res = ragflow.create_dataset("test_delete_document_with_not_existing_document")
-        dataset_id = created_res["data"]["dataset_id"]
-        res = ragflow.delete_files("111", dataset_id)
-        assert res["code"] == RetCode.DATA_ERROR and res["message"] == "Document 111 not found!"
-
-    def test_delete_document_with_creating_100_documents_and_deleting_100_documents(self):
-        """
-        Test deleting documents when uploading 100 docs and deleting 100 docs.
-        """
-        # upload 100 docs
-        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
-        created_res = ragflow.create_dataset("test_delete_one_file")
-        dataset_id = created_res["data"]["dataset_id"]
-        file_paths = ["test_data/test.txt"] * 100
-        res = ragflow.upload_local_file(dataset_id, file_paths)
-
-        # get the doc_id
-        data = res["data"]
-        for d in data:
-            doc_id = d["id"]
-            # delete the files
-            deleted_res = ragflow.delete_files(doc_id, dataset_id)
-            # assert value
-            assert deleted_res["code"] == RetCode.SUCCESS and deleted_res["data"] is True
-
-    def test_delete_document_from_nonexistent_dataset(self):
-        """
-        Test deleting documents from a non-existent dataset.
-        """
-        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
-        created_res = ragflow.create_dataset("test_delete_one_file")
-        dataset_id = created_res["data"]["dataset_id"]
-        file_paths = ["test_data/test.txt"]
-        res = ragflow.upload_local_file(dataset_id, file_paths)
-        # get the doc_id
-        data = res["data"][0]
-        doc_id = data["id"]
-        # delete the files
-        deleted_res = ragflow.delete_files(doc_id, "000")
-        # assert value
-        assert (deleted_res["code"] == RetCode.ARGUMENT_ERROR and deleted_res["message"] ==
-                f"The document {doc_id} is not in the dataset: 000, but in the dataset: {dataset_id}.")
-
-    def test_delete_document_which_is_located_in_other_dataset(self):
-        """
-        Test deleting a document which is located in another dataset.
-        """
-        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
-        # upload a document
-        created_res = ragflow.create_dataset("test_delete_document_which_is_located_in_other_dataset")
-        created_res_id = created_res["data"]["dataset_id"]
-        file_paths = ["test_data/test.txt"]
-        res = ragflow.upload_local_file(created_res_id, file_paths)
-        # other dataset
-        other_res = ragflow.create_dataset("other_dataset")
-        other_dataset_id = other_res["data"]["dataset_id"]
-        # get the doc_id
-        data = res["data"][0]
-        doc_id = data["id"]
-        # delete the files from the other dataset
-        deleted_res = ragflow.delete_files(doc_id, other_dataset_id)
-        # assert value
-        assert (deleted_res["code"] == RetCode.ARGUMENT_ERROR and deleted_res["message"] ==
-                f"The document {doc_id} is not in the dataset: {other_dataset_id}, but in the dataset: {created_res_id}.")
-
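
As the deletion tests assert, `delete_files` takes the document id first and the dataset id second, and refuses to delete a document through any dataset other than its own. A sketch of the checked path:

from api.settings import RetCode
from ragflow import RAGFlow
from common import API_KEY, HOST_ADDRESS

ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
dataset_id = ragflow.create_dataset("delete_demo")["data"]["dataset_id"]
doc_id = ragflow.upload_local_file(dataset_id, ["test_data/test.txt"])["data"][0]["id"]

# The dataset id must be the document's own; a mismatched id yields an ARGUMENT error.
res = ragflow.delete_files(doc_id, dataset_id)
assert res["code"] == RetCode.SUCCESS and res["data"] is True
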
-    # ----------------------------list files-----------------------------------------------------
-    def test_list_documents_with_success(self):
-        """
-        Test listing documents with a successful outcome.
-        """
-        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
-        # upload a document
-        created_res = ragflow.create_dataset("test_list_documents_with_success")
-        created_res_id = created_res["data"]["dataset_id"]
-        file_paths = ["test_data/test.txt"]
-        ragflow.upload_local_file(created_res_id, file_paths)
-        # Call the list_document method
-        response = ragflow.list_files(created_res_id)
-        assert response["code"] == RetCode.SUCCESS and len(response["data"]["docs"]) == 1
-
-    def test_list_documents_with_checking_size(self):
-        """
-        Test listing documents and verify the size and names of the documents.
-        """
-        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
-        # upload 10 documents
-        created_res = ragflow.create_dataset("test_list_documents_with_checking_size")
-        created_res_id = created_res["data"]["dataset_id"]
-        file_paths = ["test_data/test.txt"] * 10
-        ragflow.upload_local_file(created_res_id, file_paths)
-        # Call the list_document method
-        response = ragflow.list_files(created_res_id)
-        assert response["code"] == RetCode.SUCCESS and len(response["data"]["docs"]) == 10
-
-    def test_list_documents_with_getting_empty_result(self):
-        """
-        Test listing documents that should be empty.
-        """
-        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
-        # upload 0 documents
-        created_res = ragflow.create_dataset("test_list_documents_with_getting_empty_result")
-        created_res_id = created_res["data"]["dataset_id"]
-        # Call the list_document method
-        response = ragflow.list_files(created_res_id)
-        assert response["code"] == RetCode.SUCCESS and len(response["data"]["docs"]) == 0
-
-    def test_list_documents_with_creating_100_documents(self):
-        """
-        Test listing 100 documents and verify the size of these documents.
-        """
-        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
-        # upload 100 documents
-        created_res = ragflow.create_dataset("test_list_documents_with_creating_100_documents")
-        created_res_id = created_res["data"]["dataset_id"]
-        file_paths = ["test_data/test.txt"] * 100
-        ragflow.upload_local_file(created_res_id, file_paths)
-        # Call the list_document method
-        response = ragflow.list_files(created_res_id)
-        assert response["code"] == RetCode.SUCCESS and len(response["data"]["docs"]) == 100
-
-    def test_list_document_with_failure(self):
-        """
-        Test listing documents with IndexError.
-        """
-        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
-        created_res = ragflow.create_dataset("test_list_document_with_failure")
-        created_res_id = created_res["data"]["dataset_id"]
-        response = ragflow.list_files(created_res_id, offset=-1, count=-1)
-        assert "IndexError" in response["message"] and response["code"] == RetCode.EXCEPTION_ERROR
-
-    def test_list_document_with_verifying_offset_and_count(self):
-        """
-        Test listing documents with verifying the functionalities of offset and count.
-        """
-        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
-        created_res = ragflow.create_dataset("test_list_document_with_verifying_offset_and_count")
-        created_res_id = created_res["data"]["dataset_id"]
-        file_paths = ["test_data/test.txt", "test_data/empty.txt"] * 10
-        ragflow.upload_local_file(created_res_id, file_paths)
-        # Call the list_document method
-        response = ragflow.list_files(created_res_id, offset=2, count=10)
-
-        assert response["code"] == RetCode.SUCCESS and len(response["data"]["docs"]) == 10
-
-    def test_list_document_with_verifying_keywords(self):
-        """
-        Test listing documents with verifying the functionality of searching keywords.
-        """
-        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
-        created_res = ragflow.create_dataset("test_list_document_with_verifying_keywords")
-        created_res_id = created_res["data"]["dataset_id"]
-        file_paths = ["test_data/test.txt", "test_data/empty.txt"]
-        ragflow.upload_local_file(created_res_id, file_paths)
-        # Call the list_document method
-        response = ragflow.list_files(created_res_id, keywords="empty")
-
-        assert response["code"] == RetCode.SUCCESS and len(response["data"]["docs"]) == 1
-
-    def test_list_document_with_verifying_order_by_and_descend(self):
-        """
-        Test listing documents with verifying the functionality of order_by and descend.
-        """
-        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
-        created_res = ragflow.create_dataset("test_list_document_with_verifying_order_by_and_descend")
-        created_res_id = created_res["data"]["dataset_id"]
-        file_paths = ["test_data/test.txt", "test_data/empty.txt"]
-        ragflow.upload_local_file(created_res_id, file_paths)
-        # Call the list_document method
-        response = ragflow.list_files(created_res_id)
-        assert response["code"] == RetCode.SUCCESS and len(response["data"]["docs"]) == 2
-        docs = response["data"]["docs"]
-        # reverse
-        i = 1
-        for doc in docs:
-            assert doc["name"] in file_paths[i]
-            i -= 1
-
-    def test_list_document_with_verifying_order_by_and_ascend(self):
-        """
-        Test listing documents with verifying the functionality of order_by and ascend.
-        """
-        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
-        created_res = ragflow.create_dataset("test_list_document_with_verifying_order_by_and_ascend")
-        created_res_id = created_res["data"]["dataset_id"]
-        file_paths = ["test_data/test.txt", "test_data/test1.txt", "test_data/empty.txt"]
-        ragflow.upload_local_file(created_res_id, file_paths)
-        # Call the list_document method
-        response = ragflow.list_files(created_res_id, descend=False)
-        assert response["code"] == RetCode.SUCCESS and len(response["data"]["docs"]) == 3
-
-        docs = response["data"]["docs"]
-
-        i = 0
-        for doc in docs:
-            assert doc["name"] in file_paths[i]
-            i += 1
-
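
The listing tests above cover the four knobs of `list_files`: `offset` and `count` for pagination, `keywords` for name filtering, and `descend` for sort order. A combined sketch:

from api.settings import RetCode
from ragflow import RAGFlow
from common import API_KEY, HOST_ADDRESS

ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
dataset_id = ragflow.create_dataset("list_demo")["data"]["dataset_id"]
ragflow.upload_local_file(dataset_id, ["test_data/test.txt", "test_data/empty.txt"])

# Page through in ascending order, keeping only names that match "empty".
res = ragflow.list_files(dataset_id, offset=0, count=10, keywords="empty", descend=False)
assert res["code"] == RetCode.SUCCESS
for doc in res["data"]["docs"]:
    print(doc["name"])
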
-    # ----------------------------update files: enable, rename, template_type-------------------------------------------
-
-    def test_update_nonexistent_document(self):
-        """
-        Test updating a document which does not exist.
-        """
-        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
-        created_res = ragflow.create_dataset("test_update_nonexistent_document")
-        created_res_id = created_res["data"]["dataset_id"]
-        params = {
-            "name": "new_name"
-        }
-        res = ragflow.update_file(created_res_id, "weird_doc_id", **params)
-        assert res["code"] == RetCode.ARGUMENT_ERROR and res["message"] == f"This document weird_doc_id cannot be found!"
-
-    def test_update_document_without_parameters(self):
-        """
-        Test updating a document without giving parameters.
-        """
-        # create a dataset
-        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
-        created_res = ragflow.create_dataset("test_update_document_without_parameters")
-        created_res_id = created_res["data"]["dataset_id"]
-        # upload files
-        file_paths = ["test_data/test.txt"]
-        uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
-        # get the doc_id
-        data = uploading_res["data"][0]
-        doc_id = data["id"]
-        # update file
-        params = {
-        }
-        update_res = ragflow.update_file(created_res_id, doc_id, **params)
-        assert (update_res["code"] == RetCode.DATA_ERROR and
-                update_res["message"] == "Please input at least one parameter that you want to update!")
-
-    def test_update_document_in_nonexistent_dataset(self):
-        """
-        Test updating a document in a nonexistent dataset.
-        """
-        # create a dataset
-        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
-        created_res = ragflow.create_dataset("test_update_document_in_nonexistent_dataset")
-        created_res_id = created_res["data"]["dataset_id"]
-        # upload files
-        file_paths = ["test_data/test.txt"]
-        uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
-        # get the doc_id
-        data = uploading_res["data"][0]
-        doc_id = data["id"]
-        # update file
-        params = {
-            "name": "new_name"
-        }
-        update_res = ragflow.update_file("fake_dataset_id", doc_id, **params)
-        assert (update_res["code"] == RetCode.DATA_ERROR and
-                update_res["message"] == f"This dataset fake_dataset_id cannot be found!")
-
-    def test_update_document_with_different_extension_name(self):
-        """
-        Test the updating of a document with an extension name that differs from its original.
-        """
-        # create a dataset
-        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
-        created_res = ragflow.create_dataset("test_update_document_with_different_extension_name")
-        created_res_id = created_res["data"]["dataset_id"]
-        # upload files
-        file_paths = ["test_data/test.txt"]
-        uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
-        # get the doc_id
-        data = uploading_res["data"][0]
-        doc_id = data["id"]
-        # update file
-        params = {
-            "name": "new_name.doc"
-        }
-        update_res = ragflow.update_file(created_res_id, doc_id, **params)
-        assert (update_res["code"] == RetCode.ARGUMENT_ERROR and
-                update_res["message"] == "The extension of file cannot be changed")
-
-    def test_update_document_with_duplicate_name(self):
-        """
-        Test the updating of a document with a duplicate name.
-        """
-        # create a dataset
-        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
-        created_res = ragflow.create_dataset("test_update_document_with_different_extension_name")
-        created_res_id = created_res["data"]["dataset_id"]
-        # upload files
-        file_paths = ["test_data/test.txt", "test_data/test1.txt"]
-        uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
-        # get the doc_id
-        data = uploading_res["data"][0]
-        doc_id = data["id"]
-        # update file
-        params = {
-            "name": "test.txt"
-        }
-        update_res = ragflow.update_file(created_res_id, doc_id, **params)
-        assert (update_res["code"] == RetCode.ARGUMENT_ERROR and
-                update_res["message"] == "Duplicated document name in the same dataset.")
-
-    def test_update_document_with_updating_its_name_with_success(self):
-        """
-        Test the updating of a document's name with success.
-        """
-        # create a dataset
-        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
-        created_res = ragflow.create_dataset("test_update_document_with_updating_its_name_with_success")
-        created_res_id = created_res["data"]["dataset_id"]
-        # upload files
-        file_paths = ["test_data/test.txt", "test_data/test1.txt"]
-        uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
-        # get the doc_id
-        data = uploading_res["data"][0]
-        doc_id = data["id"]
-        # update file
-        params = {
-            "name": "new_name.txt"
-        }
-        update_res = ragflow.update_file(created_res_id, doc_id, **params)
-        assert (update_res["code"] == RetCode.SUCCESS and
-                update_res["message"] == "Success" and update_res["data"]["name"] == "new_name.txt")
-
-    def test_update_document_with_updating_its_template_type_with_success(self):
-        """
-        Test the updating of a document's template type with success.
-        """
-        # create a dataset
-        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
-        created_res = ragflow.create_dataset("test_update_document_with_updating_its_template_type_with_success")
-        created_res_id = created_res["data"]["dataset_id"]
-        # upload files
-        file_paths = ["test_data/test.txt", "test_data/test1.txt"]
-        uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
-        # get the doc_id
-        data = uploading_res["data"][0]
-        doc_id = data["id"]
-        # update file
-        params = {
-            "template_type": "laws"
-        }
-        update_res = ragflow.update_file(created_res_id, doc_id, **params)
-        assert (update_res["code"] == RetCode.SUCCESS and
-                update_res["message"] == "Success" and update_res["data"]["parser_id"] == "laws")
-
-    def test_update_document_with_updating_its_enable_value_with_success(self):
-        """
-        Test the updating of a document's enable value with success.
-        """
-        # create a dataset
-        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
-        created_res = ragflow.create_dataset("test_update_document_with_updating_its_enable_value_with_success")
-        created_res_id = created_res["data"]["dataset_id"]
-        # upload files
-        file_paths = ["test_data/test.txt", "test_data/test1.txt"]
-        uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
-        # get the doc_id
-        data = uploading_res["data"][0]
-        doc_id = data["id"]
-        # update file
-        params = {
-            "enable": "0"
-        }
-        update_res = ragflow.update_file(created_res_id, doc_id, **params)
-        assert (update_res["code"] == RetCode.SUCCESS and
-                update_res["message"] == "Success" and update_res["data"]["status"] == "0")
-
-    def test_update_document_with_updating_illegal_parameter(self):
-        """
-        Test the updating of a document with an illegal parameter.
-        """
-        # create a dataset
-        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
-        created_res = ragflow.create_dataset("test_update_document_with_updating_illegal_parameter")
-        created_res_id = created_res["data"]["dataset_id"]
-        # upload files
-        file_paths = ["test_data/test.txt", "test_data/test1.txt"]
-        uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
-        # get the doc_id
-        data = uploading_res["data"][0]
-        doc_id = data["id"]
-        # update file
-        params = {
-            "illegal_parameter": "0"
-        }
-        update_res = ragflow.update_file(created_res_id, doc_id, **params)
-        assert (update_res["code"] == RetCode.ARGUMENT_ERROR and
-                update_res["message"] == "illegal_parameter is an illegal parameter.")
-
-    def test_update_document_with_giving_its_name_value(self):
-        """
-        Test the updating of a document's name without giving a new name value.
-        """
-        # create a dataset
-        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
-        created_res = ragflow.create_dataset("test_update_document_with_updating_its_name_with_success")
-        created_res_id = created_res["data"]["dataset_id"]
-        # upload files
-        file_paths = ["test_data/test.txt", "test_data/test1.txt"]
-        uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
-        # get the doc_id
-        data = uploading_res["data"][0]
-        doc_id = data["id"]
-        # update file
-        params = {
-            "name": ""
-        }
-        update_res = ragflow.update_file(created_res_id, doc_id, **params)
-        assert (update_res["code"] == RetCode.DATA_ERROR and
-                update_res["message"] == "There is no new name.")
-
-    def test_update_document_with_giving_illegal_value_for_enable(self):
-        """
-        Test the updating of a document with an illegal value for 'enable'.
-        """
-        # create a dataset
-        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
-        created_res = ragflow.create_dataset("test_update_document_with_updating_its_name_with_success")
-        created_res_id = created_res["data"]["dataset_id"]
-        # upload files
-        file_paths = ["test_data/test.txt", "test_data/test1.txt"]
-        uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
-        # get the doc_id
-        data = uploading_res["data"][0]
-        doc_id = data["id"]
-        # update file
-        params = {
-            "enable": "?"
-        }
-        update_res = ragflow.update_file(created_res_id, doc_id, **params)
-        assert (update_res["code"] == RetCode.DATA_ERROR and
-                update_res["message"] == "Illegal value ? for 'enable' field.")
-
-    def test_update_document_with_giving_illegal_value_for_type(self):
-        """
-        Test the updating of a document with an illegal value for 'template_type'.
-        """
-        # create a dataset
-        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
-        created_res = ragflow.create_dataset("test_update_document_with_updating_its_name_with_success")
-        created_res_id = created_res["data"]["dataset_id"]
-        # upload files
-        file_paths = ["test_data/test.txt", "test_data/test1.txt"]
-        uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
-        # get the doc_id
-        data = uploading_res["data"][0]
-        doc_id = data["id"]
-        # update file
-        params = {
-            "template_type": "?"
-        }
-        update_res = ragflow.update_file(created_res_id, doc_id, **params)
-        assert (update_res["code"] == RetCode.DATA_ERROR and
-                update_res["message"] == "Illegal value ? for 'template_type' field.")
-
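
As the tests above enumerate, `update_file` accepts exactly three fields: `name` (same extension, unique within the dataset, non-empty), `enable` ("0" or "1"), and `template_type` (a known parser id); anything else is rejected as an illegal parameter. A sketch of a valid rename:

from api.settings import RetCode
from ragflow import RAGFlow
from common import API_KEY, HOST_ADDRESS

ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
dataset_id = ragflow.create_dataset("update_demo")["data"]["dataset_id"]
doc_id = ragflow.upload_local_file(dataset_id, ["test_data/test.txt"])["data"][0]["id"]

# Rename in place; the new name must keep the .txt extension and be unique in the dataset.
res = ragflow.update_file(dataset_id, doc_id, name="renamed.txt")
assert res["code"] == RetCode.SUCCESS and res["data"]["name"] == "renamed.txt"
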
-    # ----------------------------download a file-----------------------------------------------------
-
-    def test_download_nonexistent_document(self):
-        """
-        Test downloading a document which does not exist.
-        """
-        # create a dataset
-        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
-        created_res = ragflow.create_dataset("test_download_nonexistent_document")
-        created_res_id = created_res["data"]["dataset_id"]
-        res = ragflow.download_file(created_res_id, "imagination")
-        assert res["code"] == RetCode.ARGUMENT_ERROR and res["message"] == f"This document 'imagination' cannot be found!"
-
-    def test_download_document_in_nonexistent_dataset(self):
-        """
-        Test downloading a document whose dataset is nonexistent.
-        """
-        # create a dataset
-        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
-        created_res = ragflow.create_dataset("test_download_nonexistent_document")
-        created_res_id = created_res["data"]["dataset_id"]
-        # upload files
-        file_paths = ["test_data/test.txt"]
-        uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
-        # get the doc_id
-        data = uploading_res["data"][0]
-        doc_id = data["id"]
-        # download file
-        res = ragflow.download_file("imagination", doc_id)
-        assert res["code"] == RetCode.DATA_ERROR and res["message"] == f"This dataset 'imagination' cannot be found!"
-
-    def test_download_document_with_success(self):
-        """
-        Test the downloading of a document with success.
-        """
-        # create a dataset
-        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
-        created_res = ragflow.create_dataset("test_download_nonexistent_document")
-        created_res_id = created_res["data"]["dataset_id"]
-        # upload files
-        file_paths = ["test_data/test.txt"]
-        uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
-        # get the doc_id
-        data = uploading_res["data"][0]
-        doc_id = data["id"]
-        # download file
-        with open("test_data/test.txt", "rb") as file:
-            binary_data = file.read()
-        res = ragflow.download_file(created_res_id, doc_id)
-        assert res["code"] == RetCode.SUCCESS and res["data"] == binary_data
-
-    def test_download_an_empty_document(self):
-        """
-        Test the downloading of an empty document.
-        """
-        # create a dataset
-        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
-        created_res = ragflow.create_dataset("test_download_nonexistent_document")
-        created_res_id = created_res["data"]["dataset_id"]
-        # upload files
-        file_paths = ["test_data/empty.txt"]
-        uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
-        # get the doc_id
-        data = uploading_res["data"][0]
-        doc_id = data["id"]
-        # download file
-        res = ragflow.download_file(created_res_id, doc_id)
-        assert res["code"] == RetCode.DATA_ERROR and res["message"] == "This file is empty."
-
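
`download_file` returns the raw bytes of the stored document in `data`, so a round-trip comparison against the local file is enough to verify it, as `test_download_document_with_success` does. A sketch that writes the payload back to disk:

from api.settings import RetCode
from ragflow import RAGFlow
from common import API_KEY, HOST_ADDRESS

ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
dataset_id = ragflow.create_dataset("download_demo")["data"]["dataset_id"]
doc_id = ragflow.upload_local_file(dataset_id, ["test_data/test.txt"])["data"][0]["id"]

res = ragflow.download_file(dataset_id, doc_id)
assert res["code"] == RetCode.SUCCESS  # empty documents come back as a DATA error instead

with open("downloaded_test.txt", "wb") as f:
    f.write(res["data"])  # data holds the document's raw bytes
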
-    # ----------------------------start parsing-----------------------------------------------------
-    def test_start_parsing_document_with_success(self):
-        """
-        Test the parsing of a document with success.
-        """
-        # create a dataset
-        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
-        created_res = ragflow.create_dataset("test_start_parsing_document_with_success")
-        created_res_id = created_res["data"]["dataset_id"]
-        # upload files
-        file_paths = ["test_data/lol.txt"]
-        uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
-        # get the doc_id
-        data = uploading_res["data"][0]
-        doc_id = data["id"]
-        # parse file
-        res = ragflow.start_parsing_document(created_res_id, doc_id)
-        assert res["code"] == RetCode.SUCCESS and res["message"] == ""
-
-    def test_start_parsing_nonexistent_document(self):
-        """
-        Test parsing a document which does not exist.
-        """
-        # create a dataset
-        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
-        created_res = ragflow.create_dataset("test_start_parsing_nonexistent_document")
-        created_res_id = created_res["data"]["dataset_id"]
-        res = ragflow.start_parsing_document(created_res_id, "imagination")
-        assert res["code"] == RetCode.ARGUMENT_ERROR and res["message"] == "This document 'imagination' cannot be found!"
-
-    def test_start_parsing_document_in_nonexistent_dataset(self):
-        """
-        Test parsing a document whose dataset is nonexistent.
-        """
-        # create a dataset
-        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
-        created_res = ragflow.create_dataset("test_download_nonexistent_document")
-        created_res_id = created_res["data"]["dataset_id"]
-        # upload files
-        file_paths = ["test_data/test.txt"]
-        uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
-        # get the doc_id
-        data = uploading_res["data"][0]
-        doc_id = data["id"]
-        # parse
-        res = ragflow.start_parsing_document("imagination", doc_id)
-        assert res["code"] == RetCode.DATA_ERROR and res["message"] == "This dataset 'imagination' cannot be found!"
-
-    def test_start_parsing_an_empty_document(self):
-        """
-        Test the parsing of an empty document.
-        """
-        # create a dataset
-        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
-        created_res = ragflow.create_dataset("test_download_nonexistent_document")
-        created_res_id = created_res["data"]["dataset_id"]
-        # upload files
-        file_paths = ["test_data/empty.txt"]
-        uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
-        # get the doc_id
-        data = uploading_res["data"][0]
-        doc_id = data["id"]
-        res = ragflow.start_parsing_document(created_res_id, doc_id)
-        assert res["code"] == RetCode.SUCCESS and res["message"] == "Empty data in the document: empty.txt; "
-
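
As these tests show, `start_parsing_document(dataset_id, doc_id)` returns an empty message when parsing starts cleanly and folds per-file problems, such as empty input, into the message string. A sketch:

from api.settings import RetCode
from ragflow import RAGFlow
from common import API_KEY, HOST_ADDRESS

ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
dataset_id = ragflow.create_dataset("parse_demo")["data"]["dataset_id"]
doc_id = ragflow.upload_local_file(dataset_id, ["test_data/test.txt"])["data"][0]["id"]

res = ragflow.start_parsing_document(dataset_id, doc_id)
# An empty message means parsing was kicked off cleanly; skipped files are reported in it.
assert res["code"] == RetCode.SUCCESS, res["message"]
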
-    # ------------------------parsing multiple documents----------------------------
-    def test_start_parsing_documents_in_nonexistent_dataset(self):
-        """
-        Test parsing documents whose dataset is nonexistent.
-        """
-        # create a dataset
-        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
-        created_res = ragflow.create_dataset("test_download_nonexistent_document")
-        created_res_id = created_res["data"]["dataset_id"]
-        # upload files
-        file_paths = ["test_data/test.txt"]
-        uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
-        # parse
-        res = ragflow.start_parsing_documents("imagination")
-        assert res["code"] == RetCode.DATA_ERROR and res["message"] == "This dataset 'imagination' cannot be found!"
-
-    def test_start_parsing_multiple_documents(self):
-        """
-        Test parsing documents with success.
-        """
-        # create a dataset
-        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
-        created_res = ragflow.create_dataset(" test_start_parsing_multiple_documents")
-        created_res_id = created_res["data"]["dataset_id"]
-        # upload files
-        file_paths = ["test_data/test.txt", "test_data/test1.txt"]
-        ragflow.upload_local_file(created_res_id, file_paths)
-        res = ragflow.start_parsing_documents(created_res_id)
-        assert res["code"] == RetCode.SUCCESS and res["data"] is True and res["message"] == ""
-
-    def test_start_parsing_multiple_documents_with_one_empty_file(self):
-        """
-        Test parsing documents, one of which is empty.
-        """
-        # create a dataset
-        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
-        created_res = ragflow.create_dataset(" test_start_parsing_multiple_documents")
-        created_res_id = created_res["data"]["dataset_id"]
-        # upload files
-        file_paths = ["test_data/test.txt", "test_data/test1.txt", "test_data/empty.txt"]
-        ragflow.upload_local_file(created_res_id, file_paths)
-        res = ragflow.start_parsing_documents(created_res_id)
-        assert res["code"] == RetCode.SUCCESS and res["message"] == "Empty data in the document: empty.txt; "
-
-    def test_start_parsing_multiple_specific_documents(self):
-        """
-        Test parsing documents whose document ids are specified.
-        """
-        # create a dataset
-        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
-        created_res = ragflow.create_dataset(" test_start_parsing_multiple_documents")
-        created_res_id = created_res["data"]["dataset_id"]
-        # upload files
-        file_paths = ["test_data/test.txt", "test_data/test1.txt"]
-        uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
-        # get the doc_id
-        data = uploading_res["data"]
-        doc_ids = []
-        for d in data:
-            doc_ids.append(d["id"])
-        res = ragflow.start_parsing_documents(created_res_id, doc_ids)
-        assert res["code"] == RetCode.SUCCESS and res["message"] == ""
-
-    def test_start_re_parsing_multiple_specific_documents(self):
-        """
-        Test re-parsing documents.
-        """
-        # create a dataset
-        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
-        created_res = ragflow.create_dataset(" test_start_parsing_multiple_documents")
-        created_res_id = created_res["data"]["dataset_id"]
-        # upload files
-        file_paths = ["test_data/test.txt", "test_data/test1.txt"]
-        uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
-        # get the doc_id
-        data = uploading_res["data"]
-        doc_ids = []
-        for d in data:
-            doc_ids.append(d["id"])
-        res = ragflow.start_parsing_documents(created_res_id, doc_ids)
-        assert res["code"] == RetCode.SUCCESS and res["message"] == ""
-        # re-parse
-        res = ragflow.start_parsing_documents(created_res_id, doc_ids)
-        assert res["code"] == RetCode.SUCCESS and res["message"] == ""
-
-    def test_start_re_parsing_multiple_specific_documents_with_changing_parser_id(self):
-        """
-        Test re-parsing documents after changing the parser id.
-        """
-        # create a dataset
-        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
-        created_res = ragflow.create_dataset(" test_start_parsing_multiple_documents")
-        created_res_id = created_res["data"]["dataset_id"]
-        # upload files
-        file_paths = ["test_data/test.txt", "test_data/test1.txt"]
-        uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
-        # get the doc_id
-        data = uploading_res["data"]
-        doc_ids = []
-        for d in data:
-            doc_ids.append(d["id"])
-        res = ragflow.start_parsing_documents(created_res_id, doc_ids)
-        assert res["code"] == RetCode.SUCCESS and res["message"] == ""
-        # general -> laws
-        params = {
-            "template_type": "laws"
-        }
-        ragflow.update_file(created_res_id, doc_ids[0], **params)
-        # re-parse
-        res = ragflow.start_parsing_documents(created_res_id, doc_ids)
-        assert res["code"] == RetCode.SUCCESS and res["message"] == ""
-
-    def test_start_re_parsing_multiple_specific_documents_with_changing_illegal_parser_id(self):
-        """
-        Test re-parsing documents after changing to an illegal parser id.
-        """
-        # create a dataset
-        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
-        created_res = ragflow.create_dataset(" test_start_parsing_multiple_documents")
-        created_res_id = created_res["data"]["dataset_id"]
-        # upload files
-        file_paths = ["test_data/test.txt", "test_data/test1.txt"]
-        uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
-        # get the doc_id
-        data = uploading_res["data"]
-        doc_ids = []
-        for d in data:
-            doc_ids.append(d["id"])
-        res = ragflow.start_parsing_documents(created_res_id, doc_ids)
-        assert res["code"] == RetCode.SUCCESS and res["message"] == ""
-        # general -> illegal
-        params = {
-            "template_type": "illegal"
-        }
-        res = ragflow.update_file(created_res_id, doc_ids[0], **params)
-        assert res["code"] == RetCode.DATA_ERROR and res["message"] == "Illegal value illegal for 'template_type' field."
-        # re-parse
-        res = ragflow.start_parsing_documents(created_res_id, doc_ids)
-        assert res["code"] == RetCode.SUCCESS and res["message"] == ""
-
-    def test_start_parsing_multiple_specific_documents_with_changing_illegal_parser_id(self):
-        """
-        Test parsing documents after changing to an illegal parser id.
-        """
-        # create a dataset
-        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
-        created_res = ragflow.create_dataset(" test_start_parsing_multiple_documents")
-        created_res_id = created_res["data"]["dataset_id"]
-        # upload files
-        file_paths = ["test_data/test.txt", "test_data/test1.txt"]
-        uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
-        # get the doc_id
-        data = uploading_res["data"]
-        doc_ids = []
-        for d in data:
-            doc_ids.append(d["id"])
-        # general -> illegal
-        params = {
-            "template_type": "illegal"
-        }
-        res = ragflow.update_file(created_res_id, doc_ids[0], **params)
-        assert res["code"] == RetCode.DATA_ERROR and res["message"] == "Illegal value illegal for 'template_type' field."
-        # re-parse
-        res = ragflow.start_parsing_documents(created_res_id, doc_ids)
-        assert res["code"] == RetCode.SUCCESS and res["message"] == ""
-
-    def test_start_parsing_multiple_documents_in_the_dataset_whose_parser_id_is_illegal(self):
-        """
-        Test parsing documents whose dataset's parser id is illegal.
-        """
-        # create a dataset
-        ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
-        created_res = ragflow.create_dataset("test_start_parsing_multiple_documents_in_the_dataset_whose_parser_id_is_illegal")
-        created_res_id = created_res["data"]["dataset_id"]
-        # update the parser id
-        params = {
-            "chunk_method": "illegal"
-        }
-        res = ragflow.update_dataset("test_start_parsing_multiple_documents_in_the_dataset_whose_parser_id_is_illegal", **params)
-        assert res["code"] == RetCode.DATA_ERROR and res["message"] == "Illegal value illegal for 'chunk_method' field."
-        # upload files
-        file_paths = ["test_data/test.txt", "test_data/test1.txt"]
-        uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
-        # get the doc_id
-        data = uploading_res["data"]
-        doc_ids = []
-        for d in data:
-            doc_ids.append(d["id"])
-        # parse
-        res = ragflow.start_parsing_documents(created_res_id, doc_ids)
-        assert res["code"] == RetCode.SUCCESS and res["message"] == ""
-
|
953 |
-
# ----------------------------stop parsing-----------------------------------------------------
|
954 |
-
def test_stop_parsing_document_with_success(self):
|
955 |
-
"""
|
956 |
-
Test the stopping parsing of a document with success.
|
957 |
-
"""
|
958 |
-
# create a dataset
|
959 |
-
ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
|
960 |
-
created_res = ragflow.create_dataset("test_start_parsing_document_with_success")
|
961 |
-
created_res_id = created_res["data"]["dataset_id"]
|
962 |
-
# upload files
|
963 |
-
file_paths = ["test_data/lol.txt"]
|
964 |
-
uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
|
965 |
-
# get the doc_id
|
966 |
-
data = uploading_res["data"][0]
|
967 |
-
doc_id = data["id"]
|
968 |
-
# parse file
|
969 |
-
res = ragflow.start_parsing_document(created_res_id, doc_id)
|
970 |
-
assert res["code"] == RetCode.SUCCESS and res["message"] == ""
|
971 |
-
res = ragflow.stop_parsing_document(created_res_id, doc_id)
|
972 |
-
assert res["code"] == RetCode.SUCCESS and res["message"] == ""
|
973 |
-
|
974 |
-
def test_stop_parsing_nonexistent_document(self):
|
975 |
-
"""
|
976 |
-
Test the stopping parsing a document which does not exist.
|
977 |
-
"""
|
978 |
-
# create a dataset
|
979 |
-
ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
|
980 |
-
created_res = ragflow.create_dataset("test_start_parsing_nonexistent_document")
|
981 |
-
created_res_id = created_res["data"]["dataset_id"]
|
982 |
-
res = ragflow.stop_parsing_document(created_res_id, "imagination.txt")
|
983 |
-
assert res["code"] == RetCode.ARGUMENT_ERROR and res["message"] == "This document 'imagination.txt' cannot be found!"
|
984 |
-
|
985 |
-
def test_stop_parsing_document_in_nonexistent_dataset(self):
|
986 |
-
"""
|
987 |
-
Test the stopping parsing a document whose dataset is nonexistent.
|
988 |
-
"""
|
989 |
-
# create a dataset
|
990 |
-
ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
|
991 |
-
created_res = ragflow.create_dataset("test_download_nonexistent_document")
|
992 |
-
created_res_id = created_res["data"]["dataset_id"]
|
993 |
-
# upload files
|
994 |
-
file_paths = ["test_data/test.txt"]
|
995 |
-
uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
|
996 |
-
# get the doc_id
|
997 |
-
data = uploading_res["data"][0]
|
998 |
-
doc_id = data["id"]
|
999 |
-
# parse
|
1000 |
-
res = ragflow.stop_parsing_document("imagination", doc_id)
|
1001 |
-
assert res["code"] == RetCode.DATA_ERROR and res["message"] == "This dataset 'imagination' cannot be found!"
|
1002 |
-
|
1003 |
-
# ------------------------stop parsing multiple documents----------------------------
|
1004 |
-
def test_stop_parsing_documents_in_nonexistent_dataset(self):
|
1005 |
-
"""
|
1006 |
-
Test the stopping parsing documents whose dataset is nonexistent.
|
1007 |
-
"""
|
1008 |
-
# create a dataset
|
1009 |
-
ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
|
1010 |
-
created_res = ragflow.create_dataset("test_download_nonexistent_document")
|
1011 |
-
created_res_id = created_res["data"]["dataset_id"]
|
1012 |
-
# upload files
|
1013 |
-
file_paths = ["test_data/test.txt"]
|
1014 |
-
uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
|
1015 |
-
# parse
|
1016 |
-
res = ragflow.stop_parsing_documents("imagination")
|
1017 |
-
assert res["code"] == RetCode.DATA_ERROR and res["message"] == "This dataset 'imagination' cannot be found!"
|
1018 |
-
|
1019 |
-
def test_stop_parsing_multiple_documents(self):
|
1020 |
-
"""
|
1021 |
-
Test the stopping parsing documents with a success.
|
1022 |
-
"""
|
1023 |
-
# create a dataset
|
1024 |
-
ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
|
1025 |
-
created_res = ragflow.create_dataset("test_start_parsing_multiple_documents")
|
1026 |
-
created_res_id = created_res["data"]["dataset_id"]
|
1027 |
-
# upload files
|
1028 |
-
file_paths = ["test_data/test.txt", "test_data/test1.txt"]
|
1029 |
-
ragflow.upload_local_file(created_res_id, file_paths)
|
1030 |
-
res = ragflow.start_parsing_documents(created_res_id)
|
1031 |
-
assert res["code"] == RetCode.SUCCESS and res["data"] is True and res["message"] == ""
|
1032 |
-
|
1033 |
-
res = ragflow.stop_parsing_documents(created_res_id)
|
1034 |
-
assert res["code"] == RetCode.SUCCESS and res["data"] is True and res["message"] == ""
|
1035 |
-
|
1036 |
-
def test_stop_parsing_multiple_documents_with_one_empty_file(self):
|
1037 |
-
"""
|
1038 |
-
Test the stopping parsing documents, one of which is empty.
|
1039 |
-
"""
|
1040 |
-
# create a dataset
|
1041 |
-
ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
|
1042 |
-
created_res = ragflow.create_dataset(" test_start_parsing_multiple_documents")
|
1043 |
-
created_res_id = created_res["data"]["dataset_id"]
|
1044 |
-
# upload files
|
1045 |
-
file_paths = ["test_data/test.txt", "test_data/test1.txt", "test_data/empty.txt"]
|
1046 |
-
ragflow.upload_local_file(created_res_id, file_paths)
|
1047 |
-
res = ragflow.start_parsing_documents(created_res_id)
|
1048 |
-
assert res["code"] == RetCode.SUCCESS and res["message"] == "Empty data in the document: empty.txt; "
|
1049 |
-
res = ragflow.stop_parsing_documents(created_res_id)
|
1050 |
-
assert res["code"] == RetCode.SUCCESS and res["data"] is True and res["message"] == ""
|
1051 |
-
|
1052 |
-
def test_stop_parsing_multiple_specific_documents(self):
|
1053 |
-
"""
|
1054 |
-
Test the stopping parsing documents whose document ids are specified.
|
1055 |
-
"""
|
1056 |
-
# create a dataset
|
1057 |
-
ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
|
1058 |
-
created_res = ragflow.create_dataset(" test_start_parsing_multiple_documents")
|
1059 |
-
created_res_id = created_res["data"]["dataset_id"]
|
1060 |
-
# upload files
|
1061 |
-
file_paths = ["test_data/test.txt", "test_data/test1.txt"]
|
1062 |
-
uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
|
1063 |
-
# get the doc_id
|
1064 |
-
data = uploading_res["data"]
|
1065 |
-
doc_ids = []
|
1066 |
-
for d in data:
|
1067 |
-
doc_ids.append(d["id"])
|
1068 |
-
res = ragflow.start_parsing_documents(created_res_id, doc_ids)
|
1069 |
-
assert res["code"] == RetCode.SUCCESS and res["message"] == ""
|
1070 |
-
res = ragflow.stop_parsing_documents(created_res_id, doc_ids)
|
1071 |
-
assert res["code"] == RetCode.SUCCESS and res["data"] is True and res["message"] == ""
|
1072 |
-
|
1073 |
-
# ----------------------------show the status of the file-----------------------------------------------------
|
1074 |
-
def test_show_status_with_success(self):
|
1075 |
-
# create a dataset
|
1076 |
-
ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
|
1077 |
-
created_res = ragflow.create_dataset("test_show_status_with_success")
|
1078 |
-
created_res_id = created_res["data"]["dataset_id"]
|
1079 |
-
# upload files
|
1080 |
-
file_paths = ["test_data/lol.txt"]
|
1081 |
-
uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
|
1082 |
-
# get the doc_id
|
1083 |
-
data = uploading_res["data"][0]
|
1084 |
-
doc_id = data["id"]
|
1085 |
-
# parse file
|
1086 |
-
res = ragflow.start_parsing_document(created_res_id, doc_id)
|
1087 |
-
assert res["code"] == RetCode.SUCCESS and res["message"] == ""
|
1088 |
-
# show status
|
1089 |
-
status_res = ragflow.show_parsing_status(created_res_id, doc_id)
|
1090 |
-
assert status_res["code"] == RetCode.SUCCESS and status_res["data"]["status"] == "RUNNING"
|
1091 |
-
|
1092 |
-
def test_show_status_nonexistent_document(self):
|
1093 |
-
"""
|
1094 |
-
Test showing the status of a document which does not exist.
|
1095 |
-
"""
|
1096 |
-
# create a dataset
|
1097 |
-
ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
|
1098 |
-
created_res = ragflow.create_dataset("test_show_status_nonexistent_document")
|
1099 |
-
created_res_id = created_res["data"]["dataset_id"]
|
1100 |
-
res = ragflow.show_parsing_status(created_res_id, "imagination")
|
1101 |
-
assert res["code"] == RetCode.DATA_ERROR and res["message"] == "This document: 'imagination' is not a valid document."
|
1102 |
-
|
1103 |
-
def test_show_status_document_in_nonexistent_dataset(self):
|
1104 |
-
"""
|
1105 |
-
Test showing the status of a document whose dataset is nonexistent.
|
1106 |
-
"""
|
1107 |
-
# create a dataset
|
1108 |
-
ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
|
1109 |
-
created_res = ragflow.create_dataset("test_show_status_document_in_nonexistent_dataset")
|
1110 |
-
created_res_id = created_res["data"]["dataset_id"]
|
1111 |
-
# upload files
|
1112 |
-
file_paths = ["test_data/test.txt"]
|
1113 |
-
uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
|
1114 |
-
# get the doc_id
|
1115 |
-
data = uploading_res["data"][0]
|
1116 |
-
doc_id = data["id"]
|
1117 |
-
# parse
|
1118 |
-
res = ragflow.show_parsing_status("imagination", doc_id)
|
1119 |
-
assert res["code"] == RetCode.DATA_ERROR and res["message"] == "This dataset: 'imagination' cannot be found!"
|
1120 |
-
# ----------------------------list the chunks of the file-----------------------------------------------------
|
1121 |
-
|
1122 |
-
# ----------------------------delete the chunk-----------------------------------------------------
|
1123 |
-
|
1124 |
-
# ----------------------------edit the status of the chunk-----------------------------------------------------
|
1125 |
-
|
1126 |
-
# ----------------------------insert a new chunk-----------------------------------------------------
|
1127 |
-
|
1128 |
-
# ----------------------------get a specific chunk-----------------------------------------------------
|
1129 |
-
|
1130 |
-
# ----------------------------retrieval test-----------------------------------------------------
|
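The deleted tests above all drive the same document-parsing lifecycle through the legacy SDK: create a dataset, upload files, start parsing, optionally poll the parsing status, then stop parsing. A minimal standalone sketch of that flow, assuming the legacy client class RAGFlow and the RetCode import path used by this suite; API_KEY, HOST_ADDRESS, and the test_data file are placeholders taken from the tests:

    from api.settings import RetCode  # assumed import path, as used elsewhere in this repo
    from ragflow import RAGFlow       # assumed legacy SDK entry point removed in this PR

    API_KEY = "<your-api-key>"              # placeholder credential
    HOST_ADDRESS = "http://127.0.0.1:9380"  # placeholder server address

    ragflow = RAGFlow(API_KEY, HOST_ADDRESS)

    # create a dataset and upload one local file into it
    created_res = ragflow.create_dataset("parsing_lifecycle_demo")
    dataset_id = created_res["data"]["dataset_id"]
    uploading_res = ragflow.upload_local_file(dataset_id, ["test_data/test.txt"])
    doc_id = uploading_res["data"][0]["id"]

    # start parsing the document and confirm the call succeeded
    res = ragflow.start_parsing_document(dataset_id, doc_id)
    assert res["code"] == RetCode.SUCCESS

    # while the task is in flight, the status endpoint reports "RUNNING"
    status_res = ragflow.show_parsing_status(dataset_id, doc_id)
    print(status_res["data"]["status"])

    # stop the in-flight parsing task
    res = ragflow.stop_parsing_document(dataset_id, doc_id)
    assert res["code"] == RetCode.SUCCESS

The batch variants (start_parsing_documents / stop_parsing_documents) follow the same shape, taking an optional list of document ids and operating on the whole dataset when the list is omitted.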