liuhua committed
Commit b7fdabe · 1 Parent(s): 124ecb2

Delete useless files (#2889)


### What problem does this PR solve?

Delete useless files

### Type of change


- [x] Other (please describe):
Delete useless files

Co-authored-by: liuhua <[email protected]>

api/apps/dataset_api.py DELETED
@@ -1,880 +0,0 @@
- #
- #  Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
- #
- #  Licensed under the Apache License, Version 2.0 (the "License");
- #  you may not use this file except in compliance with the License.
- #  You may obtain a copy of the License at
- #
- #      http://www.apache.org/licenses/LICENSE-2.0
- #
- #  Unless required by applicable law or agreed to in writing, software
- #  distributed under the License is distributed on an "AS IS" BASIS,
- #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- #  See the License for the specific language governing permissions and
- #  limitations under the License.
- import os
- import pathlib
- import re
- import warnings
- from functools import partial
- from io import BytesIO
-
- from elasticsearch_dsl import Q
- from flask import request, send_file
- from flask_login import login_required, current_user
- from httpx import HTTPError
-
- from api.contants import NAME_LENGTH_LIMIT
- from api.db import FileType, ParserType, FileSource, TaskStatus
- from api.db import StatusEnum
- from api.db.db_models import File
- from api.db.services import duplicate_name
- from api.db.services.document_service import DocumentService
- from api.db.services.file2document_service import File2DocumentService
- from api.db.services.file_service import FileService
- from api.db.services.knowledgebase_service import KnowledgebaseService
- from api.db.services.user_service import TenantService
- from api.settings import RetCode
- from api.utils import get_uuid
- from api.utils.api_utils import construct_json_result, construct_error_response
- from api.utils.api_utils import construct_result, validate_request
- from api.utils.file_utils import filename_type, thumbnail
- from rag.app import book, laws, manual, naive, one, paper, presentation, qa, resume, table, picture, audio, email
- from rag.nlp import search
- from rag.utils.es_conn import ELASTICSEARCH
- from rag.utils.storage_factory import STORAGE_IMPL
-
- MAXIMUM_OF_UPLOADING_FILES = 256
-
-
- # ------------------------------ create a dataset ---------------------------------------
-
- @manager.route("/", methods=["POST"])
- @login_required  # use login
- @validate_request("name")  # check name key
- def create_dataset():
-     # Check if Authorization header is present
-     authorization_token = request.headers.get("Authorization")
-     if not authorization_token:
-         return construct_json_result(code=RetCode.AUTHENTICATION_ERROR, message="Authorization header is missing.")
-
-     # TODO: Login or API key
-     # objs = APIToken.query(token=authorization_token)
-     #
-     # # Authorization error
-     # if not objs:
-     #     return construct_json_result(code=RetCode.AUTHENTICATION_ERROR, message="Token is invalid.")
-     #
-     # tenant_id = objs[0].tenant_id
-
-     tenant_id = current_user.id
-     request_body = request.json
-
-     # In case that there's no name
-     if "name" not in request_body:
-         return construct_json_result(code=RetCode.DATA_ERROR, message="Expected 'name' field in request body")
-
-     dataset_name = request_body["name"]
-
-     # empty dataset_name
-     if not dataset_name:
-         return construct_json_result(code=RetCode.DATA_ERROR, message="Empty dataset name")
-
-     # In case that there's space in the head or the tail
-     dataset_name = dataset_name.strip()
-
-     # In case that the length of the name exceeds the limit
-     dataset_name_length = len(dataset_name)
-     if dataset_name_length > NAME_LENGTH_LIMIT:
-         return construct_json_result(
-             code=RetCode.DATA_ERROR,
-             message=f"Dataset name: {dataset_name} with length {dataset_name_length} exceeds {NAME_LENGTH_LIMIT}!")
-
-     # In case that there are other fields in the data-binary
-     if len(request_body.keys()) > 1:
-         name_list = []
-         for key_name in request_body.keys():
-             if key_name != "name":
-                 name_list.append(key_name)
-         return construct_json_result(code=RetCode.DATA_ERROR,
-                                      message=f"fields: {name_list}, are not allowed in request body.")
-
-     # If there is a duplicate name, it will modify it to make it unique
-     request_body["name"] = duplicate_name(
-         KnowledgebaseService.query,
-         name=dataset_name,
-         tenant_id=tenant_id,
-         status=StatusEnum.VALID.value)
-     try:
-         request_body["id"] = get_uuid()
-         request_body["tenant_id"] = tenant_id
-         request_body["created_by"] = tenant_id
-         exist, t = TenantService.get_by_id(tenant_id)
-         if not exist:
-             return construct_result(code=RetCode.AUTHENTICATION_ERROR, message="Tenant not found.")
-         request_body["embd_id"] = t.embd_id
-         if not KnowledgebaseService.save(**request_body):
-             # failed to create new dataset
-             return construct_result()
-         return construct_json_result(code=RetCode.SUCCESS,
-                                      data={"dataset_name": request_body["name"], "dataset_id": request_body["id"]})
-     except Exception as e:
-         return construct_error_response(e)
-
-
- # -----------------------------list datasets-------------------------------------------------------
-
- @manager.route("/", methods=["GET"])
- @login_required
- def list_datasets():
-     offset = request.args.get("offset", 0)
-     count = request.args.get("count", -1)
-     orderby = request.args.get("orderby", "create_time")
-     desc = request.args.get("desc", True)
-     try:
-         tenants = TenantService.get_joined_tenants_by_user_id(current_user.id)
-         datasets = KnowledgebaseService.get_by_tenant_ids_by_offset(
-             [m["tenant_id"] for m in tenants], current_user.id, int(offset), int(count), orderby, desc)
-         return construct_json_result(data=datasets, code=RetCode.SUCCESS, message=f"List datasets successfully!")
-     except Exception as e:
-         return construct_error_response(e)
-     except HTTPError as http_err:
-         return construct_json_result(http_err)
-
-
- # ---------------------------------delete a dataset ----------------------------
-
- @manager.route("/<dataset_id>", methods=["DELETE"])
- @login_required
- def remove_dataset(dataset_id):
-     try:
-         datasets = KnowledgebaseService.query(created_by=current_user.id, id=dataset_id)
-
-         # according to the id, searching for the dataset
-         if not datasets:
-             return construct_json_result(message=f"The dataset cannot be found for your current account.",
-                                          code=RetCode.OPERATING_ERROR)
-
-         # Iterating the documents inside the dataset
-         for doc in DocumentService.query(kb_id=dataset_id):
-             if not DocumentService.remove_document(doc, datasets[0].tenant_id):
-                 # the process of deleting failed
-                 return construct_json_result(code=RetCode.DATA_ERROR,
-                                              message="There was an error during the document removal process. "
-                                                      "Please check the status of the RAGFlow server and try the removal again.")
-             # delete the other files
-             f2d = File2DocumentService.get_by_document_id(doc.id)
-             FileService.filter_delete([File.source_type == FileSource.KNOWLEDGEBASE, File.id == f2d[0].file_id])
-             File2DocumentService.delete_by_document_id(doc.id)
-
-         # delete the dataset
-         if not KnowledgebaseService.delete_by_id(dataset_id):
-             return construct_json_result(code=RetCode.DATA_ERROR,
-                                          message="There was an error during the dataset removal process. "
-                                                  "Please check the status of the RAGFlow server and try the removal again.")
-         # success
-         return construct_json_result(code=RetCode.SUCCESS, message=f"Remove dataset: {dataset_id} successfully")
-     except Exception as e:
-         return construct_error_response(e)
-
-
- # ------------------------------ get details of a dataset ----------------------------------------
-
- @manager.route("/<dataset_id>", methods=["GET"])
- @login_required
- def get_dataset(dataset_id):
-     try:
-         dataset = KnowledgebaseService.get_detail(dataset_id)
-         if not dataset:
-             return construct_json_result(code=RetCode.DATA_ERROR, message="Can't find this dataset!")
-         return construct_json_result(data=dataset, code=RetCode.SUCCESS)
-     except Exception as e:
-         return construct_json_result(e)
-
-
- # ------------------------------ update a dataset --------------------------------------------
-
- @manager.route("/<dataset_id>", methods=["PUT"])
- @login_required
- def update_dataset(dataset_id):
-     req = request.json
-     try:
-         # the request cannot be empty
-         if not req:
-             return construct_json_result(code=RetCode.DATA_ERROR, message="Please input at least one parameter that "
-                                                                           "you want to update!")
-         # check whether the dataset can be found
-         if not KnowledgebaseService.query(created_by=current_user.id, id=dataset_id):
-             return construct_json_result(message=f"Only the owner of knowledgebase is authorized for this operation!",
-                                          code=RetCode.OPERATING_ERROR)
-
-         exist, dataset = KnowledgebaseService.get_by_id(dataset_id)
-         # check whether there is this dataset
-         if not exist:
-             return construct_json_result(code=RetCode.DATA_ERROR, message="This dataset cannot be found!")
-
-         if "name" in req:
-             name = req["name"].strip()
-             # check whether there is duplicate name
-             if name.lower() != dataset.name.lower() \
-                     and len(KnowledgebaseService.query(name=name, tenant_id=current_user.id,
-                                                        status=StatusEnum.VALID.value)) > 1:
-                 return construct_json_result(code=RetCode.DATA_ERROR,
-                                              message=f"The name: {name.lower()} is already used by other "
-                                                      f"datasets. Please choose a different name.")
-
-         dataset_updating_data = {}
-         chunk_num = req.get("chunk_num")
-         # modify the value of 11 parameters
-
-         # 2 parameters: embedding id and chunk method
-         # only if chunk_num is 0, the user can update the embedding id
-         if req.get("embedding_model_id"):
-             if chunk_num == 0:
-                 dataset_updating_data["embd_id"] = req["embedding_model_id"]
-             else:
-                 return construct_json_result(code=RetCode.DATA_ERROR,
-                                              message="You have already parsed the document in this "
-                                                      "dataset, so you cannot change the embedding "
-                                                      "model.")
-         # only if chunk_num is 0, the user can update the chunk_method
-         if "chunk_method" in req:
-             type_value = req["chunk_method"]
-             if is_illegal_value_for_enum(type_value, ParserType):
-                 return construct_json_result(message=f"Illegal value {type_value} for 'chunk_method' field.",
-                                              code=RetCode.DATA_ERROR)
-             if chunk_num != 0:
-                 construct_json_result(code=RetCode.DATA_ERROR, message="You have already parsed the document "
-                                                                        "in this dataset, so you cannot "
-                                                                        "change the chunk method.")
-             dataset_updating_data["parser_id"] = req["template_type"]
-
-         # convert the photo parameter to avatar
-         if req.get("photo"):
-             dataset_updating_data["avatar"] = req["photo"]
-
-         # layout_recognize
-         if "layout_recognize" in req:
-             if "parser_config" not in dataset_updating_data:
-                 dataset_updating_data['parser_config'] = {}
-             dataset_updating_data['parser_config']['layout_recognize'] = req['layout_recognize']
-
-         # TODO: updating use_raptor needs to construct a class
-
-         # 6 parameters
-         for key in ["name", "language", "description", "permission", "id", "token_num"]:
-             if key in req:
-                 dataset_updating_data[key] = req.get(key)
-
-         # update
-         if not KnowledgebaseService.update_by_id(dataset.id, dataset_updating_data):
-             return construct_json_result(code=RetCode.OPERATING_ERROR, message="Failed to update! "
-                                                                                "Please check the status of RAGFlow "
-                                                                                "server and try again!")
-
-         exist, dataset = KnowledgebaseService.get_by_id(dataset.id)
-         if not exist:
-             return construct_json_result(code=RetCode.DATA_ERROR, message="Failed to get the dataset "
-                                                                           "using the dataset ID.")
-
-         return construct_json_result(data=dataset.to_json(), code=RetCode.SUCCESS)
-     except Exception as e:
-         return construct_error_response(e)
-
-
- # --------------------------------content management ----------------------------------------------
-
- # ----------------------------upload files-----------------------------------------------------
- @manager.route("/<dataset_id>/documents/", methods=["POST"])
- @login_required
- def upload_documents(dataset_id):
-     # no files
-     if not request.files:
-         return construct_json_result(
-             message="There is no file!", code=RetCode.ARGUMENT_ERROR)
-
-     # the number of uploading files exceeds the limit
-     file_objs = request.files.getlist("file")
-     num_file_objs = len(file_objs)
-
-     if num_file_objs > MAXIMUM_OF_UPLOADING_FILES:
-         return construct_json_result(code=RetCode.DATA_ERROR, message=f"You try to upload {num_file_objs} files, "
-                                                                       f"which exceeds the maximum number of uploading files: {MAXIMUM_OF_UPLOADING_FILES}")
-
-     # no dataset
-     exist, dataset = KnowledgebaseService.get_by_id(dataset_id)
-     if not exist:
-         return construct_json_result(message="Can't find this dataset", code=RetCode.DATA_ERROR)
-
-     for file_obj in file_objs:
-         file_name = file_obj.filename
-         # no name
-         if not file_name:
-             return construct_json_result(
-                 message="There is a file without name!", code=RetCode.ARGUMENT_ERROR)
-
-         # TODO: support the remote files
-         if 'http' in file_name:
-             return construct_json_result(code=RetCode.ARGUMENT_ERROR, message="Remote files have not unsupported.")
-
-     # get the root_folder
-     root_folder = FileService.get_root_folder(current_user.id)
-     # get the id of the root_folder
-     parent_file_id = root_folder["id"]  # document id
-     # this is for the new user, create '.knowledgebase' file
-     FileService.init_knowledgebase_docs(parent_file_id, current_user.id)
-     # go inside this folder, get the kb_root_folder
-     kb_root_folder = FileService.get_kb_folder(current_user.id)
-     # link the file management to the kb_folder
-     kb_folder = FileService.new_a_file_from_kb(dataset.tenant_id, dataset.name, kb_root_folder["id"])
-
-     # grab all the errs
-     err = []
-     MAX_FILE_NUM_PER_USER = int(os.environ.get("MAX_FILE_NUM_PER_USER", 0))
-     uploaded_docs_json = []
-     for file in file_objs:
-         try:
-             # TODO: get this value from the database as some tenants have this limit while others don't
-             if MAX_FILE_NUM_PER_USER > 0 and DocumentService.get_doc_count(dataset.tenant_id) >= MAX_FILE_NUM_PER_USER:
-                 return construct_json_result(code=RetCode.DATA_ERROR,
-                                              message="Exceed the maximum file number of a free user!")
-             # deal with the duplicate name
-             filename = duplicate_name(
-                 DocumentService.query,
-                 name=file.filename,
-                 kb_id=dataset.id)
-
-             # deal with the unsupported type
-             filetype = filename_type(filename)
-             if filetype == FileType.OTHER.value:
-                 return construct_json_result(code=RetCode.DATA_ERROR,
-                                              message="This type of file has not been supported yet!")
-
-             # upload to the minio
-             location = filename
-             while STORAGE_IMPL.obj_exist(dataset_id, location):
-                 location += "_"
-
-             blob = file.read()
-
-             # the content is empty, raising a warning
-             if blob == b'':
-                 warnings.warn(f"[WARNING]: The content of the file {filename} is empty.")
-
-             STORAGE_IMPL.put(dataset_id, location, blob)
-
-             doc = {
-                 "id": get_uuid(),
-                 "kb_id": dataset.id,
-                 "parser_id": dataset.parser_id,
-                 "parser_config": dataset.parser_config,
-                 "created_by": current_user.id,
-                 "type": filetype,
-                 "name": filename,
-                 "location": location,
-                 "size": len(blob),
-                 "thumbnail": thumbnail(filename, blob)
-             }
-             if doc["type"] == FileType.VISUAL:
-                 doc["parser_id"] = ParserType.PICTURE.value
-             if doc["type"] == FileType.AURAL:
-                 doc["parser_id"] = ParserType.AUDIO.value
-             if re.search(r"\.(ppt|pptx|pages)$", filename):
-                 doc["parser_id"] = ParserType.PRESENTATION.value
-             if re.search(r"\.(eml)$", filename):
-                 doc["parser_id"] = ParserType.EMAIL.value
-             DocumentService.insert(doc)
-
-             FileService.add_file_from_kb(doc, kb_folder["id"], dataset.tenant_id)
-             uploaded_docs_json.append(doc)
-         except Exception as e:
-             err.append(file.filename + ": " + str(e))
-
-     if err:
-         # return all the errors
-         return construct_json_result(message="\n".join(err), code=RetCode.SERVER_ERROR)
-     # success
-     return construct_json_result(data=uploaded_docs_json, code=RetCode.SUCCESS)
-
-
- # ----------------------------delete a file-----------------------------------------------------
- @manager.route("/<dataset_id>/documents/<document_id>", methods=["DELETE"])
- @login_required
- def delete_document(document_id, dataset_id):  # string
-     # get the root folder
-     root_folder = FileService.get_root_folder(current_user.id)
-     # parent file's id
-     parent_file_id = root_folder["id"]
-     # consider the new user
-     FileService.init_knowledgebase_docs(parent_file_id, current_user.id)
-     # store all the errors that may have
-     errors = ""
-     try:
-         # whether there is this document
-         exist, doc = DocumentService.get_by_id(document_id)
-         if not exist:
-             return construct_json_result(message=f"Document {document_id} not found!", code=RetCode.DATA_ERROR)
-         # whether this doc is authorized by this tenant
-         tenant_id = DocumentService.get_tenant_id(document_id)
-         if not tenant_id:
-             return construct_json_result(
-                 message=f"You cannot delete this document {document_id} due to the authorization"
-                         f" reason!", code=RetCode.AUTHENTICATION_ERROR)
-
-         # get the doc's id and location
-         real_dataset_id, location = File2DocumentService.get_storage_address(doc_id=document_id)
-
-         if real_dataset_id != dataset_id:
-             return construct_json_result(message=f"The document {document_id} is not in the dataset: {dataset_id}, "
-                                                  f"but in the dataset: {real_dataset_id}.", code=RetCode.ARGUMENT_ERROR)
-
-         # there is an issue when removing
-         if not DocumentService.remove_document(doc, tenant_id):
-             return construct_json_result(
-                 message="There was an error during the document removal process. Please check the status of the "
-                         "RAGFlow server and try the removal again.", code=RetCode.OPERATING_ERROR)
-
-         # fetch the File2Document record associated with the provided document ID.
-         file_to_doc = File2DocumentService.get_by_document_id(document_id)
-         # delete the associated File record.
-         FileService.filter_delete([File.source_type == FileSource.KNOWLEDGEBASE, File.id == file_to_doc[0].file_id])
-         # delete the File2Document record itself using the document ID. This removes the
-         # association between the document and the file after the File record has been deleted.
-         File2DocumentService.delete_by_document_id(document_id)
-
-         # delete it from minio
-         STORAGE_IMPL.rm(dataset_id, location)
-     except Exception as e:
-         errors += str(e)
-     if errors:
-         return construct_json_result(data=False, message=errors, code=RetCode.SERVER_ERROR)
-
-     return construct_json_result(data=True, code=RetCode.SUCCESS)
-
-
- # ----------------------------list files-----------------------------------------------------
- @manager.route('/<dataset_id>/documents/', methods=['GET'])
- @login_required
- def list_documents(dataset_id):
-     if not dataset_id:
-         return construct_json_result(
-             data=False, message="Lack of 'dataset_id'", code=RetCode.ARGUMENT_ERROR)
-
-     # searching keywords
-     keywords = request.args.get("keywords", "")
-
-     offset = request.args.get("offset", 0)
-     count = request.args.get("count", -1)
-     order_by = request.args.get("order_by", "create_time")
-     descend = request.args.get("descend", True)
-     try:
-         docs, total = DocumentService.list_documents_in_dataset(dataset_id, int(offset), int(count), order_by,
-                                                                 descend, keywords)
-
-         return construct_json_result(data={"total": total, "docs": docs}, message=RetCode.SUCCESS)
-     except Exception as e:
-         return construct_error_response(e)
-
-
- # ----------------------------update: enable rename-----------------------------------------------------
- @manager.route("/<dataset_id>/documents/<document_id>", methods=["PUT"])
- @login_required
- def update_document(dataset_id, document_id):
-     req = request.json
-     try:
-         legal_parameters = set()
-         legal_parameters.add("name")
-         legal_parameters.add("enable")
-         legal_parameters.add("template_type")
-
-         for key in req.keys():
-             if key not in legal_parameters:
-                 return construct_json_result(code=RetCode.ARGUMENT_ERROR, message=f"{key} is an illegal parameter.")
-
-         # The request body cannot be empty
-         if not req:
-             return construct_json_result(
-                 code=RetCode.DATA_ERROR,
-                 message="Please input at least one parameter that you want to update!")
-
-         # Check whether there is this dataset
-         exist, dataset = KnowledgebaseService.get_by_id(dataset_id)
-         if not exist:
-             return construct_json_result(code=RetCode.DATA_ERROR, message=f"This dataset {dataset_id} cannot be found!")
-
-         # The document does not exist
-         exist, document = DocumentService.get_by_id(document_id)
-         if not exist:
-             return construct_json_result(message=f"This document {document_id} cannot be found!",
-                                          code=RetCode.ARGUMENT_ERROR)
-
-         # Deal with the different keys
-         updating_data = {}
-         if "name" in req:
-             new_name = req["name"]
-             updating_data["name"] = new_name
-             # Check whether the new_name is suitable
-             # 1. no name value
-             if not new_name:
-                 return construct_json_result(code=RetCode.DATA_ERROR, message="There is no new name.")
-
-             # 2. In case that there's space in the head or the tail
-             new_name = new_name.strip()
-
-             # 3. Check whether the new_name has the same extension of file as before
-             if pathlib.Path(new_name.lower()).suffix != pathlib.Path(
-                     document.name.lower()).suffix:
-                 return construct_json_result(
-                     data=False,
-                     message="The extension of file cannot be changed",
-                     code=RetCode.ARGUMENT_ERROR)
-
-             # 4. Check whether the new name has already been occupied by other file
-             for d in DocumentService.query(name=new_name, kb_id=document.kb_id):
-                 if d.name == new_name:
-                     return construct_json_result(
-                         message="Duplicated document name in the same dataset.",
-                         code=RetCode.ARGUMENT_ERROR)
-
-         if "enable" in req:
-             enable_value = req["enable"]
-             if is_illegal_value_for_enum(enable_value, StatusEnum):
-                 return construct_json_result(message=f"Illegal value {enable_value} for 'enable' field.",
-                                              code=RetCode.DATA_ERROR)
-             updating_data["status"] = enable_value
-
-         # TODO: Chunk-method - update parameters inside the json object parser_config
-         if "template_type" in req:
-             type_value = req["template_type"]
-             if is_illegal_value_for_enum(type_value, ParserType):
-                 return construct_json_result(message=f"Illegal value {type_value} for 'template_type' field.",
-                                              code=RetCode.DATA_ERROR)
-             updating_data["parser_id"] = req["template_type"]
-
-         # The process of updating
-         if not DocumentService.update_by_id(document_id, updating_data):
-             return construct_json_result(
-                 code=RetCode.OPERATING_ERROR,
-                 message="Failed to update document in the database! "
-                         "Please check the status of RAGFlow server and try again!")
-
-         # name part: file service
-         if "name" in req:
-             # Get file by document id
-             file_information = File2DocumentService.get_by_document_id(document_id)
-             if file_information:
-                 exist, file = FileService.get_by_id(file_information[0].file_id)
-                 FileService.update_by_id(file.id, {"name": req["name"]})
-
-         exist, document = DocumentService.get_by_id(document_id)
-
-         # Success
-         return construct_json_result(data=document.to_json(), message="Success", code=RetCode.SUCCESS)
-     except Exception as e:
-         return construct_error_response(e)
-
-
- # Helper method to judge whether it's an illegal value
- def is_illegal_value_for_enum(value, enum_class):
-     return value not in enum_class.__members__.values()
-
-
- # ----------------------------download a file-----------------------------------------------------
- @manager.route("/<dataset_id>/documents/<document_id>", methods=["GET"])
- @login_required
- def download_document(dataset_id, document_id):
-     try:
-         # Check whether there is this dataset
-         exist, _ = KnowledgebaseService.get_by_id(dataset_id)
-         if not exist:
-             return construct_json_result(code=RetCode.DATA_ERROR,
-                                          message=f"This dataset '{dataset_id}' cannot be found!")
-
-         # Check whether there is this document
-         exist, document = DocumentService.get_by_id(document_id)
-         if not exist:
-             return construct_json_result(message=f"This document '{document_id}' cannot be found!",
-                                          code=RetCode.ARGUMENT_ERROR)
-
-         # The process of downloading
-         doc_id, doc_location = File2DocumentService.get_storage_address(doc_id=document_id)  # minio address
-         file_stream = STORAGE_IMPL.get(doc_id, doc_location)
-         if not file_stream:
-             return construct_json_result(message="This file is empty.", code=RetCode.DATA_ERROR)
-
-         file = BytesIO(file_stream)
-
-         # Use send_file with a proper filename and MIME type
-         return send_file(
-             file,
-             as_attachment=True,
-             download_name=document.name,
-             mimetype='application/octet-stream'  # Set a default MIME type
-         )
-
-     # Error
-     except Exception as e:
-         return construct_error_response(e)
-
-
- # ----------------------------start parsing a document-----------------------------------------------------
- # helper method for parsing
- # callback method
- def doc_parse_callback(doc_id, prog=None, msg=""):
-     cancel = DocumentService.do_cancel(doc_id)
-     if cancel:
-         raise Exception("The parsing process has been cancelled!")
-
- """
- def doc_parse(binary, doc_name, parser_name, tenant_id, doc_id):
-     match parser_name:
-         case "book":
-             book.chunk(doc_name, binary=binary, callback=partial(doc_parse_callback, doc_id))
-         case "laws":
-             laws.chunk(doc_name, binary=binary, callback=partial(doc_parse_callback, doc_id))
-         case "manual":
-             manual.chunk(doc_name, binary=binary, callback=partial(doc_parse_callback, doc_id))
-         case "naive":
-             # It's the mode by default, which is general in the front-end
-             naive.chunk(doc_name, binary=binary, callback=partial(doc_parse_callback, doc_id))
-         case "one":
-             one.chunk(doc_name, binary=binary, callback=partial(doc_parse_callback, doc_id))
-         case "paper":
-             paper.chunk(doc_name, binary=binary, callback=partial(doc_parse_callback, doc_id))
-         case "picture":
-             picture.chunk(doc_name, binary=binary, tenant_id=tenant_id, lang="Chinese",
-                           callback=partial(doc_parse_callback, doc_id))
-         case "presentation":
-             presentation.chunk(doc_name, binary=binary, callback=partial(doc_parse_callback, doc_id))
-         case "qa":
-             qa.chunk(doc_name, binary=binary, callback=partial(doc_parse_callback, doc_id))
-         case "resume":
-             resume.chunk(doc_name, binary=binary, callback=partial(doc_parse_callback, doc_id))
-         case "table":
-             table.chunk(doc_name, binary=binary, callback=partial(doc_parse_callback, doc_id))
-         case "audio":
-             audio.chunk(doc_name, binary=binary, callback=partial(doc_parse_callback, doc_id))
-         case "email":
-             email.chunk(doc_name, binary=binary, callback=partial(doc_parse_callback, doc_id))
-         case _:
-             return False
-
-     return True
- """
-
-
- @manager.route("/<dataset_id>/documents/<document_id>/status", methods=["POST"])
- @login_required
- def parse_document(dataset_id, document_id):
-     try:
-         # valid dataset
-         exist, _ = KnowledgebaseService.get_by_id(dataset_id)
-         if not exist:
-             return construct_json_result(code=RetCode.DATA_ERROR,
-                                          message=f"This dataset '{dataset_id}' cannot be found!")
-
-         return parsing_document_internal(document_id)
-
-     except Exception as e:
-         return construct_error_response(e)
-
-
- # ----------------------------start parsing documents-----------------------------------------------------
- @manager.route("/<dataset_id>/documents/status", methods=["POST"])
- @login_required
- def parse_documents(dataset_id):
-     doc_ids = request.json["doc_ids"]
-     try:
-         exist, _ = KnowledgebaseService.get_by_id(dataset_id)
-         if not exist:
-             return construct_json_result(code=RetCode.DATA_ERROR,
-                                          message=f"This dataset '{dataset_id}' cannot be found!")
-         # two conditions
-         if not doc_ids:
-             # documents inside the dataset
-             docs, total = DocumentService.list_documents_in_dataset(dataset_id, 0, -1, "create_time",
-                                                                     True, "")
-             doc_ids = [doc["id"] for doc in docs]
-
-         message = ""
-         # for loop
-         for id in doc_ids:
-             res = parsing_document_internal(id)
-             res_body = res.json
-             if res_body["code"] == RetCode.SUCCESS:
-                 message += res_body["message"]
-             else:
-                 return res
-         return construct_json_result(data=True, code=RetCode.SUCCESS, message=message)
-
-     except Exception as e:
-         return construct_error_response(e)
-
-
- # helper method for parsing the document
- def parsing_document_internal(id):
-     message = ""
-     try:
-         # Check whether there is this document
-         exist, document = DocumentService.get_by_id(id)
-         if not exist:
-             return construct_json_result(message=f"This document '{id}' cannot be found!",
-                                          code=RetCode.ARGUMENT_ERROR)
-
-         tenant_id = DocumentService.get_tenant_id(id)
-         if not tenant_id:
-             return construct_json_result(message="Tenant not found!", code=RetCode.AUTHENTICATION_ERROR)
-
-         info = {"run": "1", "progress": 0}
-         info["progress_msg"] = ""
-         info["chunk_num"] = 0
-         info["token_num"] = 0
-
-         DocumentService.update_by_id(id, info)
-
-         ELASTICSEARCH.deleteByQuery(Q("match", doc_id=id), idxnm=search.index_name(tenant_id))
-
-         _, doc_attributes = DocumentService.get_by_id(id)
-         doc_attributes = doc_attributes.to_dict()
-         doc_id = doc_attributes["id"]
-
-         bucket, doc_name = File2DocumentService.get_storage_address(doc_id=doc_id)
-         binary = STORAGE_IMPL.get(bucket, doc_name)
-         parser_name = doc_attributes["parser_id"]
-         if binary:
-             res = doc_parse(binary, doc_name, parser_name, tenant_id, doc_id)
-             if res is False:
-                 message += f"The parser id: {parser_name} of the document {doc_id} is not supported; "
-         else:
-             message += f"Empty data in the document: {doc_name}; "
-         # failed in parsing
-         if doc_attributes["status"] == TaskStatus.FAIL.value:
-             message += f"Failed in parsing the document: {doc_id}; "
-         return construct_json_result(code=RetCode.SUCCESS, message=message)
-     except Exception as e:
-         return construct_error_response(e)
-
-
- # ----------------------------stop parsing a doc-----------------------------------------------------
- @manager.route("<dataset_id>/documents/<document_id>/status", methods=["DELETE"])
- @login_required
- def stop_parsing_document(dataset_id, document_id):
-     try:
-         # valid dataset
-         exist, _ = KnowledgebaseService.get_by_id(dataset_id)
-         if not exist:
-             return construct_json_result(code=RetCode.DATA_ERROR,
-                                          message=f"This dataset '{dataset_id}' cannot be found!")
-
-         return stop_parsing_document_internal(document_id)
-
-     except Exception as e:
-         return construct_error_response(e)
-
-
- # ----------------------------stop parsing docs-----------------------------------------------------
- @manager.route("<dataset_id>/documents/status", methods=["DELETE"])
- @login_required
- def stop_parsing_documents(dataset_id):
-     doc_ids = request.json["doc_ids"]
-     try:
-         # valid dataset?
-         exist, _ = KnowledgebaseService.get_by_id(dataset_id)
-         if not exist:
-             return construct_json_result(code=RetCode.DATA_ERROR,
-                                          message=f"This dataset '{dataset_id}' cannot be found!")
-         if not doc_ids:
-             # documents inside the dataset
-             docs, total = DocumentService.list_documents_in_dataset(dataset_id, 0, -1, "create_time",
-                                                                     True, "")
-             doc_ids = [doc["id"] for doc in docs]
-
-         message = ""
-         # for loop
-         for id in doc_ids:
-             res = stop_parsing_document_internal(id)
-             res_body = res.json
-             if res_body["code"] == RetCode.SUCCESS:
-                 message += res_body["message"]
-             else:
-                 return res
-         return construct_json_result(data=True, code=RetCode.SUCCESS, message=message)
-
-     except Exception as e:
-         return construct_error_response(e)
-
-
- # Helper method
- def stop_parsing_document_internal(document_id):
-     try:
-         # valid doc?
-         exist, doc = DocumentService.get_by_id(document_id)
-         if not exist:
-             return construct_json_result(message=f"This document '{document_id}' cannot be found!",
-                                          code=RetCode.ARGUMENT_ERROR)
-         doc_attributes = doc.to_dict()
-
-         # only when the status is parsing, we need to stop it
-         if doc_attributes["status"] == TaskStatus.RUNNING.value:
-             tenant_id = DocumentService.get_tenant_id(document_id)
-             if not tenant_id:
-                 return construct_json_result(message="Tenant not found!", code=RetCode.AUTHENTICATION_ERROR)
-
-             # update successfully?
-             if not DocumentService.update_by_id(document_id, {"status": "2"}):  # cancel
-                 return construct_json_result(
-                     code=RetCode.OPERATING_ERROR,
-                     message="There was an error during the stopping parsing the document process. "
-                             "Please check the status of the RAGFlow server and try the update again."
-                 )
-
-         _, doc_attributes = DocumentService.get_by_id(document_id)
-         doc_attributes = doc_attributes.to_dict()
-
-         # failed in stop parsing
-         if doc_attributes["status"] == TaskStatus.RUNNING.value:
-             return construct_json_result(message=f"Failed in parsing the document: {document_id}; ", code=RetCode.SUCCESS)
-         return construct_json_result(code=RetCode.SUCCESS, message="")
-     except Exception as e:
-         return construct_error_response(e)
-
-
- # ----------------------------show the status of the file-----------------------------------------------------
- @manager.route("/<dataset_id>/documents/<document_id>/status", methods=["GET"])
- @login_required
- def show_parsing_status(dataset_id, document_id):
-     try:
-         # valid dataset
-         exist, _ = KnowledgebaseService.get_by_id(dataset_id)
-         if not exist:
-             return construct_json_result(code=RetCode.DATA_ERROR,
-                                          message=f"This dataset: '{dataset_id}' cannot be found!")
-         # valid document
-         exist, _ = DocumentService.get_by_id(document_id)
-         if not exist:
-             return construct_json_result(code=RetCode.DATA_ERROR,
-                                          message=f"This document: '{document_id}' is not a valid document.")
-
-         _, doc = DocumentService.get_by_id(document_id)  # get doc object
-         doc_attributes = doc.to_dict()
-
-         return construct_json_result(
-             data={"progress": doc_attributes["progress"], "status": TaskStatus(doc_attributes["status"]).name},
-             code=RetCode.SUCCESS
-         )
-     except Exception as e:
-         return construct_error_response(e)
-
- # ----------------------------list the chunks of the file-----------------------------------------------------
-
- # -- --------------------------delete the chunk-----------------------------------------------------
-
- # ----------------------------edit the status of the chunk-----------------------------------------------------
-
- # ----------------------------insert a new chunk-----------------------------------------------------
-
- # ----------------------------upload a file-----------------------------------------------------
-
- # ----------------------------get a specific chunk-----------------------------------------------------
-
- # ----------------------------retrieval test-----------------------------------------------------
sdk/python/test/test_basic.py DELETED
@@ -1,48 +0,0 @@
- from test_sdkbase import TestSdk
- import ragflow
- from ragflow.ragflow import RAGFLow
- import pytest
- from unittest.mock import MagicMock
- from common import API_KEY, HOST_ADDRESS
-
-
- class TestBasic(TestSdk):
-
-     def test_version(self):
-         print(ragflow.__version__)
-
-     # def test_create_dataset(self):
-     #     res = RAGFLow(API_KEY, HOST_ADDRESS).create_dataset('abc')
-     #     print(res)
-     #
-     # def test_delete_dataset(self):
-     #     assert RAGFLow('123', 'url').delete_dataset('abc') == 'abc'
-     #
-     # def test_list_dataset_success(self, ragflow_instance, monkeypatch):
-     #     # Mocking the response of requests.get method
-     #     mock_response = MagicMock()
-     #     mock_response.status_code = 200
-     #     mock_response.json.return_value = {'datasets': [{'id': 1, 'name': 'dataset1'}, {'id': 2, 'name': 'dataset2'}]}
-     #
-     #     # Patching requests.get to return the mock_response
-     #     monkeypatch.setattr("requests.get", MagicMock(return_value=mock_response))
-     #
-     #     # Call the method under test
-     #     result = ragflow_instance.list_dataset()
-     #
-     #     # Assertion
-     #     assert result == [{'id': 1, 'name': 'dataset1'}, {'id': 2, 'name': 'dataset2'}]
-     #
-     # def test_list_dataset_failure(self, ragflow_instance, monkeypatch):
-     #     # Mocking the response of requests.get method
-     #     mock_response = MagicMock()
-     #     mock_response.status_code = 404  # Simulating a failed request
-     #
-     #     # Patching requests.get to return the mock_response
-     #     monkeypatch.setattr("requests.get", MagicMock(return_value=mock_response))
-     #
-     #     # Call the method under test
-     #     result = ragflow_instance.list_dataset()
-     #
-     #     # Assertion
-     #     assert result is None
sdk/python/test/test_dataset.py DELETED
@@ -1,468 +0,0 @@
1
- from api.settings import RetCode
2
- from test_sdkbase import TestSdk
3
- from ragflow import RAGFlow
4
- import pytest
5
- from common import API_KEY, HOST_ADDRESS
6
- from api.contants import NAME_LENGTH_LIMIT
7
-
8
-
9
- class TestDataset(TestSdk):
10
- """
11
- This class contains a suite of tests for the dataset management functionality within the RAGFlow system.
12
- It ensures that the following functionalities as expected:
13
- 1. create a kb
14
- 2. list the kb
15
- 3. get the detail info according to the kb id
16
- 4. update the kb
17
- 5. delete the kb
18
- """
19
-
20
- def setup_method(self):
21
- """
22
- Delete all the datasets.
23
- """
24
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
25
- listed_data = ragflow.list_dataset()
26
- listed_data = listed_data['data']
27
-
28
- listed_names = {d['name'] for d in listed_data}
29
- for name in listed_names:
30
- ragflow.delete_dataset(name)
31
-
32
- # -----------------------create_dataset---------------------------------
33
- def test_create_dataset_with_success(self):
34
- """
35
- Test the creation of a new dataset with success.
36
- """
37
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
38
- # create a kb
39
- res = ragflow.create_dataset("kb1")
40
- assert res['code'] == RetCode.SUCCESS and res['message'] == 'success'
41
-
42
- def test_create_dataset_with_empty_name(self):
43
- """
44
- Test the creation of a new dataset with an empty name.
45
- """
46
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
47
- res = ragflow.create_dataset("")
48
- assert res['message'] == 'Empty dataset name' and res['code'] == RetCode.DATA_ERROR
49
-
50
- def test_create_dataset_with_name_exceeding_limit(self):
51
- """
52
- Test the creation of a new dataset with the length of name exceeding the limit.
53
- """
54
- name = "k" * NAME_LENGTH_LIMIT + "b"
55
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
56
- res = ragflow.create_dataset(name)
57
- assert (res['message'] == f"Dataset name: {name} with length {len(name)} exceeds {NAME_LENGTH_LIMIT}!"
58
- and res['code'] == RetCode.DATA_ERROR)
59
-
60
- def test_create_dataset_name_with_space_in_the_middle(self):
61
- """
62
- Test the creation of a new dataset whose name has space in the middle.
63
- """
64
- name = "k b"
65
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
66
- res = ragflow.create_dataset(name)
67
- assert (res['code'] == RetCode.SUCCESS and res['message'] == 'success')
68
-
69
- def test_create_dataset_name_with_space_in_the_head(self):
70
- """
71
- Test the creation of a new dataset whose name has space in the head.
72
- """
73
- name = " kb"
74
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
75
- res = ragflow.create_dataset(name)
76
- assert (res['code'] == RetCode.SUCCESS and res['message'] == 'success')
77
-
78
- def test_create_dataset_name_with_space_in_the_tail(self):
79
- """
80
- Test the creation of a new dataset whose name has space in the tail.
81
- """
82
- name = "kb "
83
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
84
- res = ragflow.create_dataset(name)
85
- assert (res['code'] == RetCode.SUCCESS and res['message'] == 'success')
86
-
87
- def test_create_dataset_name_with_space_in_the_head_and_tail_and_length_exceed_limit(self):
88
- """
89
- Test the creation of a new dataset whose name has space in the head and tail,
90
- and the length of the name exceeds the limit.
91
- """
92
- name = " " + "k" * NAME_LENGTH_LIMIT + " "
93
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
94
- res = ragflow.create_dataset(name)
95
- assert (res['code'] == RetCode.SUCCESS and res['message'] == 'success')
96
-
97
- def test_create_dataset_with_two_same_name(self):
98
- """
99
- Test the creation of two new datasets with the same name.
100
- """
101
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
102
- res = ragflow.create_dataset("kb")
103
- assert (res['code'] == RetCode.SUCCESS and res['message'] == 'success')
104
- res = ragflow.create_dataset("kb")
105
- assert (res['code'] == RetCode.SUCCESS and res['message'] == 'success')
106
-
107
- def test_create_dataset_with_only_space_in_the_name(self):
108
- """
109
- Test the creation of a dataset whose name only has space.
110
- """
111
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
112
- res = ragflow.create_dataset(" ")
113
- assert (res['code'] == RetCode.SUCCESS and res['message'] == 'success')
114
-
115
- def test_create_dataset_with_space_number_exceeding_limit(self):
116
- """
117
- Test the creation of a dataset with a name that only has space exceeds the allowed limit.
118
- """
119
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
120
- name = " " * NAME_LENGTH_LIMIT
121
- res = ragflow.create_dataset(name)
122
- assert (res['code'] == RetCode.SUCCESS and res['message'] == 'success')
123
-
124
- def test_create_dataset_with_name_having_return(self):
125
- """
126
- Test the creation of a dataset with a name that has return symbol.
127
- """
128
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
129
- name = "kb\n"
130
- res = ragflow.create_dataset(name)
131
- assert (res['code'] == RetCode.SUCCESS and res['message'] == 'success')
132
-
133
- def test_create_dataset_with_name_having_the_null_character(self):
134
- """
135
- Test the creation of a dataset with a name that has the null character.
136
- """
137
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
138
- name = "kb\0"
139
- res = ragflow.create_dataset(name)
140
- assert (res['code'] == RetCode.SUCCESS and res['message'] == 'success')
141
-
142
- # -----------------------list_dataset---------------------------------
143
- def test_list_dataset_success(self):
144
- """
145
- Test listing datasets with a successful outcome.
146
- """
147
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
148
- # Call the list_datasets method
149
- response = ragflow.list_dataset()
150
- assert response['code'] == RetCode.SUCCESS
151
-
152
- def test_list_dataset_with_checking_size_and_name(self):
153
- """
154
- Test listing datasets and verify the size and names of the datasets.
155
- """
156
- datasets_to_create = ["dataset1", "dataset2", "dataset3"]
157
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
158
- created_response = [ragflow.create_dataset(name) for name in datasets_to_create]
159
-
160
- real_name_to_create = set()
161
- for response in created_response:
162
- assert 'data' in response, "Response is missing 'data' key"
163
- dataset_name = response['data']['dataset_name']
164
- real_name_to_create.add(dataset_name)
165
-
166
- response = ragflow.list_dataset(0, 3)
167
- listed_data = response['data']
168
-
169
- listed_names = {d['name'] for d in listed_data}
170
- assert listed_names == real_name_to_create
171
- assert response['code'] == RetCode.SUCCESS
172
- assert len(listed_data) == len(datasets_to_create)
173
-
174
- def test_list_dataset_with_getting_empty_result(self):
175
- """
176
- Test listing datasets that should be empty.
177
- """
178
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
179
- datasets_to_create = []
180
- created_response = [ragflow.create_dataset(name) for name in datasets_to_create]
181
-
182
- real_name_to_create = set()
183
- for response in created_response:
184
- assert 'data' in response, "Response is missing 'data' key"
185
- dataset_name = response['data']['dataset_name']
186
- real_name_to_create.add(dataset_name)
187
-
188
- response = ragflow.list_dataset(0, 0)
189
- listed_data = response['data']
190
-
191
- listed_names = {d['name'] for d in listed_data}
192
-
193
- assert listed_names == real_name_to_create
194
- assert response['code'] == RetCode.SUCCESS
195
- assert len(listed_data) == 0
196
-
197
- def test_list_dataset_with_creating_100_knowledge_bases(self):
198
- """
199
- Test listing 100 datasets and verify the size and names of these datasets.
200
- """
201
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
202
- datasets_to_create = ["dataset1"] * 100
203
- created_response = [ragflow.create_dataset(name) for name in datasets_to_create]
204
-
205
- real_name_to_create = set()
206
- for response in created_response:
207
- assert 'data' in response, "Response is missing 'data' key"
208
- dataset_name = response['data']['dataset_name']
209
- real_name_to_create.add(dataset_name)
210
-
211
- res = ragflow.list_dataset(0, 100)
212
- listed_data = res['data']
213
-
214
- listed_names = {d['name'] for d in listed_data}
215
- assert listed_names == real_name_to_create
216
- assert res['code'] == RetCode.SUCCESS
217
- assert len(listed_data) == 100
218
-
219
- def test_list_dataset_with_showing_one_dataset(self):
220
- """
221
- Test listing one dataset and verify the size of the dataset.
222
- """
223
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
224
- response = ragflow.list_dataset(0, 1)
225
- datasets = response['data']
226
- assert len(datasets) == 1 and response['code'] == RetCode.SUCCESS
227
-
228
- def test_list_dataset_failure(self):
229
- """
230
- Test listing datasets with IndexError.
231
- """
232
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
233
- response = ragflow.list_dataset(-1, -1)
234
- assert "IndexError" in response['message'] and response['code'] == RetCode.EXCEPTION_ERROR
235
-
236
- def test_list_dataset_for_empty_datasets(self):
237
- """
238
- Test listing datasets when the datasets are empty.
239
- """
240
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
241
- response = ragflow.list_dataset()
242
- datasets = response['data']
243
- assert len(datasets) == 0 and response['code'] == RetCode.SUCCESS
244
-
245
- # TODO: have to set the limitation of the number of datasets
246
-
247
- # -----------------------delete_dataset---------------------------------
248
- def test_delete_one_dataset_with_success(self):
249
- """
250
- Test deleting a dataset with success.
251
- """
252
- # get the real name of the created dataset
253
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
254
- res = ragflow.create_dataset("kb0")
255
- real_dataset_name = res['data']['dataset_name']
256
- # delete this dataset
257
- res = ragflow.delete_dataset(real_dataset_name)
258
- assert res['code'] == RetCode.SUCCESS and 'successfully' in res['message']
259
-
260
- def test_delete_dataset_with_not_existing_dataset(self):
261
- """
262
- Test deleting a dataset that does not exist with failure.
263
- """
264
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
265
- res = ragflow.delete_dataset("weird_dataset")
266
- assert res['code'] == RetCode.OPERATING_ERROR and res['message'] == 'The dataset cannot be found for your current account.'
267
-
268
- def test_delete_dataset_with_creating_100_datasets_and_deleting_100_datasets(self):
269
- """
270
- Test deleting a dataset when creating 100 datasets and deleting 100 datasets.
271
- """
272
- # create 100 datasets
273
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
274
- datasets_to_create = ["dataset1"] * 100
275
- created_response = [ragflow.create_dataset(name) for name in datasets_to_create]
276
-
277
- real_name_to_create = set()
278
- for response in created_response:
279
- assert 'data' in response, "Response is missing 'data' key"
280
- dataset_name = response['data']['dataset_name']
281
- real_name_to_create.add(dataset_name)
282
-
283
- for name in real_name_to_create:
284
- res = ragflow.delete_dataset(name)
285
- assert res['code'] == RetCode.SUCCESS and 'successfully' in res['message']
286
-
287
- def test_delete_dataset_with_space_in_the_middle_of_the_name(self):
288
- """
289
- Test deleting a dataset when its name has space in the middle.
290
- """
291
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
292
- ragflow.create_dataset("k b")
293
- res = ragflow.delete_dataset("k b")
294
- assert res['code'] == RetCode.SUCCESS and 'successfully' in res['message']
295
-
296
- def test_delete_dataset_with_space_in_the_head_of_the_name(self):
297
- """
298
- Test deleting a dataset when its name has space in the head.
299
- """
300
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
301
- ragflow.create_dataset(" kb")
302
- res = ragflow.delete_dataset(" kb")
303
- assert (res['code'] == RetCode.OPERATING_ERROR
304
- and res['message'] == 'The dataset cannot be found for your current account.')
305
-
306
- def test_delete_dataset_with_space_in_the_tail_of_the_name(self):
307
- """
308
- Test deleting a dataset when its name has space in the tail.
309
- """
310
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
311
- ragflow.create_dataset("kb ")
312
- res = ragflow.delete_dataset("kb ")
313
- assert (res['code'] == RetCode.OPERATING_ERROR
314
- and res['message'] == 'The dataset cannot be found for your current account.')
315
-
316
- def test_delete_dataset_with_only_space_in_the_name(self):
317
- """
318
- Test deleting a dataset when its name only has space.
319
- """
320
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
321
- ragflow.create_dataset(" ")
322
- res = ragflow.delete_dataset(" ")
323
- assert (res['code'] == RetCode.OPERATING_ERROR
324
- and res['message'] == 'The dataset cannot be found for your current account.')
325
-
326
- def test_delete_dataset_with_only_exceeding_limit_space_in_the_name(self):
327
- """
328
- Test deleting a dataset when its name only has space and the number of it exceeds the limit.
329
- """
330
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
331
- name = " " * (NAME_LENGTH_LIMIT + 1)
332
- ragflow.create_dataset(name)
333
- res = ragflow.delete_dataset(name)
334
- assert (res['code'] == RetCode.OPERATING_ERROR
335
- and res['message'] == 'The dataset cannot be found for your current account.')
336
-
337
- def test_delete_dataset_with_name_with_space_in_the_head_and_tail_and_length_exceed_limit(self):
338
- """
339
- Test deleting a dataset whose name has space in the head and tail,
340
- and the length of the name exceeds the limit.
341
- """
342
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
343
- name = " " + "k" * NAME_LENGTH_LIMIT + " "
344
- ragflow.create_dataset(name)
345
- res = ragflow.delete_dataset(name)
346
- assert (res['code'] == RetCode.OPERATING_ERROR
347
- and res['message'] == 'The dataset cannot be found for your current account.')
348
-
349
- # ---------------------------------get_dataset-----------------------------------------
350
-
351
- def test_get_dataset_with_success(self):
352
- """
353
- Test getting a dataset which exists.
354
- """
355
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
356
- response = ragflow.create_dataset("test")
357
- dataset_name = response['data']['dataset_name']
358
- res = ragflow.get_dataset(dataset_name)
359
- assert res['code'] == RetCode.SUCCESS and res['data']['name'] == dataset_name
360
-
361
- def test_get_dataset_with_failure(self):
362
- """
363
- Test getting a dataset which does not exist.
364
- """
365
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
366
- res = ragflow.get_dataset("weird_dataset")
367
- assert res['code'] == RetCode.DATA_ERROR and res['message'] == "Can't find this dataset!"
368
-
369
- # ---------------------------------update a dataset-----------------------------------
370
-
371
- def test_update_dataset_without_existing_dataset(self):
372
- """
373
- Test updating a dataset which does not exist.
374
- """
375
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
376
- params = {
377
- 'name': 'new_name3',
378
- 'description': 'new_description',
379
- "permission": 'me',
380
- "parser_id": 'naive',
381
- "language": 'English'
382
- }
383
- res = ragflow.update_dataset("weird_dataset", **params)
384
- assert (res['code'] == RetCode.OPERATING_ERROR
385
- and res['message'] == 'Only the owner of knowledgebase is authorized for this operation!')
386
-
387
- def test_update_dataset_with_updating_six_parameters(self):
388
- """
389
- Test updating a dataset when updating six parameters.
390
- """
391
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
392
- ragflow.create_dataset("new_name1")
393
- params = {
394
- 'name': 'new_name',
395
- 'description': 'new_description1',
396
- "permission": 'me',
397
- "parser_id": 'naive',
398
- "language": 'English'
399
- }
400
- res = ragflow.update_dataset("new_name1", **params)
401
- assert res['code'] == RetCode.SUCCESS
402
- assert (res['data']['description'] == 'new_description1'
403
- and res['data']['name'] == 'new_name' and res['data']['permission'] == 'me'
404
- and res['data']['language'] == 'English' and res['data']['parser_id'] == 'naive')
405
-
406
- def test_update_dataset_with_updating_two_parameters(self):
407
- """
408
- Test updating a dataset when updating two parameters.
409
- """
410
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
411
- ragflow.create_dataset("new_name2")
412
- params = {
413
- "name": "new_name3",
414
- "language": 'English'
415
- }
416
- res = ragflow.update_dataset("new_name2", **params)
417
- assert (res['code'] == RetCode.SUCCESS and res['data']['name'] == "new_name3"
418
- and res['data']['language'] == 'English')
419
-
420
- def test_update_dataset_with_updating_layout_recognize(self):
421
- """Test updating a dataset with only updating the layout_recognize"""
422
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
423
- ragflow.create_dataset("test_update_dataset_with_updating_layout_recognize")
424
- params = {
425
- "layout_recognize": False
426
- }
427
- res = ragflow.update_dataset("test_update_dataset_with_updating_layout_recognize", **params)
428
- assert res['code'] == RetCode.SUCCESS and res['data']['parser_config']['layout_recognize'] is False
429
-
430
- def test_update_dataset_with_empty_parameter(self):
431
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
432
- ragflow.create_dataset("test_update_dataset_with_empty_parameter")
433
- params = {}
434
- res = ragflow.update_dataset("test_update_dataset_with_empty_parameter", **params)
435
- assert (res['code'] == RetCode.DATA_ERROR
436
- and res['message'] == 'Please input at least one parameter that you want to update!')
437
-
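One detail worth pulling out of the update tests in this group: layout_recognize is passed as a flat keyword but comes back nested under parser_config. A condensed sketch of that round trip (the dataset name is made up; RAGFlow, API_KEY and HOST_ADDRESS as imported by these tests):

ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
ragflow.create_dataset("kb_layout_demo")  # hypothetical dataset name
res = ragflow.update_dataset("kb_layout_demo", layout_recognize=False)
# The flat keyword is stored inside the nested parser_config dict.
assert res["data"]["parser_config"]["layout_recognize"] is False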
438
- # ---------------------------------mix the different methods--------------------------
439
-
440
- def test_create_and_delete_dataset_together(self):
441
- """
442
- Test creating 1 dataset, and then deleting 1 dataset.
443
- Test creating 10 datasets, and then deleting 10 datasets.
444
- """
445
- # create 1 dataset
446
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
447
- res = ragflow.create_dataset("ddd")
448
- assert res['code'] == RetCode.SUCCESS and res['message'] == 'success'
449
-
450
- # delete 1 dataset
451
- res = ragflow.delete_dataset("ddd")
452
- assert res["code"] == RetCode.SUCCESS
453
-
454
- # create 10 datasets
455
- datasets_to_create = ["dataset1"] * 10
456
- created_response = [ragflow.create_dataset(name) for name in datasets_to_create]
457
-
458
- real_name_to_create = set()
459
- for response in created_response:
460
- assert 'data' in response, "Response is missing 'data' key"
461
- dataset_name = response['data']['dataset_name']
462
- real_name_to_create.add(dataset_name)
463
-
464
- # delete 10 datasets
465
- for name in real_name_to_create:
466
- res = ragflow.delete_dataset(name)
467
- assert res["code"] == RetCode.SUCCESS
468
-
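The duplicate-name test above has to read the real name back from response['data']['dataset_name'] because the server renames clashing datasets. A minimal sketch of that de-duplication, assuming a parenthesized numeric suffix (the actual scheme is not visible in this diff):

def deduplicate_name(name, existing_names):
    # Assumed scheme: append "(1)", "(2)", ... until the name is unique.
    if name not in existing_names:
        return name
    i = 1
    while f"{name}({i})" in existing_names:
        i += 1
    return f"{name}({i})"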
sdk/python/test/test_document.py DELETED
@@ -1,1130 +0,0 @@
1
- from api.settings import RetCode
2
- from test_sdkbase import TestSdk
3
- from ragflow import RAGFlow
4
- import pytest
5
- from common import API_KEY, HOST_ADDRESS
6
-
7
-
8
- class TestFile(TestSdk):
9
- """
10
- This class contains a suite of tests for the content management functionality within the dataset.
11
- It ensures that the following functionalities work as expected:
12
- 1. upload local files
13
- 2. upload remote files
14
- 3. download a file
15
- 4. delete a file
16
- 5. enable rename
17
- 6. list files
18
- 7. start parsing
19
- 8. end parsing
20
- 9. check the status of the file
21
- 10. list the chunks
22
- 11. delete a chunk
23
- 12. insert a new chunk
24
- 13. edit the status of chunk
25
- 14. get the specific chunk
26
- 15. retrieval test
27
- """
28
-
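Every test below re-creates its dataset inline. A minimal pytest fixture sketch that would factor out that boilerplate (hypothetical, not part of the deleted file; RAGFlow, API_KEY and HOST_ADDRESS as imported above):

import pytest

@pytest.fixture
def dataset_id():
    # Create a throwaway dataset for one test, then clean it up afterwards.
    ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
    created_res = ragflow.create_dataset("fixture_dataset")  # hypothetical name
    yield created_res["data"]["dataset_id"]
    ragflow.delete_dataset("fixture_dataset")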
29
- # ----------------------------upload local files-----------------------------------------------------
30
- def test_upload_two_files(self):
31
- """
32
- Test uploading two files with success.
33
- """
34
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
35
- created_res = ragflow.create_dataset("test_upload_two_files")
36
- dataset_id = created_res["data"]["dataset_id"]
37
- file_paths = ["test_data/test.txt", "test_data/test1.txt"]
38
- res = ragflow.upload_local_file(dataset_id, file_paths)
39
- assert res["code"] == RetCode.SUCCESS and res["message"] == "success"
40
-
41
- def test_upload_one_file(self):
42
- """
43
- Test uploading one file with success.
44
- """
45
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
46
- created_res = ragflow.create_dataset("test_upload_one_file")
47
- dataset_id = created_res["data"]["dataset_id"]
48
- file_paths = ["test_data/test.txt"]
49
- res = ragflow.upload_local_file(dataset_id, file_paths)
50
- assert res["code"] == RetCode.SUCCESS and res["message"] == "success"
51
-
52
- def test_upload_nonexistent_files(self):
53
- """
54
- Test uploading a file which does not exist.
55
- """
56
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
57
- created_res = ragflow.create_dataset("test_upload_nonexistent_files")
58
- dataset_id = created_res["data"]["dataset_id"]
59
- file_paths = ["test_data/imagination.txt"]
60
- res = ragflow.upload_local_file(dataset_id, file_paths)
61
- assert res["code"] == RetCode.DATA_ERROR and "does not exist" in res["message"]
62
-
63
- def test_upload_file_if_dataset_does_not_exist(self):
64
- """
65
- Test uploading files if the dataset id does not exist.
66
- """
67
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
68
- file_paths = ["test_data/test.txt"]
69
- res = ragflow.upload_local_file("111", file_paths)
70
- assert res["code"] == RetCode.DATA_ERROR and res["message"] == "Can't find this dataset"
71
-
72
- def test_upload_file_without_name(self):
73
- """
74
- Test uploading files that do not have a name.
75
- """
76
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
77
- created_res = ragflow.create_dataset("test_upload_file_without_name")
78
- dataset_id = created_res["data"]["dataset_id"]
79
- file_paths = ["test_data/.txt"]
80
- res = ragflow.upload_local_file(dataset_id, file_paths)
81
- assert res["code"] == RetCode.SUCCESS
82
-
83
- def test_upload_file_without_name1(self):
84
- """
85
- Test uploading files that do not have a name.
86
- """
87
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
88
- created_res = ragflow.create_dataset("test_upload_file_without_name")
89
- dataset_id = created_res["data"]["dataset_id"]
90
- file_paths = ["test_data/.txt", "test_data/empty.txt"]
91
- res = ragflow.upload_local_file(dataset_id, file_paths)
92
- assert res["code"] == RetCode.SUCCESS
93
-
94
- def test_upload_files_exceeding_the_number_limit(self):
95
- """
96
- Test uploading files whose number exceeds the limit.
97
- """
98
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
99
- created_res = ragflow.create_dataset("test_upload_files_exceeding_the_number_limit")
100
- dataset_id = created_res["data"]["dataset_id"]
101
- file_paths = ["test_data/test.txt", "test_data/test1.txt"] * 256
102
- res = ragflow.upload_local_file(dataset_id, file_paths)
103
- assert (res["message"] ==
104
- "You try to upload 512 files, which exceeds the maximum number of uploading files: 256"
105
- and res["code"] == RetCode.DATA_ERROR)
106
-
107
- def test_upload_files_without_files(self):
108
- """
109
- Test uploading files when the file path is None.
110
- """
111
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
112
- created_res = ragflow.create_dataset("test_upload_files_without_files")
113
- dataset_id = created_res["data"]["dataset_id"]
114
- file_paths = [None]
115
- res = ragflow.upload_local_file(dataset_id, file_paths)
116
- assert (res["message"] == "None is not string." and res["code"] == RetCode.ARGUMENT_ERROR)
117
-
118
- def test_upload_files_with_two_files_with_same_name(self):
119
- """
120
- Test uploading files with the same name.
121
- """
122
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
123
- created_res = ragflow.create_dataset("test_upload_files_with_two_files_with_same_name")
124
- dataset_id = created_res["data"]["dataset_id"]
125
- file_paths = ["test_data/test.txt"] * 2
126
- res = ragflow.upload_local_file(dataset_id, file_paths)
127
- assert (res["message"] == "success" and res["code"] == RetCode.SUCCESS)
128
-
129
- def test_upload_files_with_file_paths(self):
130
- """
131
- Test uploading files when only a directory path is specified.
132
- """
133
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
134
- created_res = ragflow.create_dataset("test_upload_files_with_file_paths")
135
- dataset_id = created_res["data"]["dataset_id"]
136
- file_paths = ["test_data/"]
137
- res = ragflow.upload_local_file(dataset_id, file_paths)
138
- assert (res["message"] == "The file test_data/ does not exist" and res["code"] == RetCode.DATA_ERROR)
139
-
140
- def test_upload_files_with_remote_file_path(self):
141
- """
142
- Test uploading files with remote files.
143
- """
144
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
145
- created_res = ragflow.create_dataset("test_upload_files_with_remote_file_path")
146
- dataset_id = created_res["data"]["dataset_id"]
147
- file_paths = ["https://github.com/genostack/ragflow"]
148
- res = ragflow.upload_local_file(dataset_id, file_paths)
149
- assert res["code"] == RetCode.ARGUMENT_ERROR and res["message"] == "Remote files have not unsupported."
150
-
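As the limit test above asserts, a single call rejects more than 256 files. A sketch of batching a large upload under that cap (the helper name is made up):

def upload_in_batches(ragflow, dataset_id, file_paths, batch_size=256):
    # Split the paths so no single upload_local_file call exceeds the server cap.
    results = []
    for start in range(0, len(file_paths), batch_size):
        results.append(ragflow.upload_local_file(dataset_id, file_paths[start:start + batch_size]))
    return results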
151
- # ----------------------------delete a file-----------------------------------------------------
152
- def test_delete_one_file(self):
153
- """
154
- Test deleting one file with success.
155
- """
156
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
157
- created_res = ragflow.create_dataset("test_delete_one_file")
158
- dataset_id = created_res["data"]["dataset_id"]
159
- file_paths = ["test_data/test.txt"]
160
- res = ragflow.upload_local_file(dataset_id, file_paths)
161
- # get the doc_id
162
- data = res["data"][0]
163
- doc_id = data["id"]
164
- # delete the files
165
- deleted_res = ragflow.delete_files(doc_id, dataset_id)
166
- # assert value
167
- assert deleted_res["code"] == RetCode.SUCCESS and deleted_res["data"] is True
168
-
169
- def test_delete_document_with_not_existing_document(self):
170
- """
171
- Test deleting a document that does not exist with failure.
172
- """
173
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
174
- created_res = ragflow.create_dataset("test_delete_document_with_not_existing_document")
175
- dataset_id = created_res["data"]["dataset_id"]
176
- res = ragflow.delete_files("111", dataset_id)
177
- assert res["code"] == RetCode.DATA_ERROR and res["message"] == "Document 111 not found!"
178
-
179
- def test_delete_document_with_creating_100_documents_and_deleting_100_documents(self):
180
- """
181
- Test deleting documents when uploading 100 docs and deleting 100 docs.
182
- """
183
- # upload 100 docs
184
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
185
- created_res = ragflow.create_dataset("test_delete_one_file")
186
- dataset_id = created_res["data"]["dataset_id"]
187
- file_paths = ["test_data/test.txt"] * 100
188
- res = ragflow.upload_local_file(dataset_id, file_paths)
189
-
190
- # get the doc_id
191
- data = res["data"]
192
- for d in data:
193
- doc_id = d["id"]
194
- # delete the files
195
- deleted_res = ragflow.delete_files(doc_id, dataset_id)
196
- # assert value
197
- assert deleted_res["code"] == RetCode.SUCCESS and deleted_res["data"] is True
198
-
199
- def test_delete_document_from_nonexistent_dataset(self):
200
- """
201
- Test deleting documents from a non-existent dataset
202
- """
203
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
204
- created_res = ragflow.create_dataset("test_delete_one_file")
205
- dataset_id = created_res["data"]["dataset_id"]
206
- file_paths = ["test_data/test.txt"]
207
- res = ragflow.upload_local_file(dataset_id, file_paths)
208
- # get the doc_id
209
- data = res["data"][0]
210
- doc_id = data["id"]
211
- # delete the files
212
- deleted_res = ragflow.delete_files(doc_id, "000")
213
- # assert value
214
- assert (deleted_res["code"] == RetCode.ARGUMENT_ERROR and deleted_res["message"] ==
215
- f"The document {doc_id} is not in the dataset: 000, but in the dataset: {dataset_id}.")
216
-
217
- def test_delete_document_which_is_located_in_other_dataset(self):
218
- """
219
- Test deleting a document which is located in other dataset.
220
- """
221
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
222
- # upload a document
223
- created_res = ragflow.create_dataset("test_delete_document_which_is_located_in_other_dataset")
224
- created_res_id = created_res["data"]["dataset_id"]
225
- file_paths = ["test_data/test.txt"]
226
- res = ragflow.upload_local_file(created_res_id, file_paths)
227
- # other dataset
228
- other_res = ragflow.create_dataset("other_dataset")
229
- other_dataset_id = other_res["data"]["dataset_id"]
230
- # get the doc_id
231
- data = res["data"][0]
232
- doc_id = data["id"]
233
- # delete the files from the other dataset
234
- deleted_res = ragflow.delete_files(doc_id, other_dataset_id)
235
- # assert value
236
- assert (deleted_res["code"] == RetCode.ARGUMENT_ERROR and deleted_res["message"] ==
237
- f"The document {doc_id} is not in the dataset: {other_dataset_id}, but in the dataset: {created_res_id}.")
238
-
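delete_files removes one document per call, so emptying a dataset means listing first. A sketch combining the two calls used above, assuming each listed doc exposes the same 'id' field that the upload responses do:

def clear_dataset(ragflow, dataset_id):
    # List every document in the dataset, then delete them one by one.
    docs = ragflow.list_files(dataset_id)["data"]["docs"]
    for doc in docs:
        res = ragflow.delete_files(doc["id"], dataset_id)
        assert res["code"] == RetCode.SUCCESS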
239
- # ----------------------------list files-----------------------------------------------------
240
- def test_list_documents_with_success(self):
241
- """
242
- Test listing documents with a successful outcome.
243
- """
244
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
245
- # upload a document
246
- created_res = ragflow.create_dataset("test_list_documents_with_success")
247
- created_res_id = created_res["data"]["dataset_id"]
248
- file_paths = ["test_data/test.txt"]
249
- ragflow.upload_local_file(created_res_id, file_paths)
250
- # Call the list_document method
251
- response = ragflow.list_files(created_res_id)
252
- assert response["code"] == RetCode.SUCCESS and len(response["data"]["docs"]) == 1
253
-
254
- def test_list_documents_with_checking_size(self):
255
- """
256
- Test listing documents and verify the size and names of the documents.
257
- """
258
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
259
- # upload 10 documents
260
- created_res = ragflow.create_dataset("test_list_documents_with_checking_size")
261
- created_res_id = created_res["data"]["dataset_id"]
262
- file_paths = ["test_data/test.txt"] * 10
263
- ragflow.upload_local_file(created_res_id, file_paths)
264
- # Call the list_document method
265
- response = ragflow.list_files(created_res_id)
266
- assert response["code"] == RetCode.SUCCESS and len(response["data"]["docs"]) == 10
267
-
268
- def test_list_documents_with_getting_empty_result(self):
269
- """
270
- Test listing documents that should be empty.
271
- """
272
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
273
- # upload 0 documents
274
- created_res = ragflow.create_dataset("test_list_documents_with_getting_empty_result")
275
- created_res_id = created_res["data"]["dataset_id"]
276
- # Call the list_document method
277
- response = ragflow.list_files(created_res_id)
278
- assert response["code"] == RetCode.SUCCESS and len(response["data"]["docs"]) == 0
279
-
280
- def test_list_documents_with_creating_100_documents(self):
281
- """
282
- Test listing 100 documents and verify the size of these documents.
283
- """
284
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
285
- # upload 100 documents
286
- created_res = ragflow.create_dataset("test_list_documents_with_creating_100_documents")
287
- created_res_id = created_res["data"]["dataset_id"]
288
- file_paths = ["test_data/test.txt"] * 100
289
- ragflow.upload_local_file(created_res_id, file_paths)
290
- # Call the list_document method
291
- response = ragflow.list_files(created_res_id)
292
- assert response["code"] == RetCode.SUCCESS and len(response["data"]["docs"]) == 100
293
-
294
- def test_list_document_with_failure(self):
295
- """
296
- Test listing documents with IndexError.
297
- """
298
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
299
- created_res = ragflow.create_dataset("test_list_document_with_failure")
300
- created_res_id = created_res["data"]["dataset_id"]
301
- response = ragflow.list_files(created_res_id, offset=-1, count=-1)
302
- assert "IndexError" in response["message"] and response["code"] == RetCode.EXCEPTION_ERROR
303
-
304
- def test_list_document_with_verifying_offset_and_count(self):
305
- """
306
- Test listing documents with verifying the functionalities of offset and count.
307
- """
308
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
309
- created_res = ragflow.create_dataset("test_list_document_with_verifying_offset_and_count")
310
- created_res_id = created_res["data"]["dataset_id"]
311
- file_paths = ["test_data/test.txt", "test_data/empty.txt"] * 10
312
- ragflow.upload_local_file(created_res_id, file_paths)
313
- # Call the list_document method
314
- response = ragflow.list_files(created_res_id, offset=2, count=10)
315
-
316
- assert response["code"] == RetCode.SUCCESS and len(response["data"]["docs"]) == 10
317
-
318
- def test_list_document_with_verifying_keywords(self):
319
- """
320
- Test listing documents with verifying the functionality of searching keywords.
321
- """
322
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
323
- created_res = ragflow.create_dataset("test_list_document_with_verifying_keywords")
324
- created_res_id = created_res["data"]["dataset_id"]
325
- file_paths = ["test_data/test.txt", "test_data/empty.txt"]
326
- ragflow.upload_local_file(created_res_id, file_paths)
327
- # Call the list_document method
328
- response = ragflow.list_files(created_res_id, keywords="empty")
329
-
330
- assert response["code"] == RetCode.SUCCESS and len(response["data"]["docs"]) == 1
331
-
332
- def test_list_document_with_verifying_order_by_and_descend(self):
333
- """
334
- Test listing documents with verifying the functionality of order_by and descend.
335
- """
336
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
337
- created_res = ragflow.create_dataset("test_list_document_with_verifying_order_by_and_descend")
338
- created_res_id = created_res["data"]["dataset_id"]
339
- file_paths = ["test_data/test.txt", "test_data/empty.txt"]
340
- ragflow.upload_local_file(created_res_id, file_paths)
341
- # Call the list_document method
342
- response = ragflow.list_files(created_res_id)
343
- assert response["code"] == RetCode.SUCCESS and len(response["data"]["docs"]) == 2
344
- docs = response["data"]["docs"]
345
- # reverse
346
- i = 1
347
- for doc in docs:
348
- assert doc["name"] in file_paths[i]
349
- i -= 1
350
-
351
- def test_list_document_with_verifying_order_by_and_ascend(self):
352
- """
353
- Test listing documents with verifying the functionality of order_by and ascend.
354
- """
355
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
356
- created_res = ragflow.create_dataset("test_list_document_with_verifying_order_by_and_ascend")
357
- created_res_id = created_res["data"]["dataset_id"]
358
- file_paths = ["test_data/test.txt", "test_data/test1.txt", "test_data/empty.txt"]
359
- ragflow.upload_local_file(created_res_id, file_paths)
360
- # Call the list_document method
361
- response = ragflow.list_files(created_res_id, descend=False)
362
- assert response["code"] == RetCode.SUCCESS and len(response["data"]["docs"]) == 3
363
-
364
- docs = response["data"]["docs"]
365
-
366
- i = 0
367
- for doc in docs:
368
- assert doc["name"] in file_paths[i]
369
- i += 1
370
-
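The offset/count test above pages into the middle of a listing. A sketch that walks an entire dataset with the same parameters (the page size is an arbitrary choice):

def iter_documents(ragflow, dataset_id, page_size=10):
    # Keep requesting pages until an empty page signals the end.
    offset = 0
    while True:
        response = ragflow.list_files(dataset_id, offset=offset, count=page_size)
        docs = response["data"]["docs"]
        if not docs:
            return
        yield from docs
        offset += len(docs)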
371
- # ----------------------------update files: enable, rename, template_type-------------------------------------------
372
-
373
- def test_update_nonexistent_document(self):
374
- """
375
- Test updating a document which does not exist.
376
- """
377
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
378
- created_res = ragflow.create_dataset("test_update_nonexistent_document")
379
- created_res_id = created_res["data"]["dataset_id"]
380
- params = {
381
- "name": "new_name"
382
- }
383
- res = ragflow.update_file(created_res_id, "weird_doc_id", **params)
384
- assert res["code"] == RetCode.ARGUMENT_ERROR and res["message"] == f"This document weird_doc_id cannot be found!"
385
-
386
- def test_update_document_without_parameters(self):
387
- """
388
- Test updating a document without giving parameters.
389
- """
390
- # create a dataset
391
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
392
- created_res = ragflow.create_dataset("test_update_document_without_parameters")
393
- created_res_id = created_res["data"]["dataset_id"]
394
- # upload files
395
- file_paths = ["test_data/test.txt"]
396
- uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
397
- # get the doc_id
398
- data = uploading_res["data"][0]
399
- doc_id = data["id"]
400
- # update file
401
- params = {
402
- }
403
- update_res = ragflow.update_file(created_res_id, doc_id, **params)
404
- assert (update_res["code"] == RetCode.DATA_ERROR and
405
- update_res["message"] == "Please input at least one parameter that you want to update!")
406
-
407
- def test_update_document_in_nonexistent_dataset(self):
408
- """
409
- Test updating a document in the nonexistent dataset.
410
- """
411
- # create a dataset
412
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
413
- created_res = ragflow.create_dataset("test_update_document_in_nonexistent_dataset")
414
- created_res_id = created_res["data"]["dataset_id"]
415
- # upload files
416
- file_paths = ["test_data/test.txt"]
417
- uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
418
- # get the doc_id
419
- data = uploading_res["data"][0]
420
- doc_id = data["id"]
421
- # update file
422
- params = {
423
- "name": "new_name"
424
- }
425
- update_res = ragflow.update_file("fake_dataset_id", doc_id, **params)
426
- assert (update_res["code"] == RetCode.DATA_ERROR and
427
- update_res["message"] == f"This dataset fake_dataset_id cannot be found!")
428
-
429
- def test_update_document_with_different_extension_name(self):
430
- """
431
- Test the updating of a document with an extension name that differs from its original.
432
- """
433
- # create a dataset
434
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
435
- created_res = ragflow.create_dataset("test_update_document_with_different_extension_name")
436
- created_res_id = created_res["data"]["dataset_id"]
437
- # upload files
438
- file_paths = ["test_data/test.txt"]
439
- uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
440
- # get the doc_id
441
- data = uploading_res["data"][0]
442
- doc_id = data["id"]
443
- # update file
444
- params = {
445
- "name": "new_name.doc"
446
- }
447
- update_res = ragflow.update_file(created_res_id, doc_id, **params)
448
- assert (update_res["code"] == RetCode.ARGUMENT_ERROR and
449
- update_res["message"] == "The extension of file cannot be changed")
450
-
451
- def test_update_document_with_duplicate_name(self):
452
- """
453
- Test the updating of a document with a duplicate name.
454
- """
455
- # create a dataset
456
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
457
- created_res = ragflow.create_dataset("test_update_document_with_different_extension_name")
458
- created_res_id = created_res["data"]["dataset_id"]
459
- # upload files
460
- file_paths = ["test_data/test.txt", "test_data/test1.txt"]
461
- uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
462
- # get the doc_id
463
- data = uploading_res["data"][0]
464
- doc_id = data["id"]
465
- # update file
466
- params = {
467
- "name": "test.txt"
468
- }
469
- update_res = ragflow.update_file(created_res_id, doc_id, **params)
470
- assert (update_res["code"] == RetCode.ARGUMENT_ERROR and
471
- update_res["message"] == "Duplicated document name in the same dataset.")
472
-
473
- def test_update_document_with_updating_its_name_with_success(self):
474
- """
475
- Test the updating of a document's name with success.
476
- """
477
- # create a dataset
478
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
479
- created_res = ragflow.create_dataset("test_update_document_with_updating_its_name_with_success")
480
- created_res_id = created_res["data"]["dataset_id"]
481
- # upload files
482
- file_paths = ["test_data/test.txt", "test_data/test1.txt"]
483
- uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
484
- # get the doc_id
485
- data = uploading_res["data"][0]
486
- doc_id = data["id"]
487
- # update file
488
- params = {
489
- "name": "new_name.txt"
490
- }
491
- update_res = ragflow.update_file(created_res_id, doc_id, **params)
492
- assert (update_res["code"] == RetCode.SUCCESS and
493
- update_res["message"] == "Success" and update_res["data"]["name"] == "new_name.txt")
494
-
495
- def test_update_document_with_updating_its_template_type_with_success(self):
496
- """
497
- Test the updating of a document's template type with success.
498
- """
499
- # create a dataset
500
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
501
- created_res = ragflow.create_dataset("test_update_document_with_updating_its_template_type_with_success")
502
- created_res_id = created_res["data"]["dataset_id"]
503
- # upload files
504
- file_paths = ["test_data/test.txt", "test_data/test1.txt"]
505
- uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
506
- # get the doc_id
507
- data = uploading_res["data"][0]
508
- doc_id = data["id"]
509
- # update file
510
- params = {
511
- "template_type": "laws"
512
- }
513
- update_res = ragflow.update_file(created_res_id, doc_id, **params)
514
- assert (update_res["code"] == RetCode.SUCCESS and
515
- update_res["message"] == "Success" and update_res["data"]["parser_id"] == "laws")
516
-
517
- def test_update_document_with_updating_its_enable_value_with_success(self):
518
- """
519
- Test the updating of a document's enable value with success.
520
- """
521
- # create a dataset
522
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
523
- created_res = ragflow.create_dataset("test_update_document_with_updating_its_enable_value_with_success")
524
- created_res_id = created_res["data"]["dataset_id"]
525
- # upload files
526
- file_paths = ["test_data/test.txt", "test_data/test1.txt"]
527
- uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
528
- # get the doc_id
529
- data = uploading_res["data"][0]
530
- doc_id = data["id"]
531
- # update file
532
- params = {
533
- "enable": "0"
534
- }
535
- update_res = ragflow.update_file(created_res_id, doc_id, **params)
536
- assert (update_res["code"] == RetCode.SUCCESS and
537
- update_res["message"] == "Success" and update_res["data"]["status"] == "0")
538
-
539
- def test_update_document_with_updating_illegal_parameter(self):
540
- """
541
- Test updating a document with an illegal parameter.
542
- """
543
- # create a dataset
544
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
545
- created_res = ragflow.create_dataset("test_update_document_with_updating_illegal_parameter")
546
- created_res_id = created_res["data"]["dataset_id"]
547
- # upload files
548
- file_paths = ["test_data/test.txt", "test_data/test1.txt"]
549
- uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
550
- # get the doc_id
551
- data = uploading_res["data"][0]
552
- doc_id = data["id"]
553
- # update file
554
- params = {
555
- "illegal_parameter": "0"
556
- }
557
- update_res = ragflow.update_file(created_res_id, doc_id, **params)
558
- assert (update_res["code"] == RetCode.ARGUMENT_ERROR and
559
- update_res["message"] == "illegal_parameter is an illegal parameter.")
560
-
561
- def test_update_document_with_giving_its_name_value(self):
562
- """
563
- Test updating a document's name with an empty name value.
564
- """
565
- # create a dataset
566
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
567
- created_res = ragflow.create_dataset("test_update_document_with_updating_its_name_with_success")
568
- created_res_id = created_res["data"]["dataset_id"]
569
- # upload files
570
- file_paths = ["test_data/test.txt", "test_data/test1.txt"]
571
- uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
572
- # get the doc_id
573
- data = uploading_res["data"][0]
574
- doc_id = data["id"]
575
- # update file
576
- params = {
577
- "name": ""
578
- }
579
- update_res = ragflow.update_file(created_res_id, doc_id, **params)
580
- assert (update_res["code"] == RetCode.DATA_ERROR and
581
- update_res["message"] == "There is no new name.")
582
-
583
- def test_update_document_with_giving_illegal_value_for_enable(self):
584
- """
585
- Test updating a document with an illegal value for the 'enable' field.
586
- """
587
- # create a dataset
588
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
589
- created_res = ragflow.create_dataset("test_update_document_with_updating_its_name_with_success")
590
- created_res_id = created_res["data"]["dataset_id"]
591
- # upload files
592
- file_paths = ["test_data/test.txt", "test_data/test1.txt"]
593
- uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
594
- # get the doc_id
595
- data = uploading_res["data"][0]
596
- doc_id = data["id"]
597
- # update file
598
- params = {
599
- "enable": "?"
600
- }
601
- update_res = ragflow.update_file(created_res_id, doc_id, **params)
602
- assert (update_res["code"] == RetCode.DATA_ERROR and
603
- update_res["message"] == "Illegal value ? for 'enable' field.")
604
-
605
- def test_update_document_with_giving_illegal_value_for_type(self):
606
- """
607
- Test updating a document with an illegal value for the 'template_type' field.
608
- """
609
- # create a dataset
610
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
611
- created_res = ragflow.create_dataset("test_update_document_with_updating_its_name_with_success")
612
- created_res_id = created_res["data"]["dataset_id"]
613
- # upload files
614
- file_paths = ["test_data/test.txt", "test_data/test1.txt"]
615
- uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
616
- # get the doc_id
617
- data = uploading_res["data"][0]
618
- doc_id = data["id"]
619
- # update file
620
- params = {
621
- "template_type": "?"
622
- }
623
- update_res = ragflow.update_file(created_res_id, doc_id, **params)
624
- assert (update_res["code"] == RetCode.DATA_ERROR and
625
- update_res["message"] == "Illegal value ? for 'template_type' field.")
626
-
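Taken together, the update tests imply a whitelist: only 'name', 'enable' and 'template_type' are accepted, an empty payload is rejected, and anything else is reported as illegal. A client-side sketch of the same validation, inferred from the assertions rather than from server code:

ALLOWED_UPDATE_FIELDS = {"name", "enable", "template_type"}

def validate_update_params(params):
    # Mirror the server-side checks the assertions above describe.
    if not params:
        raise ValueError("Please input at least one parameter that you want to update!")
    for key in params:
        if key not in ALLOWED_UPDATE_FIELDS:
            raise ValueError(f"{key} is an illegal parameter.")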
627
- # ----------------------------download a file-----------------------------------------------------
628
-
629
- def test_download_nonexistent_document(self):
630
- """
631
- Test downloading a document which does not exist.
632
- """
633
- # create a dataset
634
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
635
- created_res = ragflow.create_dataset("test_download_nonexistent_document")
636
- created_res_id = created_res["data"]["dataset_id"]
637
- res = ragflow.download_file(created_res_id, "imagination")
638
- assert res["code"] == RetCode.ARGUMENT_ERROR and res["message"] == f"This document 'imagination' cannot be found!"
639
-
640
- def test_download_document_in_nonexistent_dataset(self):
641
- """
642
- Test downloading a document whose dataset is nonexistent.
643
- """
644
- # create a dataset
645
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
646
- created_res = ragflow.create_dataset("test_download_nonexistent_document")
647
- created_res_id = created_res["data"]["dataset_id"]
648
- # upload files
649
- file_paths = ["test_data/test.txt"]
650
- uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
651
- # get the doc_id
652
- data = uploading_res["data"][0]
653
- doc_id = data["id"]
654
- # download file
655
- res = ragflow.download_file("imagination", doc_id)
656
- assert res["code"] == RetCode.DATA_ERROR and res["message"] == f"This dataset 'imagination' cannot be found!"
657
-
658
- def test_download_document_with_success(self):
659
- """
660
- Test the downloading of a document with success.
661
- """
662
- # create a dataset
663
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
664
- created_res = ragflow.create_dataset("test_download_nonexistent_document")
665
- created_res_id = created_res["data"]["dataset_id"]
666
- # upload files
667
- file_paths = ["test_data/test.txt"]
668
- uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
669
- # get the doc_id
670
- data = uploading_res["data"][0]
671
- doc_id = data["id"]
672
- # download file
673
- with open("test_data/test.txt", "rb") as file:
674
- binary_data = file.read()
675
- res = ragflow.download_file(created_res_id, doc_id)
676
- assert res["code"] == RetCode.SUCCESS and res["data"] == binary_data
677
-
678
- def test_download_an_empty_document(self):
679
- """
680
- Test the downloading of an empty document.
681
- """
682
- # create a dataset
683
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
684
- created_res = ragflow.create_dataset("test_download_nonexistent_document")
685
- created_res_id = created_res["data"]["dataset_id"]
686
- # upload files
687
- file_paths = ["test_data/empty.txt"]
688
- uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
689
- # get the doc_id
690
- data = uploading_res["data"][0]
691
- doc_id = data["id"]
692
- # download file
693
- res = ragflow.download_file(created_res_id, doc_id)
694
- assert res["code"] == RetCode.DATA_ERROR and res["message"] == "This file is empty."
695
-
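The success test above shows download_file returning the raw bytes in res['data']. A sketch that persists them to disk (the function name is made up):

def save_document(ragflow, dataset_id, doc_id, target_path):
    # Fetch the binary content and write it out unchanged.
    res = ragflow.download_file(dataset_id, doc_id)
    assert res["code"] == RetCode.SUCCESS, res["message"]
    with open(target_path, "wb") as f:
        f.write(res["data"])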
696
- # ----------------------------start parsing-----------------------------------------------------
697
- def test_start_parsing_document_with_success(self):
698
- """
699
- Test the parsing of a document with success.
700
- """
701
- # create a dataset
702
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
703
- created_res = ragflow.create_dataset("test_start_parsing_document_with_success")
704
- created_res_id = created_res["data"]["dataset_id"]
705
- # upload files
706
- file_paths = ["test_data/lol.txt"]
707
- uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
708
- # get the doc_id
709
- data = uploading_res["data"][0]
710
- doc_id = data["id"]
711
- # parse file
712
- res = ragflow.start_parsing_document(created_res_id, doc_id)
713
- assert res["code"] == RetCode.SUCCESS and res["message"] == ""
714
-
715
- def test_start_parsing_nonexistent_document(self):
716
- """
717
- Test parsing a document that does not exist.
718
- """
719
- # create a dataset
720
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
721
- created_res = ragflow.create_dataset("test_start_parsing_nonexistent_document")
722
- created_res_id = created_res["data"]["dataset_id"]
723
- res = ragflow.start_parsing_document(created_res_id, "imagination")
724
- assert res["code"] == RetCode.ARGUMENT_ERROR and res["message"] == "This document 'imagination' cannot be found!"
725
-
726
- def test_start_parsing_document_in_nonexistent_dataset(self):
727
- """
728
- Test parsing a document whose dataset does not exist.
729
- """
730
- # create a dataset
731
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
732
- created_res = ragflow.create_dataset("test_download_nonexistent_document")
733
- created_res_id = created_res["data"]["dataset_id"]
734
- # upload files
735
- file_paths = ["test_data/test.txt"]
736
- uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
737
- # get the doc_id
738
- data = uploading_res["data"][0]
739
- doc_id = data["id"]
740
- # parse
741
- res = ragflow.start_parsing_document("imagination", doc_id)
742
- assert res["code"] == RetCode.DATA_ERROR and res["message"] == "This dataset 'imagination' cannot be found!"
743
-
744
- def test_start_parsing_an_empty_document(self):
745
- """
746
- Test the parsing of an empty document.
747
- """
748
- # create a dataset
749
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
750
- created_res = ragflow.create_dataset("test_download_nonexistent_document")
751
- created_res_id = created_res["data"]["dataset_id"]
752
- # upload files
753
- file_paths = ["test_data/empty.txt"]
754
- uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
755
- # get the doc_id
756
- data = uploading_res["data"][0]
757
- doc_id = data["id"]
758
- res = ragflow.start_parsing_document(created_res_id, doc_id)
759
- assert res["code"] == RetCode.SUCCESS and res["message"] == "Empty data in the document: empty.txt; "
760
-
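Note that parsing an empty document still returns RetCode.SUCCESS; per-document problems are folded into 'message'. A sketch that surfaces those warnings instead of dropping them (the helper name is made up):

def parse_with_warnings(ragflow, dataset_id, doc_id):
    # On success, a non-empty message carries warnings such as empty files.
    res = ragflow.start_parsing_document(dataset_id, doc_id)
    assert res["code"] == RetCode.SUCCESS
    if res["message"]:
        print(f"parser warnings: {res['message']}")
    return res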
761
- # ------------------------parsing multiple documents----------------------------
762
- def test_start_parsing_documents_in_nonexistent_dataset(self):
763
- """
764
- Test parsing documents whose dataset does not exist.
765
- """
766
- # create a dataset
767
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
768
- created_res = ragflow.create_dataset("test_download_nonexistent_document")
769
- created_res_id = created_res["data"]["dataset_id"]
770
- # upload files
771
- file_paths = ["test_data/test.txt"]
772
- uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
773
- # parse
774
- res = ragflow.start_parsing_documents("imagination")
775
- assert res["code"] == RetCode.DATA_ERROR and res["message"] == "This dataset 'imagination' cannot be found!"
776
-
777
- def test_start_parsing_multiple_documents(self):
778
- """
779
- Test parsing documents successfully.
780
- """
781
- # create a dataset
782
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
783
- created_res = ragflow.create_dataset(" test_start_parsing_multiple_documents")
784
- created_res_id = created_res["data"]["dataset_id"]
785
- # upload files
786
- file_paths = ["test_data/test.txt", "test_data/test1.txt"]
787
- ragflow.upload_local_file(created_res_id, file_paths)
788
- res = ragflow.start_parsing_documents(created_res_id)
789
- assert res["code"] == RetCode.SUCCESS and res["data"] is True and res["message"] == ""
790
-
791
- def test_start_parsing_multiple_documents_with_one_empty_file(self):
792
- """
793
- Test parsing documents, one of which is empty.
794
- """
795
- # create a dataset
796
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
797
- created_res = ragflow.create_dataset(" test_start_parsing_multiple_documents")
798
- created_res_id = created_res["data"]["dataset_id"]
799
- # upload files
800
- file_paths = ["test_data/test.txt", "test_data/test1.txt", "test_data/empty.txt"]
801
- ragflow.upload_local_file(created_res_id, file_paths)
802
- res = ragflow.start_parsing_documents(created_res_id)
803
- assert res["code"] == RetCode.SUCCESS and res["message"] == "Empty data in the document: empty.txt; "
804
-
805
- def test_start_parsing_multiple_specific_documents(self):
806
- """
807
- Test parsing documents whose document ids are specified.
808
- """
809
- # create a dataset
810
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
811
- created_res = ragflow.create_dataset(" test_start_parsing_multiple_documents")
812
- created_res_id = created_res["data"]["dataset_id"]
813
- # upload files
814
- file_paths = ["test_data/test.txt", "test_data/test1.txt"]
815
- uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
816
- # get the doc_id
817
- data = uploading_res["data"]
818
- doc_ids = []
819
- for d in data:
820
- doc_ids.append(d["id"])
821
- res = ragflow.start_parsing_documents(created_res_id, doc_ids)
822
- assert res["code"] == RetCode.SUCCESS and res["message"] == ""
823
-
824
- def test_start_re_parsing_multiple_specific_documents(self):
825
- """
826
- Test re-parsing documents.
827
- """
828
- # create a dataset
829
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
830
- created_res = ragflow.create_dataset(" test_start_parsing_multiple_documents")
831
- created_res_id = created_res["data"]["dataset_id"]
832
- # upload files
833
- file_paths = ["test_data/test.txt", "test_data/test1.txt"]
834
- uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
835
- # get the doc_id
836
- data = uploading_res["data"]
837
- doc_ids = []
838
- for d in data:
839
- doc_ids.append(d["id"])
840
- res = ragflow.start_parsing_documents(created_res_id, doc_ids)
841
- assert res["code"] == RetCode.SUCCESS and res["message"] == ""
842
- # re-parse
843
- res = ragflow.start_parsing_documents(created_res_id, doc_ids)
844
- assert res["code"] == RetCode.SUCCESS and res["message"] == ""
845
-
846
- def test_start_re_parsing_multiple_specific_documents_with_changing_parser_id(self):
847
- """
848
- Test re-parsing documents after changing the parser id.
849
- """
850
- # create a dataset
851
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
852
- created_res = ragflow.create_dataset(" test_start_parsing_multiple_documents")
853
- created_res_id = created_res["data"]["dataset_id"]
854
- # upload files
855
- file_paths = ["test_data/test.txt", "test_data/test1.txt"]
856
- uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
857
- # get the doc_id
858
- data = uploading_res["data"]
859
- doc_ids = []
860
- for d in data:
861
- doc_ids.append(d["id"])
862
- res = ragflow.start_parsing_documents(created_res_id, doc_ids)
863
- assert res["code"] == RetCode.SUCCESS and res["message"] == ""
864
- # general -> laws
865
- params = {
866
- "template_type": "laws"
867
- }
868
- ragflow.update_file(created_res_id, doc_ids[0], **params)
869
- # re-parse
870
- res = ragflow.start_parsing_documents(created_res_id, doc_ids)
871
- assert res["code"] == RetCode.SUCCESS and res["message"] == ""
872
-
873
- def test_start_re_parsing_multiple_specific_documents_with_changing_illegal_parser_id(self):
874
- """
875
- Test re-parsing documents after setting an illegal parser id.
876
- """
877
- # create a dataset
878
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
879
- created_res = ragflow.create_dataset(" test_start_parsing_multiple_documents")
880
- created_res_id = created_res["data"]["dataset_id"]
881
- # upload files
882
- file_paths = ["test_data/test.txt", "test_data/test1.txt"]
883
- uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
884
- # get the doc_id
885
- data = uploading_res["data"]
886
- doc_ids = []
887
- for d in data:
888
- doc_ids.append(d["id"])
889
- res = ragflow.start_parsing_documents(created_res_id, doc_ids)
890
- assert res["code"] == RetCode.SUCCESS and res["message"] == ""
891
- # general -> illegal
892
- params = {
893
- "template_type": "illegal"
894
- }
895
- res = ragflow.update_file(created_res_id, doc_ids[0], **params)
896
- assert res["code"] == RetCode.DATA_ERROR and res["message"] == "Illegal value illegal for 'template_type' field."
897
- # re-parse
898
- res = ragflow.start_parsing_documents(created_res_id, doc_ids)
899
- assert res["code"] == RetCode.SUCCESS and res["message"] == ""
900
-
901
- def test_start_parsing_multiple_specific_documents_with_changing_illegal_parser_id(self):
902
- """
903
- Test parsing documents after setting an illegal parser id.
904
- """
905
- # create a dataset
906
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
907
- created_res = ragflow.create_dataset(" test_start_parsing_multiple_documents")
908
- created_res_id = created_res["data"]["dataset_id"]
909
- # upload files
910
- file_paths = ["test_data/test.txt", "test_data/test1.txt"]
911
- uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
912
- # get the doc_id
913
- data = uploading_res["data"]
914
- doc_ids = []
915
- for d in data:
916
- doc_ids.append(d["id"])
917
- # general -> illegal
918
- params = {
919
- "template_type": "illegal"
920
- }
921
- res = ragflow.update_file(created_res_id, doc_ids[0], **params)
922
- assert res["code"] == RetCode.DATA_ERROR and res["message"] == "Illegal value illegal for 'template_type' field."
923
- # re-parse
924
- res = ragflow.start_parsing_documents(created_res_id, doc_ids)
925
- assert res["code"] == RetCode.SUCCESS and res["message"] == ""
926
-
927
- def test_start_parsing_multiple_documents_in_the_dataset_whose_parser_id_is_illegal(self):
928
- """
929
- Test parsing documents in a dataset whose parser id is illegal.
930
- """
931
- # create a dataset
932
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
933
- created_res = ragflow.create_dataset("test_start_parsing_multiple_documents_in_the_dataset_whose_parser_id_is_illegal")
934
- created_res_id = created_res["data"]["dataset_id"]
935
- # update the parser id
936
- params = {
937
- "chunk_method": "illegal"
938
- }
939
- res = ragflow.update_dataset("test_start_parsing_multiple_documents_in_the_dataset_whose_parser_id_is_illegal", **params)
940
- assert res["code"] == RetCode.DATA_ERROR and res["message"] == "Illegal value illegal for 'chunk_method' field."
941
- # upload files
942
- file_paths = ["test_data/test.txt", "test_data/test1.txt"]
943
- uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
944
- # get the doc_id
945
- data = uploading_res["data"]
946
- doc_ids = []
947
- for d in data:
948
- doc_ids.append(d["id"])
949
- # parse
950
- res = ragflow.start_parsing_documents(created_res_id, doc_ids)
951
- assert res["code"] == RetCode.SUCCESS and res["message"] == ""
952
-
953
- # ----------------------------stop parsing-----------------------------------------------------
954
- def test_stop_parsing_document_with_success(self):
955
- """
956
- Test stopping the parsing of a document successfully.
957
- """
958
- # create a dataset
959
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
960
- created_res = ragflow.create_dataset("test_start_parsing_document_with_success")
961
- created_res_id = created_res["data"]["dataset_id"]
962
- # upload files
963
- file_paths = ["test_data/lol.txt"]
964
- uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
965
- # get the doc_id
966
- data = uploading_res["data"][0]
967
- doc_id = data["id"]
968
- # parse file
969
- res = ragflow.start_parsing_document(created_res_id, doc_id)
970
- assert res["code"] == RetCode.SUCCESS and res["message"] == ""
971
- res = ragflow.stop_parsing_document(created_res_id, doc_id)
972
- assert res["code"] == RetCode.SUCCESS and res["message"] == ""
973
-
974
- def test_stop_parsing_nonexistent_document(self):
975
- """
976
- Test stopping the parsing of a document that does not exist.
977
- """
978
- # create a dataset
979
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
980
- created_res = ragflow.create_dataset("test_start_parsing_nonexistent_document")
981
- created_res_id = created_res["data"]["dataset_id"]
982
- res = ragflow.stop_parsing_document(created_res_id, "imagination.txt")
983
- assert res["code"] == RetCode.ARGUMENT_ERROR and res["message"] == "This document 'imagination.txt' cannot be found!"
984
-
985
- def test_stop_parsing_document_in_nonexistent_dataset(self):
986
- """
987
- Test stopping the parsing of a document whose dataset does not exist.
988
- """
989
- # create a dataset
990
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
991
- created_res = ragflow.create_dataset("test_download_nonexistent_document")
992
- created_res_id = created_res["data"]["dataset_id"]
993
- # upload files
994
- file_paths = ["test_data/test.txt"]
995
- uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
996
- # get the doc_id
997
- data = uploading_res["data"][0]
998
- doc_id = data["id"]
999
- # parse
1000
- res = ragflow.stop_parsing_document("imagination", doc_id)
1001
- assert res["code"] == RetCode.DATA_ERROR and res["message"] == "This dataset 'imagination' cannot be found!"
1002
-
1003
- # ------------------------stop parsing multiple documents----------------------------
1004
- def test_stop_parsing_documents_in_nonexistent_dataset(self):
1005
- """
1006
- Test stopping the parsing of documents whose dataset does not exist.
1007
- """
1008
- # create a dataset
1009
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
1010
- created_res = ragflow.create_dataset("test_download_nonexistent_document")
1011
- created_res_id = created_res["data"]["dataset_id"]
1012
- # upload files
1013
- file_paths = ["test_data/test.txt"]
1014
- uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
1015
- # parse
1016
- res = ragflow.stop_parsing_documents("imagination")
1017
- assert res["code"] == RetCode.DATA_ERROR and res["message"] == "This dataset 'imagination' cannot be found!"
1018
-
1019
- def test_stop_parsing_multiple_documents(self):
1020
- """
1021
- Test stopping the parsing of documents successfully.
1022
- """
1023
- # create a dataset
1024
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
1025
- created_res = ragflow.create_dataset("test_start_parsing_multiple_documents")
1026
- created_res_id = created_res["data"]["dataset_id"]
1027
- # upload files
1028
- file_paths = ["test_data/test.txt", "test_data/test1.txt"]
1029
- ragflow.upload_local_file(created_res_id, file_paths)
1030
- res = ragflow.start_parsing_documents(created_res_id)
1031
- assert res["code"] == RetCode.SUCCESS and res["data"] is True and res["message"] == ""
1032
-
1033
- res = ragflow.stop_parsing_documents(created_res_id)
1034
- assert res["code"] == RetCode.SUCCESS and res["data"] is True and res["message"] == ""
1035
-
1036
- def test_stop_parsing_multiple_documents_with_one_empty_file(self):
1037
- """
1038
- Test stopping the parsing of documents, one of which is empty.
1039
- """
1040
- # create a dataset
1041
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
1042
- created_res = ragflow.create_dataset(" test_start_parsing_multiple_documents")
1043
- created_res_id = created_res["data"]["dataset_id"]
1044
- # upload files
1045
- file_paths = ["test_data/test.txt", "test_data/test1.txt", "test_data/empty.txt"]
1046
- ragflow.upload_local_file(created_res_id, file_paths)
1047
- res = ragflow.start_parsing_documents(created_res_id)
1048
- assert res["code"] == RetCode.SUCCESS and res["message"] == "Empty data in the document: empty.txt; "
1049
- res = ragflow.stop_parsing_documents(created_res_id)
1050
- assert res["code"] == RetCode.SUCCESS and res["data"] is True and res["message"] == ""
1051
-
1052
- def test_stop_parsing_multiple_specific_documents(self):
1053
- """
1054
- Test stopping the parsing of documents whose document ids are specified.
1055
- """
1056
- # create a dataset
1057
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
1058
- created_res = ragflow.create_dataset(" test_start_parsing_multiple_documents")
1059
- created_res_id = created_res["data"]["dataset_id"]
1060
- # upload files
1061
- file_paths = ["test_data/test.txt", "test_data/test1.txt"]
1062
- uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
1063
- # get the doc_id
1064
- data = uploading_res["data"]
1065
- doc_ids = []
1066
- for d in data:
1067
- doc_ids.append(d["id"])
1068
- res = ragflow.start_parsing_documents(created_res_id, doc_ids)
1069
- assert res["code"] == RetCode.SUCCESS and res["message"] == ""
1070
- res = ragflow.stop_parsing_documents(created_res_id, doc_ids)
1071
- assert res["code"] == RetCode.SUCCESS and res["data"] is True and res["message"] == ""
1072
-
1073
- # ----------------------------show the status of the file-----------------------------------------------------
1074
- def test_show_status_with_success(self):
1075
- # create a dataset
1076
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
1077
- created_res = ragflow.create_dataset("test_show_status_with_success")
1078
- created_res_id = created_res["data"]["dataset_id"]
1079
- # upload files
1080
- file_paths = ["test_data/lol.txt"]
1081
- uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
1082
- # get the doc_id
1083
- data = uploading_res["data"][0]
1084
- doc_id = data["id"]
1085
- # parse file
1086
- res = ragflow.start_parsing_document(created_res_id, doc_id)
1087
- assert res["code"] == RetCode.SUCCESS and res["message"] == ""
1088
- # show status
1089
- status_res = ragflow.show_parsing_status(created_res_id, doc_id)
1090
- assert status_res["code"] == RetCode.SUCCESS and status_res["data"]["status"] == "RUNNING"
1091
-
1092
- def test_show_status_nonexistent_document(self):
1093
- """
1094
- Test showing the status of a document which does not exist.
1095
- """
1096
- # create a dataset
1097
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
1098
- created_res = ragflow.create_dataset("test_show_status_nonexistent_document")
1099
- created_res_id = created_res["data"]["dataset_id"]
1100
- res = ragflow.show_parsing_status(created_res_id, "imagination")
1101
- assert res["code"] == RetCode.DATA_ERROR and res["message"] == "This document: 'imagination' is not a valid document."
1102
-
1103
- def test_show_status_document_in_nonexistent_dataset(self):
1104
- """
1105
- Test showing the status of a document whose dataset is nonexistent.
1106
- """
1107
- # create a dataset
1108
- ragflow = RAGFlow(API_KEY, HOST_ADDRESS)
1109
- created_res = ragflow.create_dataset("test_show_status_document_in_nonexistent_dataset")
1110
- created_res_id = created_res["data"]["dataset_id"]
1111
- # upload files
1112
- file_paths = ["test_data/test.txt"]
1113
- uploading_res = ragflow.upload_local_file(created_res_id, file_paths)
1114
- # get the doc_id
1115
- data = uploading_res["data"][0]
1116
- doc_id = data["id"]
1117
- # parse
1118
- res = ragflow.show_parsing_status("imagination", doc_id)
1119
- assert res["code"] == RetCode.DATA_ERROR and res["message"] == "This dataset: 'imagination' cannot be found!"
1120
- # ----------------------------list the chunks of the file-----------------------------------------------------
1121
-
1122
- # ----------------------------delete the chunk-----------------------------------------------------
1123
-
1124
- # ----------------------------edit the status of the chunk-----------------------------------------------------
1125
-
1126
- # ----------------------------insert a new chunk-----------------------------------------------------
1127
-
1128
- # ----------------------------get a specific chunk-----------------------------------------------------
1129
-
1130
- # ----------------------------retrieval test-----------------------------------------------------