jinhai-2012 committed
Commit 2067c85 · 1 Parent(s): 449650c

Web API test cases (#3812)


### What problem does this PR solve?

1. Add a test case for a failed dataset update
2. Add a test case that uploads and parses a text file

### Type of change

- [x] Other (please describe): test cases

---------

Signed-off-by: jinhai <[email protected]>

sdk/python/test/test_frontend_api/common.py CHANGED
@@ -5,6 +5,7 @@ HOST_ADDRESS = os.getenv('HOST_ADDRESS', 'http://127.0.0.1:9380')

DATASET_NAME_LIMIT = 128

+
def create_dataset(auth, dataset_name):
    authorization = {"Authorization": auth}
    url = f"{HOST_ADDRESS}/v1/kb/create"
@@ -27,8 +28,53 @@ def rm_dataset(auth, dataset_id):
    res = requests.post(url=url, headers=authorization, json=json)
    return res.json()

+
def update_dataset(auth, json_req):
    authorization = {"Authorization": auth}
    url = f"{HOST_ADDRESS}/v1/kb/update"
    res = requests.post(url=url, headers=authorization, json=json_req)
    return res.json()
+
+
+ def upload_file(auth, dataset_id, path):
+     authorization = {"Authorization": auth}
+     url = f"{HOST_ADDRESS}/v1/document/upload"
+     base_name = os.path.basename(path)
+     json_req = {
+         "kb_id": dataset_id,
+     }
+
+     file = {
+         'file': open(f'{path}', 'rb')
+     }
+
+     res = requests.post(url=url, headers=authorization, files=file, data=json_req)
+     return res.json()
+
+ def list_document(auth, dataset_id):
+     authorization = {"Authorization": auth}
+     url = f"{HOST_ADDRESS}/v1/document/list?kb_id={dataset_id}"
+     res = requests.get(url=url, headers=authorization)
+     return res.json()
+
+ def get_docs_info(auth, doc_ids):
+     authorization = {"Authorization": auth}
+     json_req = {
+         "doc_ids": doc_ids
+     }
+     url = f"{HOST_ADDRESS}/v1/document/infos"
+     res = requests.post(url=url, headers=authorization, json=json_req)
+     return res.json()
+
+ def parse_docs(auth, doc_ids):
+     authorization = {"Authorization": auth}
+     json_req = {
+         "doc_ids": doc_ids,
+         "run": 1
+     }
+     url = f"{HOST_ADDRESS}/v1/document/run"
+     res = requests.post(url=url, headers=authorization, json=json_req)
+     return res.json()
+
+ def parse_file(auth, document_id):
+     pass
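
The PR description also mentions a failed dataset-update case, which is not part of the hunks shown here. Below is a minimal sketch of how such a case could be written with the helpers above; the test name, the over-long-name payload, the use of list_dataset to recover the dataset id, and the assumption that /v1/kb/update answers an invalid update with a non-zero code are illustrative assumptions, not taken from this commit.

# Hypothetical sketch of the "failed update dataset" case. Assumes the
# backend rejects names longer than DATASET_NAME_LIMIT by returning a
# non-zero "code" (assumption, not shown in this diff).
from common import create_dataset, list_dataset, rm_dataset, update_dataset, DATASET_NAME_LIMIT


def test_update_dataset_with_invalid_name(get_auth):
    # create a dataset to update
    res = create_dataset(get_auth, "test_update_dataset_invalid_name")
    assert res.get("code") == 0, f"{res.get('message')}"

    # look the dataset id up the same way test_chunk.py does; assumes the
    # newly created dataset appears on the first page of results
    res = list_dataset(get_auth, 1)
    dataset_id = res.get("data").get("kbs")[0].get("id")

    # an over-long name should be rejected with a non-zero code
    json_req = {"kb_id": dataset_id, "name": "a" * (DATASET_NAME_LIMIT + 1)}
    res = update_dataset(get_auth, json_req)
    assert res.get("code") != 0, "update with an over-long name should fail"

    # clean up
    res = rm_dataset(get_auth, dataset_id)
    assert res.get("code") == 0, f"{res.get('message')}"

Reusing the same res.get("code") assertion style keeps the sketch consistent with test_chunk.py below.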
sdk/python/test/test_frontend_api/test_chunk.py ADDED
@@ -0,0 +1,76 @@
+ #
+ # Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ from common import HOST_ADDRESS, create_dataset, list_dataset, rm_dataset, update_dataset, upload_file, DATASET_NAME_LIMIT
+ from common import list_document, get_docs_info, parse_docs
+ from time import sleep
+ from timeit import default_timer as timer
+ import re
+ import pytest
+ import random
+ import string
+
+
+ def test_parse_txt_document(get_auth):
+     # create dataset
+     res = create_dataset(get_auth, "test_parse_txt_document")
+     assert res.get("code") == 0, f"{res.get('message')}"
+
+     # list dataset
+     page_number = 1
+     dataset_list = []
+     dataset_id = None
+     while True:
+         res = list_dataset(get_auth, page_number)
+         data = res.get("data").get("kbs")
+         for item in data:
+             dataset_id = item.get("id")
+             dataset_list.append(dataset_id)
+         if len(dataset_list) < page_number * 150:
+             break
+         page_number += 1
+
+     filename = 'ragflow_test.txt'
+     res = upload_file(get_auth, dataset_id, f"../test_sdk_api/test_data/{filename}")
+     assert res.get("code") == 0, f"{res.get('message')}"
+
+     res = list_document(get_auth, dataset_id)
+
+     doc_id_list = []
+     for doc in res['data']['docs']:
+         doc_id_list.append(doc['id'])
+
+     res = get_docs_info(get_auth, doc_id_list)
+     print(doc_id_list)
+     doc_count = len(doc_id_list)
+     res = parse_docs(get_auth, doc_id_list)
+
+     start_ts = timer()
+     while True:
+         res = get_docs_info(get_auth, doc_id_list)
+         finished_count = 0
+         for doc_info in res['data']:
+             if doc_info['progress'] == 1:
+                 finished_count += 1
+         if finished_count == doc_count:
+             break
+         sleep(1)
+     print('time cost {:.1f}s'.format(timer() - start_ts))
+
+     # delete dataset
+     for dataset_id in dataset_list:
+         res = rm_dataset(get_auth, dataset_id)
+         assert res.get("code") == 0, f"{res.get('message')}"
+     print(f"{len(dataset_list)} datasets are deleted")