jinhai-2012 committed on
Commit
0a05763
·
1 Parent(s): b6c8684

Add more web test cases (#3702)

Browse files

### What problem does this PR solve?

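Adds web API test cases for datasets (duplicated names, invalid names, and updates with different parameters), together with dataset-name validation in the create endpoint.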

### Type of change

- [x] Other (please describe): test cases

---------

Signed-off-by: jinhai <[email protected]>

api/apps/kb_app.py CHANGED
@@ -29,6 +29,7 @@ from api.db.db_models import File
29
  from api.utils.api_utils import get_json_result
30
  from api import settings
31
  from rag.nlp import search
 
32
 
33
 
34
  @manager.route('/create', methods=['post'])
@@ -36,10 +37,19 @@ from rag.nlp import search
36
  @validate_request("name")
37
  def create():
38
  req = request.json
39
- req["name"] = req["name"].strip()
40
- req["name"] = duplicate_name(
 
 
 
 
 
 
 
 
 
41
  KnowledgebaseService.query,
42
- name=req["name"],
43
  tenant_id=current_user.id,
44
  status=StatusEnum.VALID.value)
45
  try:
@@ -73,7 +83,8 @@ def update():
73
  if not KnowledgebaseService.query(
74
  created_by=current_user.id, id=req["kb_id"]):
75
  return get_json_result(
76
- data=False, message='Only owner of knowledgebase authorized for this operation.', code=settings.RetCode.OPERATING_ERROR)
 
77
 
78
  e, kb = KnowledgebaseService.get_by_id(req["kb_id"])
79
  if not e:
@@ -81,7 +92,8 @@ def update():
81
  message="Can't find this knowledgebase!")
82
 
83
  if req["name"].lower() != kb.name.lower() \
84
- and len(KnowledgebaseService.query(name=req["name"], tenant_id=current_user.id, status=StatusEnum.VALID.value)) > 1:
 
85
  return get_data_error_result(
86
  message="Duplicated knowledgebase name.")
87
 
@@ -152,10 +164,11 @@ def rm():
152
  )
153
  try:
154
  kbs = KnowledgebaseService.query(
155
- created_by=current_user.id, id=req["kb_id"])
156
  if not kbs:
157
  return get_json_result(
158
- data=False, message='Only owner of knowledgebase authorized for this operation.', code=settings.RetCode.OPERATING_ERROR)
 
159
 
160
  for doc in DocumentService.query(kb_id=req["kb_id"]):
161
  if not DocumentService.remove_document(doc, kbs[0].tenant_id):
 
29
  from api.utils.api_utils import get_json_result
30
  from api import settings
31
  from rag.nlp import search
32
+ from api.constants import DATASET_NAME_LIMIT
33
 
34
 
35
  @manager.route('/create', methods=['post'])
 
37
  @validate_request("name")
38
  def create():
39
  req = request.json
40
+ dataset_name = req["name"]
41
+ if not isinstance(dataset_name, str):
42
+ return get_data_error_result(message="Dataset name must be string.")
43
+ if dataset_name == "":
44
+ return get_data_error_result(message="Dataset name can't be empty.")
45
+ if len(dataset_name) >= DATASET_NAME_LIMIT:
46
+ return get_data_error_result(
47
+ message=f"Dataset name length is {len(dataset_name)} which is large than {DATASET_NAME_LIMIT}")
48
+
49
+ dataset_name = dataset_name.strip()
50
+ dataset_name = duplicate_name(
51
  KnowledgebaseService.query,
52
+ name=dataset_name,
53
  tenant_id=current_user.id,
54
  status=StatusEnum.VALID.value)
55
  try:
 
83
  if not KnowledgebaseService.query(
84
  created_by=current_user.id, id=req["kb_id"]):
85
  return get_json_result(
86
+ data=False, message='Only owner of knowledgebase authorized for this operation.',
87
+ code=settings.RetCode.OPERATING_ERROR)
88
 
89
  e, kb = KnowledgebaseService.get_by_id(req["kb_id"])
90
  if not e:
 
92
  message="Can't find this knowledgebase!")
93
 
94
  if req["name"].lower() != kb.name.lower() \
95
+ and len(
96
+ KnowledgebaseService.query(name=req["name"], tenant_id=current_user.id, status=StatusEnum.VALID.value)) > 1:
97
  return get_data_error_result(
98
  message="Duplicated knowledgebase name.")
99
 
 
164
  )
165
  try:
166
  kbs = KnowledgebaseService.query(
167
+ created_by=current_user.id, id=req["kb_id"])
168
  if not kbs:
169
  return get_json_result(
170
+ data=False, message='Only owner of knowledgebase authorized for this operation.',
171
+ code=settings.RetCode.OPERATING_ERROR)
172
 
173
  for doc in DocumentService.query(kb_id=req["kb_id"]):
174
  if not DocumentService.remove_document(doc, kbs[0].tenant_id):
api/constants.py CHANGED
@@ -23,3 +23,5 @@ API_VERSION = "v1"
23
  RAG_FLOW_SERVICE_NAME = "ragflow"
24
  REQUEST_WAIT_SEC = 2
25
  REQUEST_MAX_WAIT_SEC = 300
 
 
 
23
  RAG_FLOW_SERVICE_NAME = "ragflow"
24
  REQUEST_WAIT_SEC = 2
25
  REQUEST_MAX_WAIT_SEC = 300
26
+
27
+ DATASET_NAME_LIMIT = 128
rag/utils/infinity_conn.py CHANGED
@@ -310,7 +310,9 @@ class InfinityConnection(DocStoreConnection):
310
  table_name = f"{indexName}_{knowledgebaseId}"
311
  table_instance = db_instance.get_table(table_name)
312
  kb_res = table_instance.output(["*"]).filter(f"id = '{chunkId}'").to_pl()
313
- df_list.append(kb_res)
 
 
314
  self.connPool.release_conn(inf_conn)
315
  res = concat_dataframes(df_list, ["id"])
316
  res_fields = self.getFields(res, res.columns)
 
310
  table_name = f"{indexName}_{knowledgebaseId}"
311
  table_instance = db_instance.get_table(table_name)
312
  kb_res = table_instance.output(["*"]).filter(f"id = '{chunkId}'").to_pl()
313
+ if len(kb_res) != 0 and kb_res.shape[0] > 0:
314
+ df_list.append(kb_res)
315
+
316
  self.connPool.release_conn(inf_conn)
317
  res = concat_dataframes(df_list, ["id"])
318
  res_fields = self.getFields(res, res.columns)
sdk/python/test/test_frontend_api/common.py CHANGED
@@ -3,6 +3,8 @@ import requests
3
 
4
  HOST_ADDRESS = os.getenv('HOST_ADDRESS', 'http://127.0.0.1:9380')
5
 
 
 
6
  def create_dataset(auth, dataset_name):
7
  authorization = {"Authorization": auth}
8
  url = f"{HOST_ADDRESS}/v1/kb/create"
@@ -24,3 +26,9 @@ def rm_dataset(auth, dataset_id):
24
  json = {"kb_id": dataset_id}
25
  res = requests.post(url=url, headers=authorization, json=json)
26
  return res.json()
 
 
 
 
 
 
 
3
 
4
  HOST_ADDRESS = os.getenv('HOST_ADDRESS', 'http://127.0.0.1:9380')
5
 
6
+ DATASET_NAME_LIMIT = 128
7
+
8
  def create_dataset(auth, dataset_name):
9
  authorization = {"Authorization": auth}
10
  url = f"{HOST_ADDRESS}/v1/kb/create"
 
26
  json = {"kb_id": dataset_id}
27
  res = requests.post(url=url, headers=authorization, json=json)
28
  return res.json()
29
+
30
+ def update_dataset(auth, json_req):
31
+ authorization = {"Authorization": auth}
32
+ url = f"{HOST_ADDRESS}/v1/kb/update"
33
+ res = requests.post(url=url, headers=authorization, json=json_req)
34
+ return res.json()
sdk/python/test/test_frontend_api/test_dataset.py CHANGED
@@ -1,6 +1,8 @@
1
- from common import HOST_ADDRESS, create_dataset, list_dataset, rm_dataset
2
- import requests
3
-
 
 
4
 
5
  def test_dataset(get_auth):
6
  # create dataset
@@ -56,8 +58,76 @@ def test_dataset_1k_dataset(get_auth):
56
  assert res.get("code") == 0, f"{res.get('message')}"
57
  print(f"{len(dataset_list)} datasets are deleted")
58
 
59
- # delete dataset
60
- # create invalid name dataset
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  # update dataset with different parameters
62
- # create duplicated name dataset
63
- #
 
1
+ from common import HOST_ADDRESS, create_dataset, list_dataset, rm_dataset, update_dataset, DATASET_NAME_LIMIT
2
+ import re
3
+ import pytest
4
+ import random
5
+ import string
6
 
7
  def test_dataset(get_auth):
8
  # create dataset
 
58
  assert res.get("code") == 0, f"{res.get('message')}"
59
  print(f"{len(dataset_list)} datasets are deleted")
60
 
61
+ def test_duplicated_name_dataset(get_auth):
62
+ # create dataset
63
+ for i in range(20):
64
+ res = create_dataset(get_auth, "test_create_dataset")
65
+ assert res.get("code") == 0, f"{res.get('message')}"
66
+
67
+ # list dataset
68
+ res = list_dataset(get_auth, 1)
69
+ data = res.get("data")
70
+ dataset_list = []
71
+ pattern = r'^test_create_dataset.*'
72
+ for item in data:
73
+ dataset_name = item.get("name")
74
+ dataset_id = item.get("id")
75
+ dataset_list.append(dataset_id)
76
+ match = re.match(pattern, dataset_name)
77
+ assert match != None
78
+
79
+ for dataset_id in dataset_list:
80
+ res = rm_dataset(get_auth, dataset_id)
81
+ assert res.get("code") == 0, f"{res.get('message')}"
82
+ print(f"{len(dataset_list)} datasets are deleted")
83
+
84
+ def test_invalid_name_dataset(get_auth):
85
+ # create dataset
86
+ # with pytest.raises(Exception) as e:
87
+ res = create_dataset(get_auth, 0)
88
+ assert res['code'] == 102
89
+
90
+ res = create_dataset(get_auth, "")
91
+ assert res['code'] == 102
92
+
93
+ long_string = ""
94
+
95
+ while len(long_string) <= DATASET_NAME_LIMIT:
96
+ long_string += random.choice(string.ascii_letters + string.digits)
97
+
98
+ res = create_dataset(get_auth, long_string)
99
+ assert res['code'] == 102
100
+ print(res)
101
+
102
+ def test_update_different_params_dataset(get_auth):
103
+ # create dataset
104
+ res = create_dataset(get_auth, "test_create_dataset")
105
+ assert res.get("code") == 0, f"{res.get('message')}"
106
+
107
+ # list dataset
108
+ page_number = 1
109
+ dataset_list = []
110
+ while True:
111
+ res = list_dataset(get_auth, page_number)
112
+ data = res.get("data")
113
+ for item in data:
114
+ dataset_id = item.get("id")
115
+ dataset_list.append(dataset_id)
116
+ if len(dataset_list) < page_number * 150:
117
+ break
118
+ page_number += 1
119
+
120
+ print(f"found {len(dataset_list)} datasets")
121
+ dataset_id = dataset_list[0]
122
+
123
+ json_req = {"kb_id": dataset_id, "name": "test_update_dataset", "description": "test", "permission": "me", "parser_id": "presentation"}
124
+ res = update_dataset(get_auth, json_req)
125
+ assert res.get("code") == 0, f"{res.get('message')}"
126
+
127
+ # delete dataset
128
+ for dataset_id in dataset_list:
129
+ res = rm_dataset(get_auth, dataset_id)
130
+ assert res.get("code") == 0, f"{res.get('message')}"
131
+ print(f"{len(dataset_list)} datasets are deleted")
132
+
133
  # update dataset with different parameters
 
 
printEnvironment.sh → show_env.sh RENAMED
@@ -15,7 +15,7 @@ get_distro_info() {
15
  echo "$distro_id $distro_version (Kernel version: $kernel_version)"
16
  }
17
 
18
- # get Git repo name
19
  git_repo_name=''
20
  if git rev-parse --is-inside-work-tree > /dev/null 2>&1; then
21
  git_repo_name=$(basename "$(git rev-parse --show-toplevel)")
@@ -48,8 +48,8 @@ else
48
  python_version="Python not installed"
49
  fi
50
 
51
- # Print all infomation
52
- echo "Current Repo: $git_repo_name"
53
 
54
  # get Commit ID
55
  git_version=$(git log -1 --pretty=format:'%h')
 
15
  echo "$distro_id $distro_version (Kernel version: $kernel_version)"
16
  }
17
 
18
+ # get Git repository name
19
  git_repo_name=''
20
  if git rev-parse --is-inside-work-tree > /dev/null 2>&1; then
21
  git_repo_name=$(basename "$(git rev-parse --show-toplevel)")
 
48
  python_version="Python not installed"
49
  fi
50
 
51
+ # Print all information
52
+ echo "Current Repository: $git_repo_name"
53
 
54
  # get Commit ID
55
  git_version=$(git log -1 --pretty=format:'%h')