liuhua liuhua committed on
Commit
ee8a916
·
1 Parent(s): c4fcec1

Fix some issues in API (#2902)

Browse files

### What problem does this PR solve?

Fix some issues in API

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

Co-authored-by: liuhua <[email protected]>

api/apps/sdk/chat.py CHANGED
@@ -30,18 +30,17 @@ from api.utils.api_utils import get_result
30
  @token_required
31
  def create(tenant_id):
32
  req=request.json
33
- if not req.get("knowledgebases"):
34
- return get_error_data_result(retmsg="knowledgebases are required")
35
- kb_list = []
36
- for kb in req.get("knowledgebases"):
37
- if not kb["id"]:
38
- return get_error_data_result(retmsg="knowledgebase needs id")
39
- if not KnowledgebaseService.query(id=kb["id"], tenant_id=tenant_id):
40
- return get_error_data_result(retmsg="you do not own the knowledgebase")
41
- # if not DocumentService.query(kb_id=kb["id"]):
42
- # return get_error_data_result(retmsg="There is a invalid knowledgebase")
43
- kb_list.append(kb["id"])
44
- req["kb_ids"] = kb_list
45
  # llm
46
  llm = req.get("llm")
47
  if llm:
@@ -81,24 +80,24 @@ def create(tenant_id):
81
  else:
82
  req["llm_id"] = tenant.llm_id
83
  if not req.get("name"):
84
- return get_error_data_result(retmsg="name is required.")
85
  if DialogService.query(name=req["name"], tenant_id=tenant_id, status=StatusEnum.VALID.value):
86
- return get_error_data_result(retmsg="Duplicated chat name in creating dataset.")
87
  # tenant_id
88
  if req.get("tenant_id"):
89
- return get_error_data_result(retmsg="tenant_id must not be provided.")
90
  req["tenant_id"] = tenant_id
91
  # prompt more parameter
92
  default_prompt = {
93
- "system": """你是一个智能助手,请总结知识库的内容来回答问题,请列举知识库中的数据详细回答。当所有知识库内容都与问题无关时,你的回答必须包括“知识库中未找到您要的答案!”这句话。回答需要考虑聊天历史。
94
- 以下是知识库:
95
- {knowledge}
96
- 以上是知识库。""",
97
- "prologue": "您好,我是您的助手小樱,长得可爱又善良,can I help you?",
98
  "parameters": [
99
  {"key": "knowledge", "optional": False}
100
  ],
101
- "empty_response": "Sorry! 知识库中未找到相关内容!"
102
  }
103
  key_list_2 = ["system", "prologue", "parameters", "empty_response"]
104
  if "prompt_config" not in req:
@@ -149,7 +148,7 @@ def update(tenant_id,chat_id):
149
  req =request.json
150
  if "knowledgebases" in req:
151
  if not req.get("knowledgebases"):
152
- return get_error_data_result(retmsg="knowledgebases can't be empty value")
153
  kb_list = []
154
  for kb in req.get("knowledgebases"):
155
  if not kb["id"]:
@@ -189,10 +188,10 @@ def update(tenant_id,chat_id):
189
  res = res.to_json()
190
  if "llm_id" in req:
191
  if not TenantLLMService.query(llm_name=req["llm_id"]):
192
- return get_error_data_result(retmsg="the model_name does not exist.")
193
  if "name" in req:
194
  if not req.get("name"):
195
- return get_error_data_result(retmsg="name is not empty.")
196
  if req["name"].lower() != res["name"].lower() \
197
  and len(
198
  DialogService.query(name=req["name"], tenant_id=tenant_id, status=StatusEnum.VALID.value)) > 0:
@@ -224,7 +223,7 @@ def delete(tenant_id):
224
  req = request.json
225
  ids = req.get("ids")
226
  if not ids:
227
- return get_error_data_result(retmsg="ids are required")
228
  for id in ids:
229
  if not DialogService.query(tenant_id=tenant_id, id=id, status=StatusEnum.VALID.value):
230
  return get_error_data_result(retmsg=f"You don't own the chat {id}")
@@ -234,7 +233,7 @@ def delete(tenant_id):
234
 
235
  @manager.route('/chat', methods=['GET'])
236
  @token_required
237
- def list(tenant_id):
238
  id = request.args.get("id")
239
  name = request.args.get("name")
240
  chat = DialogService.query(id=id,name=name,status=StatusEnum.VALID.value)
 
30
  @token_required
31
  def create(tenant_id):
32
  req=request.json
33
+ ids= req.get("knowledgebases")
34
+ if not ids:
35
+ return get_error_data_result(retmsg="`knowledgebases` is required")
36
+ for kb_id in ids:
37
+ kbs = KnowledgebaseService.query(id=kb_id,tenant_id=tenant_id)
38
+ if not kbs:
39
+ return get_error_data_result(f"You don't own the dataset {kb_id}")
40
+ kb=kbs[0]
41
+ if kb.chunk_num == 0:
42
+ return get_error_data_result(f"The dataset {kb_id} doesn't own parsed file")
43
+ req["kb_ids"] = ids
 
44
  # llm
45
  llm = req.get("llm")
46
  if llm:
 
80
  else:
81
  req["llm_id"] = tenant.llm_id
82
  if not req.get("name"):
83
+ return get_error_data_result(retmsg="`name` is required.")
84
  if DialogService.query(name=req["name"], tenant_id=tenant_id, status=StatusEnum.VALID.value):
85
+ return get_error_data_result(retmsg="Duplicated chat name in creating chat.")
86
  # tenant_id
87
  if req.get("tenant_id"):
88
+ return get_error_data_result(retmsg="`tenant_id` must not be provided.")
89
  req["tenant_id"] = tenant_id
90
  # prompt more parameter
91
  default_prompt = {
92
+ "system": """You are an intelligent assistant. Please summarize the content of the knowledge base to answer the question. Please list the data in the knowledge base and answer in detail. When all knowledge base content is irrelevant to the question, your answer must include the sentence "The answer you are looking for is not found in the knowledge base!" Answers need to consider chat history.
93
+ Here is the knowledge base:
94
+ {knowledge}
95
+ The above is the knowledge base.""",
96
+ "prologue": "Hi! I'm your assistant, what can I do for you?",
97
  "parameters": [
98
  {"key": "knowledge", "optional": False}
99
  ],
100
+ "empty_response": "Sorry! No relevant content was found in the knowledge base!"
101
  }
102
  key_list_2 = ["system", "prologue", "parameters", "empty_response"]
103
  if "prompt_config" not in req:
 
148
  req =request.json
149
  if "knowledgebases" in req:
150
  if not req.get("knowledgebases"):
151
+ return get_error_data_result(retmsg="`knowledgebases` can't be empty value")
152
  kb_list = []
153
  for kb in req.get("knowledgebases"):
154
  if not kb["id"]:
 
188
  res = res.to_json()
189
  if "llm_id" in req:
190
  if not TenantLLMService.query(llm_name=req["llm_id"]):
191
+ return get_error_data_result(retmsg="The `model_name` does not exist.")
192
  if "name" in req:
193
  if not req.get("name"):
194
+ return get_error_data_result(retmsg="`name` is not empty.")
195
  if req["name"].lower() != res["name"].lower() \
196
  and len(
197
  DialogService.query(name=req["name"], tenant_id=tenant_id, status=StatusEnum.VALID.value)) > 0:
 
223
  req = request.json
224
  ids = req.get("ids")
225
  if not ids:
226
+ return get_error_data_result(retmsg="`ids` are required")
227
  for id in ids:
228
  if not DialogService.query(tenant_id=tenant_id, id=id, status=StatusEnum.VALID.value):
229
  return get_error_data_result(retmsg=f"You don't own the chat {id}")
 
233
 
234
  @manager.route('/chat', methods=['GET'])
235
  @token_required
236
+ def list_chat(tenant_id):
237
  id = request.args.get("id")
238
  name = request.args.get("name")
239
  chat = DialogService.query(id=id,name=name,status=StatusEnum.VALID.value)
api/apps/sdk/dataset.py CHANGED
@@ -25,28 +25,38 @@ from api.db.services.knowledgebase_service import KnowledgebaseService
25
  from api.db.services.user_service import TenantService
26
  from api.settings import RetCode
27
  from api.utils import get_uuid
28
- from api.utils.api_utils import get_result, token_required,get_error_data_result
 
29
 
30
  @manager.route('/dataset', methods=['POST'])
31
  @token_required
32
  def create(tenant_id):
33
  req = request.json
34
  e, t = TenantService.get_by_id(tenant_id)
 
 
 
 
 
 
 
 
 
35
  if "tenant_id" in req or "embedding_model" in req:
36
  return get_error_data_result(
37
- retmsg="Tenant_id or embedding_model must not be provided")
38
  chunk_count=req.get("chunk_count")
39
  document_count=req.get("document_count")
40
  if chunk_count or document_count:
41
- return get_error_data_result(retmsg="chunk_count or document_count must be 0 or not be provided")
42
  if "name" not in req:
43
  return get_error_data_result(
44
- retmsg="Name is not empty!")
45
  req['id'] = get_uuid()
46
  req["name"] = req["name"].strip()
47
  if req["name"] == "":
48
  return get_error_data_result(
49
- retmsg="Name is not empty string!")
50
  if KnowledgebaseService.query(name=req["name"], tenant_id=tenant_id, status=StatusEnum.VALID.value):
51
  return get_error_data_result(
52
  retmsg="Duplicated knowledgebase name in creating dataset.")
@@ -55,7 +65,7 @@ def create(tenant_id):
55
  key_mapping = {
56
  "chunk_num": "chunk_count",
57
  "doc_num": "document_count",
58
- "parser_id": "parse_method",
59
  "embd_id": "embedding_model"
60
  }
61
  mapped_keys = {new_key: req[old_key] for new_key, old_key in key_mapping.items() if old_key in req}
@@ -90,7 +100,7 @@ def delete(tenant_id):
90
  File2DocumentService.delete_by_document_id(doc.id)
91
  if not KnowledgebaseService.delete_by_id(id):
92
  return get_error_data_result(
93
- retmsg="Delete dataset error.(Database serror)")
94
  return get_result(retcode=RetCode.SUCCESS)
95
 
96
  @manager.route('/dataset/<dataset_id>', methods=['PUT'])
@@ -103,30 +113,39 @@ def update(tenant_id,dataset_id):
103
  invalid_keys = {"id", "embd_id", "chunk_num", "doc_num", "parser_id"}
104
  if any(key in req for key in invalid_keys):
105
  return get_error_data_result(retmsg="The input parameters are invalid.")
 
 
 
 
 
 
 
 
 
106
  if "tenant_id" in req:
107
  if req["tenant_id"] != tenant_id:
108
  return get_error_data_result(
109
- retmsg="Can't change tenant_id.")
110
  e, kb = KnowledgebaseService.get_by_id(dataset_id)
111
  if "chunk_count" in req:
112
  if req["chunk_count"] != kb.chunk_num:
113
  return get_error_data_result(
114
- retmsg="Can't change chunk_count.")
115
  req.pop("chunk_count")
116
  if "document_count" in req:
117
  if req['document_count'] != kb.doc_num:
118
  return get_error_data_result(
119
- retmsg="Can't change document_count.")
120
  req.pop("document_count")
121
- if "parse_method" in req:
122
- if kb.chunk_num != 0 and req['parse_method'] != kb.parser_id:
123
  return get_error_data_result(
124
- retmsg="If chunk count is not 0, parse method is not changable.")
125
- req['parser_id'] = req.pop('parse_method')
126
  if "embedding_model" in req:
127
- if kb.chunk_num != 0 and req['parse_method'] != kb.parser_id:
128
  return get_error_data_result(
129
- retmsg="If chunk count is not 0, parse method is not changable.")
130
  req['embd_id'] = req.pop('embedding_model')
131
  if "name" in req:
132
  req["name"] = req["name"].strip()
@@ -162,7 +181,7 @@ def list(tenant_id):
162
  key_mapping = {
163
  "chunk_num": "chunk_count",
164
  "doc_num": "document_count",
165
- "parser_id": "parse_method",
166
  "embd_id": "embedding_model"
167
  }
168
  renamed_data = {}
 
25
  from api.db.services.user_service import TenantService
26
  from api.settings import RetCode
27
  from api.utils import get_uuid
28
+ from api.utils.api_utils import get_result, token_required, get_error_data_result, valid
29
+
30
 
31
  @manager.route('/dataset', methods=['POST'])
32
  @token_required
33
  def create(tenant_id):
34
  req = request.json
35
  e, t = TenantService.get_by_id(tenant_id)
36
+ permission = req.get("permission")
37
+ language = req.get("language")
38
+ chunk_method = req.get("chunk_method")
39
+ valid_permission = ("me", "team")
40
+ valid_language =("Chinese", "English")
41
+ valid_chunk_method = ("naive","manual","qa","table","paper","book","laws","presentation","picture","one","knowledge_graph","email")
42
+ check_validation=valid(permission,valid_permission,language,valid_language,chunk_method,valid_chunk_method)
43
+ if check_validation:
44
+ return check_validation
45
  if "tenant_id" in req or "embedding_model" in req:
46
  return get_error_data_result(
47
+ retmsg="`tenant_id` or `embedding_model` must not be provided")
48
  chunk_count=req.get("chunk_count")
49
  document_count=req.get("document_count")
50
  if chunk_count or document_count:
51
+ return get_error_data_result(retmsg="`chunk_count` or `document_count` must be 0 or not be provided")
52
  if "name" not in req:
53
  return get_error_data_result(
54
+ retmsg="`name` is not empty!")
55
  req['id'] = get_uuid()
56
  req["name"] = req["name"].strip()
57
  if req["name"] == "":
58
  return get_error_data_result(
59
+ retmsg="`name` is not empty string!")
60
  if KnowledgebaseService.query(name=req["name"], tenant_id=tenant_id, status=StatusEnum.VALID.value):
61
  return get_error_data_result(
62
  retmsg="Duplicated knowledgebase name in creating dataset.")
 
65
  key_mapping = {
66
  "chunk_num": "chunk_count",
67
  "doc_num": "document_count",
68
+ "parser_id": "chunk_method",
69
  "embd_id": "embedding_model"
70
  }
71
  mapped_keys = {new_key: req[old_key] for new_key, old_key in key_mapping.items() if old_key in req}
 
100
  File2DocumentService.delete_by_document_id(doc.id)
101
  if not KnowledgebaseService.delete_by_id(id):
102
  return get_error_data_result(
103
+ retmsg="Delete dataset error.(Database error)")
104
  return get_result(retcode=RetCode.SUCCESS)
105
 
106
  @manager.route('/dataset/<dataset_id>', methods=['PUT'])
 
113
  invalid_keys = {"id", "embd_id", "chunk_num", "doc_num", "parser_id"}
114
  if any(key in req for key in invalid_keys):
115
  return get_error_data_result(retmsg="The input parameters are invalid.")
116
+ permission = req.get("permission")
117
+ language = req.get("language")
118
+ chunk_method = req.get("chunk_method")
119
+ valid_permission = ("me", "team")
120
+ valid_language =("Chinese", "English")
121
+ valid_chunk_method = ("naive","manual","qa","table","paper","book","laws","presentation","picture","one","knowledge_graph","email")
122
+ check_validation=valid(permission,valid_permission,language,valid_language,chunk_method,valid_chunk_method)
123
+ if check_validation:
124
+ return check_validation
125
  if "tenant_id" in req:
126
  if req["tenant_id"] != tenant_id:
127
  return get_error_data_result(
128
+ retmsg="Can't change `tenant_id`.")
129
  e, kb = KnowledgebaseService.get_by_id(dataset_id)
130
  if "chunk_count" in req:
131
  if req["chunk_count"] != kb.chunk_num:
132
  return get_error_data_result(
133
+ retmsg="Can't change `chunk_count`.")
134
  req.pop("chunk_count")
135
  if "document_count" in req:
136
  if req['document_count'] != kb.doc_num:
137
  return get_error_data_result(
138
+ retmsg="Can't change `document_count`.")
139
  req.pop("document_count")
140
+ if "chunk_method" in req:
141
+ if kb.chunk_num != 0 and req['chunk_method'] != kb.parser_id:
142
  return get_error_data_result(
143
+ retmsg="If `chunk_count` is not 0, `chunk_method` is not changeable.")
144
+ req['parser_id'] = req.pop('chunk_method')
145
  if "embedding_model" in req:
146
+ if kb.chunk_num != 0 and req['embedding_model'] != kb.embd_id:
147
  return get_error_data_result(
148
+ retmsg="If `chunk_count` is not 0, `embedding_method` is not changeable.")
149
  req['embd_id'] = req.pop('embedding_model')
150
  if "name" in req:
151
  req["name"] = req["name"].strip()
 
181
  key_mapping = {
182
  "chunk_num": "chunk_count",
183
  "doc_num": "document_count",
184
+ "parser_id": "chunk_method",
185
  "embd_id": "embedding_model"
186
  }
187
  renamed_data = {}
api/apps/sdk/doc.py CHANGED
@@ -88,20 +88,20 @@ def upload(dataset_id, tenant_id):
88
  def update_doc(tenant_id, dataset_id, document_id):
89
  req = request.json
90
  if not KnowledgebaseService.query(id=dataset_id, tenant_id=tenant_id):
91
- return get_error_data_result(retmsg='You do not own the dataset.')
92
  doc = DocumentService.query(kb_id=dataset_id, id=document_id)
93
  if not doc:
94
- return get_error_data_result(retmsg='The dataset not own the document.')
95
  doc = doc[0]
96
  if "chunk_count" in req:
97
  if req["chunk_count"] != doc.chunk_num:
98
- return get_error_data_result(retmsg="Can't change chunk_count.")
99
  if "token_count" in req:
100
  if req["token_count"] != doc.token_num:
101
- return get_error_data_result(retmsg="Can't change token_count.")
102
  if "progress" in req:
103
  if req['progress'] != doc.progress:
104
- return get_error_data_result(retmsg="Can't change progress.")
105
 
106
  if "name" in req and req["name"] != doc.name:
107
  if pathlib.Path(req["name"].lower()).suffix != pathlib.Path(doc.name.lower()).suffix:
@@ -121,8 +121,8 @@ def update_doc(tenant_id, dataset_id, document_id):
121
  FileService.update_by_id(file.id, {"name": req["name"]})
122
  if "parser_config" in req:
123
  DocumentService.update_parser_config(doc.id, req["parser_config"])
124
- if "parser_method" in req:
125
- if doc.parser_id.lower() == req["parser_method"].lower():
126
  return get_result()
127
 
128
  if doc.type == FileType.VISUAL or re.search(
@@ -130,7 +130,7 @@ def update_doc(tenant_id, dataset_id, document_id):
130
  return get_error_data_result(retmsg="Not supported yet!")
131
 
132
  e = DocumentService.update_by_id(doc.id,
133
- {"parser_id": req["parser_method"], "progress": 0, "progress_msg": "",
134
  "run": TaskStatus.UNSTART.value})
135
  if not e:
136
  return get_error_data_result(retmsg="Document not found!")
@@ -196,7 +196,7 @@ def list_docs(dataset_id, tenant_id):
196
  "chunk_num": "chunk_count",
197
  "kb_id": "knowledgebase_id",
198
  "token_num": "token_count",
199
- "parser_id": "parser_method"
200
  }
201
  renamed_doc = {}
202
  for key, value in doc.items():
@@ -213,7 +213,7 @@ def delete(tenant_id,dataset_id):
213
  return get_error_data_result(retmsg=f"You don't own the dataset {dataset_id}. ")
214
  req = request.json
215
  if not req.get("ids"):
216
- return get_error_data_result(retmsg="ids is required")
217
  doc_ids = req["ids"]
218
  root_folder = FileService.get_root_folder(tenant_id)
219
  pf_id = root_folder["id"]
@@ -457,7 +457,7 @@ def rm_chunk(tenant_id,dataset_id,document_id):
457
 
458
  @manager.route('/dataset/<dataset_id>/document/<document_id>/chunk/<chunk_id>', methods=['PUT'])
459
  @token_required
460
- def set(tenant_id,dataset_id,document_id,chunk_id):
461
  try:
462
  res = ELASTICSEARCH.get(
463
  chunk_id, search.index_name(
@@ -519,9 +519,15 @@ def retrieval_test(tenant_id):
519
  req = request.json
520
  if not req.get("datasets"):
521
  return get_error_data_result("`datasets` is required.")
522
- kb_id = req["datasets"]
523
- if isinstance(kb_id, str): kb_id = [kb_id]
524
- for id in kb_id:
 
 
 
 
 
 
525
  if not KnowledgebaseService.query(id=id,tenant_id=tenant_id):
526
  return get_error_data_result(f"You don't own the dataset {id}.")
527
  if "question" not in req:
@@ -538,7 +544,7 @@ def retrieval_test(tenant_id):
538
  else:
539
  highlight = True
540
  try:
541
- e, kb = KnowledgebaseService.get_by_id(kb_id[0])
542
  if not e:
543
  return get_error_data_result(retmsg="Knowledgebase not found!")
544
  embd_mdl = TenantLLMService.model_instance(
@@ -554,7 +560,7 @@ def retrieval_test(tenant_id):
554
  question += keyword_extraction(chat_mdl, question)
555
 
556
  retr = retrievaler if kb.parser_id != ParserType.KG else kg_retrievaler
557
- ranks = retr.retrieval(question, embd_mdl, kb.tenant_id, kb_id, page, size,
558
  similarity_threshold, vector_similarity_weight, top,
559
  doc_ids, rerank_mdl=rerank_mdl, highlight=highlight)
560
  for c in ranks["chunks"]:
@@ -580,6 +586,6 @@ def retrieval_test(tenant_id):
580
  return get_result(data=ranks)
581
  except Exception as e:
582
  if str(e).find("not_found") > 0:
583
- return get_result(retmsg=f'No chunk found! Check the chunk statu s please!',
584
  retcode=RetCode.DATA_ERROR)
585
  return server_error_response(e)
 
88
  def update_doc(tenant_id, dataset_id, document_id):
89
  req = request.json
90
  if not KnowledgebaseService.query(id=dataset_id, tenant_id=tenant_id):
91
+ return get_error_data_result(retmsg="You don't own the dataset.")
92
  doc = DocumentService.query(kb_id=dataset_id, id=document_id)
93
  if not doc:
94
+ return get_error_data_result(retmsg="The dataset doesn't own the document.")
95
  doc = doc[0]
96
  if "chunk_count" in req:
97
  if req["chunk_count"] != doc.chunk_num:
98
+ return get_error_data_result(retmsg="Can't change `chunk_count`.")
99
  if "token_count" in req:
100
  if req["token_count"] != doc.token_num:
101
+ return get_error_data_result(retmsg="Can't change `token_count`.")
102
  if "progress" in req:
103
  if req['progress'] != doc.progress:
104
+ return get_error_data_result(retmsg="Can't change `progress`.")
105
 
106
  if "name" in req and req["name"] != doc.name:
107
  if pathlib.Path(req["name"].lower()).suffix != pathlib.Path(doc.name.lower()).suffix:
 
121
  FileService.update_by_id(file.id, {"name": req["name"]})
122
  if "parser_config" in req:
123
  DocumentService.update_parser_config(doc.id, req["parser_config"])
124
+ if "chunk_method" in req:
125
+ if doc.parser_id.lower() == req["chunk_method"].lower():
126
  return get_result()
127
 
128
  if doc.type == FileType.VISUAL or re.search(
 
130
  return get_error_data_result(retmsg="Not supported yet!")
131
 
132
  e = DocumentService.update_by_id(doc.id,
133
+ {"parser_id": req["chunk_method"], "progress": 0, "progress_msg": "",
134
  "run": TaskStatus.UNSTART.value})
135
  if not e:
136
  return get_error_data_result(retmsg="Document not found!")
 
196
  "chunk_num": "chunk_count",
197
  "kb_id": "knowledgebase_id",
198
  "token_num": "token_count",
199
+ "parser_id": "chunk_method"
200
  }
201
  renamed_doc = {}
202
  for key, value in doc.items():
 
213
  return get_error_data_result(retmsg=f"You don't own the dataset {dataset_id}. ")
214
  req = request.json
215
  if not req.get("ids"):
216
+ return get_error_data_result(retmsg="`ids` is required")
217
  doc_ids = req["ids"]
218
  root_folder = FileService.get_root_folder(tenant_id)
219
  pf_id = root_folder["id"]
 
457
 
458
  @manager.route('/dataset/<dataset_id>/document/<document_id>/chunk/<chunk_id>', methods=['PUT'])
459
  @token_required
460
+ def update_chunk(tenant_id,dataset_id,document_id,chunk_id):
461
  try:
462
  res = ELASTICSEARCH.get(
463
  chunk_id, search.index_name(
 
519
  req = request.json
520
  if not req.get("datasets"):
521
  return get_error_data_result("`datasets` is required.")
522
+ kb_ids = req["datasets"]
523
+ kbs = KnowledgebaseService.get_by_ids(kb_ids)
524
+ embd_nms = list(set([kb.embd_id for kb in kbs]))
525
+ if len(embd_nms) != 1:
526
+ return get_result(
527
+ retmsg='Knowledge bases use different embedding models or does not exist."',
528
+ retcode=RetCode.AUTHENTICATION_ERROR)
529
+ if isinstance(kb_ids, str): kb_ids = [kb_ids]
530
+ for id in kb_ids:
531
  if not KnowledgebaseService.query(id=id,tenant_id=tenant_id):
532
  return get_error_data_result(f"You don't own the dataset {id}.")
533
  if "question" not in req:
 
544
  else:
545
  highlight = True
546
  try:
547
+ e, kb = KnowledgebaseService.get_by_id(kb_ids[0])
548
  if not e:
549
  return get_error_data_result(retmsg="Knowledgebase not found!")
550
  embd_mdl = TenantLLMService.model_instance(
 
560
  question += keyword_extraction(chat_mdl, question)
561
 
562
  retr = retrievaler if kb.parser_id != ParserType.KG else kg_retrievaler
563
+ ranks = retr.retrieval(question, embd_mdl, kb.tenant_id, kb_ids, page, size,
564
  similarity_threshold, vector_similarity_weight, top,
565
  doc_ids, rerank_mdl=rerank_mdl, highlight=highlight)
566
  for c in ranks["chunks"]:
 
586
  return get_result(data=ranks)
587
  except Exception as e:
588
  if str(e).find("not_found") > 0:
589
+ return get_result(retmsg=f'No chunk found! Check the chunk status please!',
590
  retcode=RetCode.DATA_ERROR)
591
  return server_error_response(e)
api/apps/sdk/session.py CHANGED
@@ -39,7 +39,7 @@ def create(tenant_id,chat_id):
39
  "message": [{"role": "assistant", "content": "Hi! I am your assistant,can I help you?"}]
40
  }
41
  if not conv.get("name"):
42
- return get_error_data_result(retmsg="Name can not be empty.")
43
  ConversationService.save(**conv)
44
  e, conv = ConversationService.get_by_id(conv["id"])
45
  if not e:
@@ -62,11 +62,11 @@ def update(tenant_id,chat_id,session_id):
62
  if not DialogService.query(id=chat_id, tenant_id=tenant_id, status=StatusEnum.VALID.value):
63
  return get_error_data_result(retmsg="You do not own the session")
64
  if "message" in req or "messages" in req:
65
- return get_error_data_result(retmsg="Message can not be change")
66
  if "reference" in req:
67
- return get_error_data_result(retmsg="Reference can not be change")
68
  if "name" in req and not req.get("name"):
69
- return get_error_data_result(retmsg="Name can not be empty.")
70
  if not ConversationService.update_by_id(conv_id, req):
71
  return get_error_data_result(retmsg="Session updates error")
72
  return get_result()
@@ -87,7 +87,7 @@ def completion(tenant_id,chat_id):
87
  "message": [{"role": "assistant", "content": "Hi! I am your assistant,can I help you?"}]
88
  }
89
  if not conv.get("name"):
90
- return get_error_data_result(retmsg="Name can not be empty.")
91
  ConversationService.save(**conv)
92
  e, conv = ConversationService.get_by_id(conv["id"])
93
  session_id=conv.id
 
39
  "message": [{"role": "assistant", "content": "Hi! I am your assistant,can I help you?"}]
40
  }
41
  if not conv.get("name"):
42
+ return get_error_data_result(retmsg="`name` can not be empty.")
43
  ConversationService.save(**conv)
44
  e, conv = ConversationService.get_by_id(conv["id"])
45
  if not e:
 
62
  if not DialogService.query(id=chat_id, tenant_id=tenant_id, status=StatusEnum.VALID.value):
63
  return get_error_data_result(retmsg="You do not own the session")
64
  if "message" in req or "messages" in req:
65
+ return get_error_data_result(retmsg="`message` can not be change")
66
  if "reference" in req:
67
+ return get_error_data_result(retmsg="`reference` can not be change")
68
  if "name" in req and not req.get("name"):
69
+ return get_error_data_result(retmsg="`name` can not be empty.")
70
  if not ConversationService.update_by_id(conv_id, req):
71
  return get_error_data_result(retmsg="Session updates error")
72
  return get_result()
 
87
  "message": [{"role": "assistant", "content": "Hi! I am your assistant,can I help you?"}]
88
  }
89
  if not conv.get("name"):
90
+ return get_error_data_result(retmsg="`name` can not be empty.")
91
  ConversationService.save(**conv)
92
  e, conv = ConversationService.get_by_id(conv["id"])
93
  session_id=conv.id
api/db/db_models.py CHANGED
@@ -879,8 +879,8 @@ class Dialog(DataBaseModel):
879
  default="simple",
880
  help_text="simple|advanced",
881
  index=True)
882
- prompt_config = JSONField(null=False, default={"system": "", "prologue": "您好,我是您的助手小樱,长得可爱又善良,can I help you?",
883
- "parameters": [], "empty_response": "Sorry! 知识库中未找到相关内容!"})
884
 
885
  similarity_threshold = FloatField(default=0.2)
886
  vector_similarity_weight = FloatField(default=0.3)
 
879
  default="simple",
880
  help_text="simple|advanced",
881
  index=True)
882
+ prompt_config = JSONField(null=False, default={"system": "", "prologue": "Hi! I'm your assistant, what can I do for you?",
883
+ "parameters": [], "empty_response": "Sorry! No relevant content was found in the knowledge base!"})
884
 
885
  similarity_threshold = FloatField(default=0.2)
886
  vector_similarity_weight = FloatField(default=0.3)
api/utils/api_utils.py CHANGED
@@ -324,4 +324,17 @@ def get_error_data_result(retmsg='Sorry! Data missing!', retcode=RetCode.DATA_ER
324
 
325
  def generate_confirmation_token(tenent_id):
326
  serializer = URLSafeTimedSerializer(tenent_id)
327
- return "ragflow-" + serializer.dumps(get_uuid(), salt=tenent_id)[2:34]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
324
 
325
  def generate_confirmation_token(tenent_id):
326
  serializer = URLSafeTimedSerializer(tenent_id)
327
+ return "ragflow-" + serializer.dumps(get_uuid(), salt=tenent_id)[2:34]
328
+
329
+
330
def valid(permission, valid_permission, language, valid_language, chunk_method, valid_chunk_method):
    """Validate the enumerated fields of a dataset request.

    Each value is checked against its collection of allowed values, in the
    order ``permission``, ``language``, ``chunk_method``, by delegating to
    ``valid_parameter``.

    Returns:
        The result of ``valid_parameter`` for the first rejected value, or
        ``None`` when every value is accepted.
    """
    checks = (
        (permission, valid_permission),
        (language, valid_language),
        (chunk_method, valid_chunk_method),
    )
    for parameter, valid_values in checks:
        # Run the validator once per field and reuse its result; the earlier
        # form called it a second time just to obtain the value to return.
        error = valid_parameter(parameter, valid_values)
        if error:
            return error
    return None
337
+
338
def valid_parameter(parameter, valid_values):
    """Check a single request value against its allowed values.

    A falsy ``parameter`` (for example ``None`` when the field was not sent)
    is accepted without further checks.

    Returns:
        ``None`` when the value is accepted, otherwise the result of
        ``get_error_data_result`` describing the rejected value.
    """
    if not parameter:
        return None
    if parameter in valid_values:
        return None
    return get_error_data_result(f"{parameter} not in {valid_values}")
sdk/python/ragflow/modules/base.py CHANGED
@@ -22,7 +22,7 @@ class Base(object):
22
  res = self.rag.post(path, json, stream=stream,files=files)
23
  return res
24
 
25
- def get(self, path, params):
26
  res = self.rag.get(path, params)
27
  return res
28
 
 
22
  res = self.rag.post(path, json, stream=stream,files=files)
23
  return res
24
 
25
def get(self, path, params=None):
    """Issue a GET for ``path`` through ``self.rag``.

    ``params`` is optional and is handed to ``self.rag.get`` unchanged; the
    value that call returns is passed straight back to the caller.
    """
    return self.rag.get(path, params)
28
 
sdk/python/ragflow/modules/chat.py CHANGED
@@ -73,6 +73,3 @@ class Chat(Base):
73
  res = res.json()
74
  if res.get("code") != 0:
75
  raise Exception(res.get("message"))
76
-
77
- def get_prologue(self):
78
- return self.prompt.opener
 
73
  res = res.json()
74
  if res.get("code") != 0:
75
  raise Exception(res.get("message"))
 
 
 
sdk/python/ragflow/modules/dataset.py CHANGED
@@ -1,78 +1,78 @@
1
- from typing import Optional, List
2
-
3
- from transformers.models.bloom.modeling_bloom import bloom_gelu_back
4
-
5
- from .document import Document
6
-
7
- from .base import Base
8
-
9
-
10
- class DataSet(Base):
11
- class ParserConfig(Base):
12
- def __init__(self, rag, res_dict):
13
- self.chunk_token_count = 128
14
- self.layout_recognize = True
15
- self.delimiter = '\n!?。;!?'
16
- self.task_page_size = 12
17
- super().__init__(rag, res_dict)
18
-
19
- def __init__(self, rag, res_dict):
20
- self.id = ""
21
- self.name = ""
22
- self.avatar = ""
23
- self.tenant_id = None
24
- self.description = ""
25
- self.language = "English"
26
- self.embedding_model = ""
27
- self.permission = "me"
28
- self.document_count = 0
29
- self.chunk_count = 0
30
- self.parse_method = "naive"
31
- self.parser_config = None
32
- for k in list(res_dict.keys()):
33
- if k not in self.__dict__:
34
- res_dict.pop(k)
35
- super().__init__(rag, res_dict)
36
-
37
- def update(self, update_message: dict):
38
- res = self.put(f'/dataset/{self.id}',
39
- update_message)
40
- res = res.json()
41
- if res.get("code") != 0:
42
- raise Exception(res["message"])
43
-
44
- def upload_documents(self,document_list: List[dict]):
45
- url = f"/dataset/{self.id}/document"
46
- files = [("file",(ele["name"],ele["blob"])) for ele in document_list]
47
- res = self.post(path=url,json=None,files=files)
48
- res = res.json()
49
- if res.get("code") != 0:
50
- raise Exception(res.get("message"))
51
-
52
- def list_documents(self, id: str = None, keywords: str = None, offset: int =1, limit: int = 1024, orderby: str = "create_time", desc: bool = True):
53
- res = self.get(f"/dataset/{self.id}/info",params={"id": id,"keywords": keywords,"offset": offset,"limit": limit,"orderby": orderby,"desc": desc})
54
- res = res.json()
55
- documents = []
56
- if res.get("code") == 0:
57
- for document in res["data"].get("docs"):
58
- documents.append(Document(self.rag,document))
59
- return documents
60
- raise Exception(res["message"])
61
-
62
- def delete_documents(self,ids: List[str] = None):
63
- res = self.rm(f"/dataset/{self.id}/document",{"ids":ids})
64
- res = res.json()
65
- if res.get("code") != 0:
66
- raise Exception(res["message"])
67
-
68
- def async_parse_documents(self,document_ids):
69
- res = self.post(f"/dataset/{self.id}/chunk",{"document_ids":document_ids})
70
- res = res.json()
71
- if res.get("code") != 0:
72
- raise Exception(res.get("message"))
73
-
74
- def async_cancel_parse_documents(self,document_ids):
75
- res = self.rm(f"/dataset/{self.id}/chunk",{"document_ids":document_ids})
76
- res = res.json()
77
- if res.get("code") != 0:
78
- raise Exception(res.get("message"))
 
1
+ from typing import Optional, List
2
+
3
+ from transformers.models.bloom.modeling_bloom import bloom_gelu_back
4
+
5
+ from .document import Document
6
+
7
+ from .base import Base
8
+
9
+
10
class DataSet(Base):
    """SDK-side view of a dataset, with helpers that call the dataset endpoints.

    Every helper decodes the JSON reply and raises ``Exception`` carrying the
    reply's message when the reply's ``code`` is not 0.
    """

    class ParserConfig(Base):
        """Parser settings; defaults are assigned before ``Base.__init__`` runs."""

        def __init__(self, rag, res_dict):
            self.chunk_token_count = 128
            self.layout_recognize = True
            self.delimiter = '\n!?。;!?'
            self.task_page_size = 12
            super().__init__(rag, res_dict)

    def __init__(self, rag, res_dict):
        # Defaults for every attribute this class knows about.
        self.id = ""
        self.name = ""
        self.avatar = ""
        self.tenant_id = None
        self.description = ""
        self.language = "English"
        self.embedding_model = ""
        self.permission = "me"
        self.document_count = 0
        self.chunk_count = 0
        self.chunk_method = "naive"
        self.parser_config = None
        # Remove (in place) any key of res_dict that has no matching default
        # above, before handing the dict to Base.__init__.
        unknown_keys = [key for key in res_dict if key not in self.__dict__]
        for key in unknown_keys:
            res_dict.pop(key)
        super().__init__(rag, res_dict)

    def update(self, update_message: dict):
        """Send ``update_message`` to this dataset's endpoint via ``self.put``."""
        reply = self.put(f'/dataset/{self.id}', update_message).json()
        if reply.get("code") == 0:
            return
        raise Exception(reply["message"])

    def upload_documents(self, document_list: List[dict]):
        """Upload documents; each entry supplies a ``name`` and a ``blob``."""
        endpoint = f"/dataset/{self.id}/document"
        files = []
        for entry in document_list:
            files.append(("file", (entry["name"], entry["blob"])))
        reply = self.post(path=endpoint, json=None, files=files).json()
        if reply.get("code") == 0:
            return
        raise Exception(reply.get("message"))

    def list_documents(self, id: str = None, keywords: str = None, offset: int = 1, limit: int = 1024, orderby: str = "create_time", desc: bool = True):
        """Return ``Document`` objects built from the ``docs`` of the listing reply."""
        query = {
            "id": id,
            "keywords": keywords,
            "offset": offset,
            "limit": limit,
            "orderby": orderby,
            "desc": desc,
        }
        reply = self.get(f"/dataset/{self.id}/info", params=query).json()
        if reply.get("code") != 0:
            raise Exception(reply["message"])
        return [Document(self.rag, doc) for doc in reply["data"].get("docs")]

    def delete_documents(self, ids: List[str] = None):
        """Request deletion of the documents with the given ids via ``self.rm``."""
        reply = self.rm(f"/dataset/{self.id}/document", {"ids": ids}).json()
        if reply.get("code") == 0:
            return
        raise Exception(reply["message"])

    def async_parse_documents(self, document_ids):
        """Post ``document_ids`` to this dataset's ``chunk`` endpoint."""
        reply = self.post(f"/dataset/{self.id}/chunk", {"document_ids": document_ids}).json()
        if reply.get("code") == 0:
            return
        raise Exception(reply.get("message"))

    def async_cancel_parse_documents(self, document_ids):
        """Send ``document_ids`` to this dataset's ``chunk`` endpoint via ``self.rm``."""
        reply = self.rm(f"/dataset/{self.id}/chunk", {"document_ids": document_ids}).json()
        if reply.get("code") == 0:
            return
        raise Exception(reply.get("message"))
sdk/python/ragflow/modules/document.py CHANGED
@@ -1,7 +1,4 @@
1
- import time
2
-
3
- from PIL.ImageFile import raise_oserror
4
-
5
  from .base import Base
6
  from .chunk import Chunk
7
  from typing import List
@@ -13,7 +10,7 @@ class Document(Base):
13
  self.name = ""
14
  self.thumbnail = None
15
  self.knowledgebase_id = None
16
- self.parser_method = ""
17
  self.parser_config = {"pages": [[1, 1000000]]}
18
  self.source_type = "local"
19
  self.type = ""
@@ -32,6 +29,23 @@ class Document(Base):
32
  res_dict.pop(k)
33
  super().__init__(rag, res_dict)
34
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  def list_chunks(self,offset=0, limit=30, keywords="", id:str=None):
36
  data={"document_id": self.id,"keywords": keywords,"offset":offset,"limit":limit,"id":id}
37
  res = self.get(f'/dataset/{self.knowledgebase_id}/document/{self.id}/chunk', data)
 
1
+ import json
 
 
 
2
  from .base import Base
3
  from .chunk import Chunk
4
  from typing import List
 
10
  self.name = ""
11
  self.thumbnail = None
12
  self.knowledgebase_id = None
13
+ self.chunk_method = ""
14
  self.parser_config = {"pages": [[1, 1000000]]}
15
  self.source_type = "local"
16
  self.type = ""
 
29
  res_dict.pop(k)
30
  super().__init__(rag, res_dict)
31
 
32
+
33
def update(self, update_message: dict):
    """Push a set of field changes for this document to the server.

    The changes are sent through the ``self.put`` helper (defined outside
    this view) to ``/dataset/{self.knowledgebase_id}/info/{self.id}``.

    Args:
        update_message: Mapping of the fields to change; forwarded as-is.

    Raises:
        Exception: carrying the reply's ``message`` when the reply's
            ``code`` is not ``0``.
    """
    endpoint = f'/dataset/{self.knowledgebase_id}/info/{self.id}'
    reply = self.put(endpoint, update_message).json()
    if reply.get("code") == 0:
        # Success: nothing to return.
        return
    raise Exception(reply["message"])
39
+
40
def download(self):
    """Fetch the raw content of this document.

    Requests ``/dataset/{self.knowledgebase_id}/document/{self.id}`` through
    the ``self.get`` helper (defined outside this view).

    Returns:
        The response body (``res.content``).

    Raises:
        Exception: carrying the reply's ``message`` when the body is a JSON
            object with a non-zero ``code``.
    """
    res = self.get(f"/dataset/{self.knowledgebase_id}/document/{self.id}")
    try:
        payload = res.json()
    except ValueError:
        # Body is not JSON, so it is the file itself. ValueError is the
        # common base of json.JSONDecodeError and of the decode errors
        # raised by alternative JSON back ends.
        return res.content
    # A body that parses as JSON is not necessarily an error: the document
    # being downloaded may itself be a JSON file (object, array, scalar).
    # NOTE(review): assumes the server's error envelope is a JSON object with
    # a non-zero "code", as the other SDK calls check - confirm against the API.
    if isinstance(payload, dict) and payload.get("code", 0) != 0:
        raise Exception(payload.get("message"))
    return res.content
47
+
48
+
49
  def list_chunks(self,offset=0, limit=30, keywords="", id:str=None):
50
  data={"document_id": self.id,"keywords": keywords,"offset":offset,"limit":limit,"id":id}
51
  res = self.get(f'/dataset/{self.knowledgebase_id}/document/{self.id}/chunk', data)
sdk/python/ragflow/ragflow.py CHANGED
@@ -24,11 +24,11 @@ from .modules.document import Document
24
 
25
 
26
  class RAGFlow:
27
- def __init__(self, user_key, base_url, version='v1'):
28
  """
29
  api_url: http://<host_address>/api/v1
30
  """
31
- self.user_key = user_key
32
  self.api_url = f"{base_url}/api/{version}"
33
  self.authorization_header = {"Authorization": "{} {}".format("Bearer", self.user_key)}
34
 
@@ -50,7 +50,7 @@ class RAGFlow:
50
 
51
  def create_dataset(self, name: str, avatar: str = "", description: str = "", language: str = "English",
52
  permission: str = "me",
53
- document_count: int = 0, chunk_count: int = 0, parse_method: str = "naive",
54
  parser_config: DataSet.ParserConfig = None) -> DataSet:
55
  if parser_config is None:
56
  parser_config = DataSet.ParserConfig(self, {"chunk_token_count": 128, "layout_recognize": True,
@@ -59,7 +59,7 @@ class RAGFlow:
59
  res = self.post("/dataset",
60
  {"name": name, "avatar": avatar, "description": description, "language": language,
61
  "permission": permission,
62
- "document_count": document_count, "chunk_count": chunk_count, "parse_method": parse_method,
63
  "parser_config": parser_config
64
  }
65
  )
@@ -93,7 +93,7 @@ class RAGFlow:
93
  return result_list
94
  raise Exception(res["message"])
95
 
96
- def create_chat(self, name: str = "assistant", avatar: str = "path", knowledgebases: List[DataSet] = [],
97
  llm: Chat.LLM = None, prompt: Chat.Prompt = None) -> Chat:
98
  datasets = []
99
  for dataset in knowledgebases:
 
24
 
25
 
26
  class RAGFlow:
27
+ def __init__(self, api_key, base_url, version='v1'):
28
  """
29
  api_url: http://<host_address>/api/v1
30
  """
31
+ self.user_key = api_key
32
  self.api_url = f"{base_url}/api/{version}"
33
  self.authorization_header = {"Authorization": "{} {}".format("Bearer", self.user_key)}
34
 
 
50
 
51
  def create_dataset(self, name: str, avatar: str = "", description: str = "", language: str = "English",
52
  permission: str = "me",
53
+ document_count: int = 0, chunk_count: int = 0, chunk_method: str = "naive",
54
  parser_config: DataSet.ParserConfig = None) -> DataSet:
55
  if parser_config is None:
56
  parser_config = DataSet.ParserConfig(self, {"chunk_token_count": 128, "layout_recognize": True,
 
59
  res = self.post("/dataset",
60
  {"name": name, "avatar": avatar, "description": description, "language": language,
61
  "permission": permission,
62
+ "document_count": document_count, "chunk_count": chunk_count, "chunk_method": chunk_method,
63
  "parser_config": parser_config
64
  }
65
  )
 
93
  return result_list
94
  raise Exception(res["message"])
95
 
96
+ def create_chat(self, name: str, avatar: str = "", knowledgebases: List[DataSet] = [],
97
  llm: Chat.LLM = None, prompt: Chat.Prompt = None) -> Chat:
98
  datasets = []
99
  for dataset in knowledgebases:
sdk/python/test/t_document.py CHANGED
@@ -35,7 +35,7 @@ class TestDocument(TestSdk):
35
  def test_update_document_with_success(self):
36
  """
37
  Test updating a document with success.
38
- Update name or parser_method are supported
39
  """
40
  rag = RAGFlow(API_KEY, HOST_ADDRESS)
41
  ds = rag.list_datasets(name="God")
@@ -43,7 +43,7 @@ class TestDocument(TestSdk):
43
  doc = ds.list_documents()
44
  doc = doc[0]
45
  if isinstance(doc, Document):
46
- res = doc.update({"parser_method":"manual","name":"manual.txt"})
47
  assert res is None, f"Failed to update document, error: {res}"
48
  else:
49
  assert False, f"Failed to get document, error: {doc}"
 
35
  def test_update_document_with_success(self):
36
  """
37
  Test updating a document with success.
38
+ Update name or chunk_method are supported
39
  """
40
  rag = RAGFlow(API_KEY, HOST_ADDRESS)
41
  ds = rag.list_datasets(name="God")
 
43
  doc = ds.list_documents()
44
  doc = doc[0]
45
  if isinstance(doc, Document):
46
+ res = doc.update({"chunk_method":"manual","name":"manual.txt"})
47
  assert res is None, f"Failed to update document, error: {res}"
48
  else:
49
  assert False, f"Failed to get document, error: {doc}"