liuhua liuhua committed on
Commit
cd7d2b9
·
1 Parent(s): 43b4969

Refactor API for document and session (#2819)

Browse files

### What problem does this PR solve?

Refactor API for document and session.

### Type of change


- [x] Refactoring

---------

Co-authored-by: liuhua <[email protected]>

api/apps/sdk/doc.py CHANGED
@@ -4,9 +4,11 @@ import datetime
4
  import json
5
  import traceback
6
 
 
7
  from flask import request
8
  from flask_login import login_required, current_user
9
  from elasticsearch_dsl import Q
 
10
 
11
  from rag.app.qa import rmPrefix, beAdoc
12
  from rag.nlp import search, rag_tokenizer, keyword_extraction
@@ -16,22 +18,22 @@ from api.db import LLMType, ParserType
16
  from api.db.services.knowledgebase_service import KnowledgebaseService
17
  from api.db.services.llm_service import TenantLLMService
18
  from api.db.services.user_service import UserTenantService
19
- from api.utils.api_utils import server_error_response, get_data_error_result, validate_request
20
  from api.db.services.document_service import DocumentService
21
  from api.settings import RetCode, retrievaler, kg_retrievaler
22
- from api.utils.api_utils import get_json_result
23
  import hashlib
24
  import re
25
- from api.utils.api_utils import get_json_result, token_required, get_data_error_result
26
 
27
  from api.db.db_models import Task, File
28
 
29
  from api.db.services.task_service import TaskService, queue_tasks
30
  from api.db.services.user_service import TenantService, UserTenantService
31
 
32
- from api.utils.api_utils import server_error_response, get_data_error_result, validate_request
33
 
34
- from api.utils.api_utils import get_json_result
35
 
36
  from functools import partial
37
  from io import BytesIO
@@ -59,307 +61,163 @@ MAXIMUM_OF_UPLOADING_FILES = 256
59
  MAXIMUM_OF_UPLOADING_FILES = 256
60
 
61
 
62
- @manager.route('/dataset/<dataset_id>/documents/upload', methods=['POST'])
63
  @token_required
64
  def upload(dataset_id, tenant_id):
65
  if 'file' not in request.files:
66
- return get_json_result(
67
- data=False, retmsg='No file part!', retcode=RetCode.ARGUMENT_ERROR)
68
  file_objs = request.files.getlist('file')
69
  for file_obj in file_objs:
70
  if file_obj.filename == '':
71
- return get_json_result(
72
- data=False, retmsg='No file selected!', retcode=RetCode.ARGUMENT_ERROR)
73
  e, kb = KnowledgebaseService.get_by_id(dataset_id)
74
  if not e:
75
  raise LookupError(f"Can't find the knowledgebase with ID {dataset_id}!")
76
  err, _ = FileService.upload_document(kb, file_objs, tenant_id)
77
  if err:
78
- return get_json_result(
79
- data=False, retmsg="\n".join(err), retcode=RetCode.SERVER_ERROR)
80
- return get_json_result(data=True)
81
 
82
 
83
- @manager.route('/infos', methods=['GET'])
84
  @token_required
85
- def docinfos(tenant_id):
86
- req = request.args
87
- if "id" not in req and "name" not in req:
88
- return get_data_error_result(
89
- retmsg="Id or name should be provided")
90
- doc_id=None
91
- if "id" in req:
92
- doc_id = req["id"]
93
- if "name" in req:
94
- doc_name = req["name"]
95
- doc_id = DocumentService.get_doc_id_by_doc_name(doc_name)
96
- e, doc = DocumentService.get_by_id(doc_id)
97
- #rename key's name
98
- key_mapping = {
99
- "chunk_num": "chunk_count",
100
- "kb_id": "knowledgebase_id",
101
- "token_num": "token_count",
102
- "parser_id":"parser_method",
103
- }
104
- renamed_doc = {}
105
- for key, value in doc.to_dict().items():
106
- new_key = key_mapping.get(key, key)
107
- renamed_doc[new_key] = value
108
-
109
- return get_json_result(data=renamed_doc)
110
-
111
-
112
- @manager.route('/save', methods=['POST'])
113
- @token_required
114
- def save_doc(tenant_id):
115
  req = request.json
116
- #get doc by id or name
117
- doc_id = None
118
- if "id" in req:
119
- doc_id = req["id"]
120
- elif "name" in req:
121
- doc_name = req["name"]
122
- doc_id = DocumentService.get_doc_id_by_doc_name(doc_name)
123
- if not doc_id:
124
- return get_json_result(retcode=400, retmsg="Document ID or name is required")
125
- e, doc = DocumentService.get_by_id(doc_id)
126
- if not e:
127
- return get_data_error_result(retmsg="Document not found!")
128
- #other value can't be changed
129
  if "chunk_count" in req:
130
  if req["chunk_count"] != doc.chunk_num:
131
- return get_data_error_result(
132
- retmsg="Can't change chunk_count.")
133
  if "token_count" in req:
134
  if req["token_count"] != doc.token_num:
135
- return get_data_error_result(
136
- retmsg="Can't change token_count.")
137
  if "progress" in req:
138
  if req['progress'] != doc.progress:
139
- return get_data_error_result(
140
- retmsg="Can't change progress.")
141
- #change name or parse_method
142
- if "name" in req and req["name"] != doc.name:
143
- try:
144
- if pathlib.Path(req["name"].lower()).suffix != pathlib.Path(
145
- doc.name.lower()).suffix:
146
- return get_json_result(
147
- data=False,
148
- retmsg="The extension of file can't be changed",
149
- retcode=RetCode.ARGUMENT_ERROR)
150
- for d in DocumentService.query(name=req["name"], kb_id=doc.kb_id):
151
- if d.name == req["name"]:
152
- return get_data_error_result(
153
- retmsg="Duplicated document name in the same knowledgebase.")
154
-
155
- if not DocumentService.update_by_id(
156
- doc_id, {"name": req["name"]}):
157
- return get_data_error_result(
158
- retmsg="Database error (Document rename)!")
159
-
160
- informs = File2DocumentService.get_by_document_id(doc_id)
161
- if informs:
162
- e, file = FileService.get_by_id(informs[0].file_id)
163
- FileService.update_by_id(file.id, {"name": req["name"]})
164
- except Exception as e:
165
- return server_error_response(e)
166
- if "parser_method" in req:
167
- try:
168
- if doc.parser_id.lower() == req["parser_method"].lower():
169
- if "parser_config" in req:
170
- if req["parser_config"] == doc.parser_config:
171
- return get_json_result(data=True)
172
- else:
173
- return get_json_result(data=True)
174
-
175
- if doc.type == FileType.VISUAL or re.search(
176
- r"\.(ppt|pptx|pages)$", doc.name):
177
- return get_data_error_result(retmsg="Not supported yet!")
178
-
179
- e = DocumentService.update_by_id(doc.id,
180
- {"parser_id": req["parser_method"], "progress": 0, "progress_msg": "",
181
- "run": TaskStatus.UNSTART.value})
182
- if not e:
183
- return get_data_error_result(retmsg="Document not found!")
184
- if "parser_config" in req:
185
- DocumentService.update_parser_config(doc.id, req["parser_config"])
186
- if doc.token_num > 0:
187
- e = DocumentService.increment_chunk_num(doc.id, doc.kb_id, doc.token_num * -1, doc.chunk_num * -1,
188
- doc.process_duation * -1)
189
- if not e:
190
- return get_data_error_result(retmsg="Document not found!")
191
- tenant_id = DocumentService.get_tenant_id(req["id"])
192
- if not tenant_id:
193
- return get_data_error_result(retmsg="Tenant not found!")
194
- ELASTICSEARCH.deleteByQuery(
195
- Q("match", doc_id=doc.id), idxnm=search.index_name(tenant_id))
196
- except Exception as e:
197
- return server_error_response(e)
198
- return get_json_result(data=True)
199
-
200
 
 
 
 
 
 
 
 
 
 
 
 
201
 
202
- @manager.route('/change_parser', methods=['POST'])
203
- @token_required
204
- def change_parser(tenant_id):
205
- req = request.json
206
- try:
207
- e, doc = DocumentService.get_by_id(req["doc_id"])
208
- if not e:
209
- return get_data_error_result(retmsg="Document not found!")
210
- if doc.parser_id.lower() == req["parser_id"].lower():
211
  if "parser_config" in req:
212
  if req["parser_config"] == doc.parser_config:
213
- return get_json_result(data=True)
214
  else:
215
- return get_json_result(data=True)
216
 
217
  if doc.type == FileType.VISUAL or re.search(
218
  r"\.(ppt|pptx|pages)$", doc.name):
219
- return get_data_error_result(retmsg="Not supported yet!")
220
 
221
  e = DocumentService.update_by_id(doc.id,
222
- {"parser_id": req["parser_id"], "progress": 0, "progress_msg": "",
223
  "run": TaskStatus.UNSTART.value})
224
  if not e:
225
- return get_data_error_result(retmsg="Document not found!")
226
- if "parser_config" in req:
227
- DocumentService.update_parser_config(doc.id, req["parser_config"])
228
  if doc.token_num > 0:
229
  e = DocumentService.increment_chunk_num(doc.id, doc.kb_id, doc.token_num * -1, doc.chunk_num * -1,
230
  doc.process_duation * -1)
231
  if not e:
232
- return get_data_error_result(retmsg="Document not found!")
233
- tenant_id = DocumentService.get_tenant_id(req["doc_id"])
234
  if not tenant_id:
235
- return get_data_error_result(retmsg="Tenant not found!")
236
  ELASTICSEARCH.deleteByQuery(
237
  Q("match", doc_id=doc.id), idxnm=search.index_name(tenant_id))
 
 
238
 
239
- return get_json_result(data=True)
240
- except Exception as e:
241
- return server_error_response(e)
242
-
243
- @manager.route('/rename', methods=['POST'])
244
- @login_required
245
- @validate_request("doc_id", "name")
246
- def rename():
247
- req = request.json
248
- try:
249
- e, doc = DocumentService.get_by_id(req["doc_id"])
250
- if not e:
251
- return get_data_error_result(retmsg="Document not found!")
252
- if pathlib.Path(req["name"].lower()).suffix != pathlib.Path(
253
- doc.name.lower()).suffix:
254
- return get_json_result(
255
- data=False,
256
- retmsg="The extension of file can't be changed",
257
- retcode=RetCode.ARGUMENT_ERROR)
258
- for d in DocumentService.query(name=req["name"], kb_id=doc.kb_id):
259
- if d.name == req["name"]:
260
- return get_data_error_result(
261
- retmsg="Duplicated document name in the same knowledgebase.")
262
-
263
- if not DocumentService.update_by_id(
264
- req["doc_id"], {"name": req["name"]}):
265
- return get_data_error_result(
266
- retmsg="Database error (Document rename)!")
267
-
268
- informs = File2DocumentService.get_by_document_id(req["doc_id"])
269
- if informs:
270
- e, file = FileService.get_by_id(informs[0].file_id)
271
- FileService.update_by_id(file.id, {"name": req["name"]})
272
-
273
- return get_json_result(data=True)
274
- except Exception as e:
275
- return server_error_response(e)
276
 
277
 
278
- @manager.route("/<document_id>", methods=["GET"])
279
  @token_required
280
- def download_document(document_id,tenant_id):
281
- try:
282
- # Check whether there is this document
283
- exist, document = DocumentService.get_by_id(document_id)
284
- if not exist:
285
- return construct_json_result(message=f"This document '{document_id}' cannot be found!",
286
- code=RetCode.ARGUMENT_ERROR)
287
-
288
- # The process of downloading
289
- doc_id, doc_location = File2DocumentService.get_storage_address(doc_id=document_id) # minio address
290
- file_stream = STORAGE_IMPL.get(doc_id, doc_location)
291
- if not file_stream:
292
- return construct_json_result(message="This file is empty.", code=RetCode.DATA_ERROR)
293
-
294
- file = BytesIO(file_stream)
295
-
296
- # Use send_file with a proper filename and MIME type
297
- return send_file(
298
- file,
299
- as_attachment=True,
300
- download_name=document.name,
301
- mimetype='application/octet-stream' # Set a default MIME type
302
- )
303
-
304
- # Error
305
- except Exception as e:
306
- return construct_error_response(e)
307
-
308
-
309
- @manager.route('/dataset/<dataset_id>/documents', methods=['GET'])
310
  @token_required
311
  def list_docs(dataset_id, tenant_id):
312
- kb_id = request.args.get("knowledgebase_id")
313
- if not kb_id:
314
- return get_json_result(
315
- data=False, retmsg='Lack of "KB ID"', retcode=RetCode.ARGUMENT_ERROR)
316
- tenants = UserTenantService.query(user_id=tenant_id)
317
- for tenant in tenants:
318
- if KnowledgebaseService.query(
319
- tenant_id=tenant.tenant_id, id=kb_id):
320
- break
321
- else:
322
- return get_json_result(
323
- data=False, retmsg=f'Only owner of knowledgebase authorized for this operation.',
324
- retcode=RetCode.OPERATING_ERROR)
325
- keywords = request.args.get("keywords", "")
326
-
327
- page_number = int(request.args.get("page", 1))
328
- items_per_page = int(request.args.get("page_size", 15))
329
  orderby = request.args.get("orderby", "create_time")
330
- desc = request.args.get("desc", True)
331
- try:
332
- docs, tol = DocumentService.get_by_kb_id(
333
- kb_id, page_number, items_per_page, orderby, desc, keywords)
 
334
 
335
- # rename key's name
336
- renamed_doc_list = []
337
- for doc in docs:
338
- key_mapping = {
339
- "chunk_num": "chunk_count",
340
- "kb_id": "knowledgebase_id",
341
- "token_num": "token_count",
342
- "parser_id":"parser_method"
343
- }
344
- renamed_doc = {}
345
- for key, value in doc.items():
346
- new_key = key_mapping.get(key, key)
347
- renamed_doc[new_key] = value
348
- renamed_doc_list.append(renamed_doc)
349
- return get_json_result(data={"total": tol, "docs": renamed_doc_list})
350
- except Exception as e:
351
- return server_error_response(e)
352
 
353
 
354
- @manager.route('/delete', methods=['DELETE'])
355
  @token_required
356
- def rm(tenant_id):
357
- req = request.args
358
- if "document_id" not in req:
359
- return get_data_error_result(
360
- retmsg="doc_id is required")
361
- doc_ids = req["document_id"]
362
- if isinstance(doc_ids, str): doc_ids = [doc_ids]
363
  root_folder = FileService.get_root_folder(tenant_id)
364
  pf_id = root_folder["id"]
365
  FileService.init_knowledgebase_docs(pf_id, tenant_id)
@@ -368,15 +226,15 @@ def rm(tenant_id):
368
  try:
369
  e, doc = DocumentService.get_by_id(doc_id)
370
  if not e:
371
- return get_data_error_result(retmsg="Document not found!")
372
  tenant_id = DocumentService.get_tenant_id(doc_id)
373
  if not tenant_id:
374
- return get_data_error_result(retmsg="Tenant not found!")
375
 
376
  b, n = File2DocumentService.get_storage_address(doc_id=doc_id)
377
 
378
  if not DocumentService.remove_document(doc, tenant_id):
379
- return get_data_error_result(
380
  retmsg="Database error (Document removal)!")
381
 
382
  f2d = File2DocumentService.get_by_document_id(doc_id)
@@ -388,80 +246,69 @@ def rm(tenant_id):
388
  errors += str(e)
389
 
390
  if errors:
391
- return get_json_result(data=False, retmsg=errors, retcode=RetCode.SERVER_ERROR)
392
 
393
- return get_json_result(data=True, retmsg="success")
394
-
395
- @manager.route("/<document_id>/status", methods=["GET"])
396
- @token_required
397
- def show_parsing_status(tenant_id, document_id):
398
- try:
399
- # valid document
400
- exist, _ = DocumentService.get_by_id(document_id)
401
- if not exist:
402
- return construct_json_result(code=RetCode.DATA_ERROR,
403
- message=f"This document: '{document_id}' is not a valid document.")
404
-
405
- _, doc = DocumentService.get_by_id(document_id) # get doc object
406
- doc_attributes = doc.to_dict()
407
-
408
- return construct_json_result(
409
- data={"progress": doc_attributes["progress"], "status": TaskStatus(doc_attributes["status"]).name},
410
- code=RetCode.SUCCESS
411
- )
412
- except Exception as e:
413
- return construct_error_response(e)
414
 
415
 
416
-
417
- @manager.route('/run', methods=['POST'])
418
  @token_required
419
- def run(tenant_id):
 
 
420
  req = request.json
421
- try:
422
- for id in req["document_ids"]:
423
- info = {"run": str(req["run"]), "progress": 0}
424
- if str(req["run"]) == TaskStatus.RUNNING.value:
425
- info["progress_msg"] = ""
426
- info["chunk_num"] = 0
427
- info["token_num"] = 0
428
- DocumentService.update_by_id(id, info)
429
- # if str(req["run"]) == TaskStatus.CANCEL.value:
430
- tenant_id = DocumentService.get_tenant_id(id)
431
- if not tenant_id:
432
- return get_data_error_result(retmsg="Tenant not found!")
433
- ELASTICSEARCH.deleteByQuery(
434
- Q("match", doc_id=id), idxnm=search.index_name(tenant_id))
435
-
436
- if str(req["run"]) == TaskStatus.RUNNING.value:
437
- TaskService.filter_delete([Task.doc_id == id])
438
- e, doc = DocumentService.get_by_id(id)
439
- doc = doc.to_dict()
440
- doc["tenant_id"] = tenant_id
441
- bucket, name = File2DocumentService.get_storage_address(doc_id=doc["id"])
442
- queue_tasks(doc, bucket, name)
443
-
444
- return get_json_result(data=True)
445
- except Exception as e:
446
- return server_error_response(e)
447
-
448
-
449
- @manager.route('/chunk/list', methods=['POST'])
450
  @token_required
451
- @validate_request("document_id")
452
- def list_chunk(tenant_id):
 
453
  req = request.json
454
- doc_id = req["document_id"]
455
- page = int(req.get("page", 1))
456
- size = int(req.get("size", 30))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
457
  question = req.get("keywords", "")
458
  try:
459
- tenant_id = DocumentService.get_tenant_id(req["document_id"])
460
- if not tenant_id:
461
- return get_data_error_result(retmsg="Tenant not found!")
462
- e, doc = DocumentService.get_by_id(doc_id)
463
- if not e:
464
- return get_data_error_result(retmsg="Document not found!")
465
  query = {
466
  "doc_ids": [doc_id], "page": page, "size": size, "question": question, "sort": True
467
  }
@@ -470,7 +317,7 @@ def list_chunk(tenant_id):
470
  sres = retrievaler.search(query, search.index_name(tenant_id), highlight=True)
471
  res = {"total": sres.total, "chunks": [], "doc": doc.to_dict()}
472
 
473
- origin_chunks=[]
474
  for id in sres.ids:
475
  d = {
476
  "chunk_id": id,
@@ -490,7 +337,7 @@ def list_chunk(tenant_id):
490
  poss.append([float(d["positions"][i]), float(d["positions"][i + 1]), float(d["positions"][i + 2]),
491
  float(d["positions"][i + 3]), float(d["positions"][i + 4])])
492
  d["positions"] = poss
493
-
494
  origin_chunks.append(d)
495
  ##rename keys
496
  for chunk in origin_chunks:
@@ -499,28 +346,34 @@ def list_chunk(tenant_id):
499
  "content_with_weight": "content",
500
  "doc_id": "document_id",
501
  "important_kwd": "important_keywords",
502
- "img_id":"image_id",
503
  }
504
  renamed_chunk = {}
505
  for key, value in chunk.items():
506
  new_key = key_mapping.get(key, key)
507
  renamed_chunk[new_key] = value
508
  res["chunks"].append(renamed_chunk)
509
- return get_json_result(data=res)
510
  except Exception as e:
511
  if str(e).find("not_found") > 0:
512
- return get_json_result(data=False, retmsg=f'No chunk found!',
513
  retcode=RetCode.DATA_ERROR)
514
  return server_error_response(e)
515
 
516
 
517
- @manager.route('/chunk/create', methods=['POST'])
518
  @token_required
519
- @validate_request("document_id", "content")
520
- def create(tenant_id):
 
 
 
 
521
  req = request.json
 
 
522
  md5 = hashlib.md5()
523
- md5.update((req["content"] + req["document_id"]).encode("utf-8"))
524
 
525
  chunk_id = md5.hexdigest()
526
  d = {"id": chunk_id, "content_ltks": rag_tokenizer.tokenize(req["content"]),
@@ -530,80 +383,77 @@ def create(tenant_id):
530
  d["important_tks"] = rag_tokenizer.tokenize(" ".join(req.get("important_kwd", [])))
531
  d["create_time"] = str(datetime.datetime.now()).replace("T", " ")[:19]
532
  d["create_timestamp_flt"] = datetime.datetime.now().timestamp()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
533
 
534
- try:
535
- e, doc = DocumentService.get_by_id(req["document_id"])
536
- if not e:
537
- return get_data_error_result(retmsg="Document not found!")
538
- d["kb_id"] = [doc.kb_id]
539
- d["docnm_kwd"] = doc.name
540
- d["doc_id"] = doc.id
541
-
542
- tenant_id = DocumentService.get_tenant_id(req["document_id"])
543
- if not tenant_id:
544
- return get_data_error_result(retmsg="Tenant not found!")
545
-
546
- embd_id = DocumentService.get_embd_id(req["document_id"])
547
- embd_mdl = TenantLLMService.model_instance(
548
- tenant_id, LLMType.EMBEDDING.value, embd_id)
549
-
550
- v, c = embd_mdl.encode([doc.name, req["content"]])
551
- v = 0.1 * v[0] + 0.9 * v[1]
552
- d["q_%d_vec" % len(v)] = v.tolist()
553
- ELASTICSEARCH.upsert([d], search.index_name(tenant_id))
554
-
555
- DocumentService.increment_chunk_num(
556
- doc.id, doc.kb_id, c, 1, 0)
557
- d["chunk_id"] = chunk_id
558
- #rename keys
559
- key_mapping = {
560
- "chunk_id": "id",
561
- "content_with_weight": "content",
562
- "doc_id": "document_id",
563
- "important_kwd": "important_keywords",
564
- "kb_id":"dataset_id",
565
- "create_timestamp_flt":"create_timestamp",
566
- "create_time": "create_time",
567
- "document_keyword":"document",
568
- }
569
- renamed_chunk = {}
570
- for key, value in d.items():
571
- if key in key_mapping:
572
- new_key = key_mapping.get(key, key)
573
- renamed_chunk[new_key] = value
574
 
575
- return get_json_result(data={"chunk": renamed_chunk})
576
- # return get_json_result(data={"chunk_id": chunk_id})
577
- except Exception as e:
578
- return server_error_response(e)
579
-
580
- @manager.route('/chunk/rm', methods=['POST'])
581
  @token_required
582
- @validate_request("chunk_ids", "document_id")
583
- def rm_chunk(tenant_id):
 
 
 
 
584
  req = request.json
585
- try:
586
- if not ELASTICSEARCH.deleteByQuery(
587
- Q("ids", values=req["chunk_ids"]), search.index_name(tenant_id)):
588
- return get_data_error_result(retmsg="Index updating failure")
589
- e, doc = DocumentService.get_by_id(req["document_id"])
590
- if not e:
591
- return get_data_error_result(retmsg="Document not found!")
592
- deleted_chunk_ids = req["chunk_ids"]
593
- chunk_number = len(deleted_chunk_ids)
594
- DocumentService.decrement_chunk_num(doc.id, doc.kb_id, 1, chunk_number, 0)
595
- return get_json_result(data=True)
596
- except Exception as e:
597
- return server_error_response(e)
598
 
599
- @manager.route('/chunk/set', methods=['POST'])
600
  @token_required
601
- @validate_request("document_id", "chunk_id", "content",
602
- "important_keywords")
603
- def set(tenant_id):
 
 
 
604
  req = request.json
 
 
 
 
605
  d = {
606
- "id": req["chunk_id"],
607
  "content_with_weight": req["content"]}
608
  d["content_ltks"] = rag_tokenizer.tokenize(req["content"])
609
  d["content_sm_ltks"] = rag_tokenizer.fine_grained_tokenize(d["content_ltks"])
@@ -611,71 +461,54 @@ def set(tenant_id):
611
  d["important_tks"] = rag_tokenizer.tokenize(" ".join(req["important_keywords"]))
612
  if "available" in req:
613
  d["available_int"] = req["available"]
614
-
615
- try:
616
- tenant_id = DocumentService.get_tenant_id(req["document_id"])
617
- if not tenant_id:
618
- return get_data_error_result(retmsg="Tenant not found!")
619
-
620
- embd_id = DocumentService.get_embd_id(req["document_id"])
621
- embd_mdl = TenantLLMService.model_instance(
622
- tenant_id, LLMType.EMBEDDING.value, embd_id)
623
-
624
- e, doc = DocumentService.get_by_id(req["document_id"])
625
- if not e:
626
- return get_data_error_result(retmsg="Document not found!")
627
-
628
- if doc.parser_id == ParserType.QA:
629
- arr = [
630
- t for t in re.split(
631
- r"[\n\t]",
632
- req["content"]) if len(t) > 1]
633
- if len(arr) != 2:
634
- return get_data_error_result(
635
- retmsg="Q&A must be separated by TAB/ENTER key.")
636
- q, a = rmPrefix(arr[0]), rmPrefix(arr[1])
637
- d = beAdoc(d, arr[0], arr[1], not any(
638
- [rag_tokenizer.is_chinese(t) for t in q + a]))
639
-
640
- v, c = embd_mdl.encode([doc.name, req["content"]])
641
- v = 0.1 * v[0] + 0.9 * v[1] if doc.parser_id != ParserType.QA else v[1]
642
- d["q_%d_vec" % len(v)] = v.tolist()
643
- ELASTICSEARCH.upsert([d], search.index_name(tenant_id))
644
- return get_json_result(data=True)
645
- except Exception as e:
646
- return server_error_response(e)
647
-
648
- @manager.route('/retrieval_test', methods=['POST'])
649
  @token_required
650
- @validate_request("knowledgebase_id", "question")
651
  def retrieval_test(tenant_id):
652
- req = request.json
653
- page = int(req.get("page", 1))
654
- size = int(req.get("size", 30))
 
 
 
 
 
 
 
655
  question = req["question"]
656
- kb_id = req["knowledgebase_id"]
657
  if isinstance(kb_id, str): kb_id = [kb_id]
658
- doc_ids = req.get("doc_ids", [])
659
  similarity_threshold = float(req.get("similarity_threshold", 0.2))
660
  vector_similarity_weight = float(req.get("vector_similarity_weight", 0.3))
661
  top = int(req.get("top_k", 1024))
662
 
663
  try:
664
- tenants = UserTenantService.query(user_id=tenant_id)
665
- for kid in kb_id:
666
- for tenant in tenants:
667
- if KnowledgebaseService.query(
668
- tenant_id=tenant.tenant_id, id=kid):
669
- break
670
- else:
671
- return get_json_result(
672
- data=False, retmsg=f'Only owner of knowledgebase authorized for this operation.',
673
- retcode=RetCode.OPERATING_ERROR)
674
-
675
  e, kb = KnowledgebaseService.get_by_id(kb_id[0])
676
  if not e:
677
- return get_data_error_result(retmsg="Knowledgebase not found!")
678
-
679
  embd_mdl = TenantLLMService.model_instance(
680
  kb.tenant_id, LLMType.EMBEDDING.value, llm_name=kb.embd_id)
681
 
@@ -697,24 +530,24 @@ def retrieval_test(tenant_id):
697
  del c["vector"]
698
 
699
  ##rename keys
700
- renamed_chunks=[]
701
  for chunk in ranks["chunks"]:
702
  key_mapping = {
703
  "chunk_id": "id",
704
  "content_with_weight": "content",
705
  "doc_id": "document_id",
706
  "important_kwd": "important_keywords",
707
- "docnm_kwd":"document_keyword"
708
  }
709
- rename_chunk={}
710
  for key, value in chunk.items():
711
  new_key = key_mapping.get(key, key)
712
  rename_chunk[new_key] = value
713
  renamed_chunks.append(rename_chunk)
714
  ranks["chunks"] = renamed_chunks
715
- return get_json_result(data=ranks)
716
  except Exception as e:
717
  if str(e).find("not_found") > 0:
718
- return get_json_result(data=False, retmsg=f'No chunk found! Check the chunk status please!',
719
  retcode=RetCode.DATA_ERROR)
720
  return server_error_response(e)
 
4
  import json
5
  import traceback
6
 
7
+ from botocore.docs.method import document_model_driven_method
8
  from flask import request
9
  from flask_login import login_required, current_user
10
  from elasticsearch_dsl import Q
11
+ from sphinx.addnodes import document
12
 
13
  from rag.app.qa import rmPrefix, beAdoc
14
  from rag.nlp import search, rag_tokenizer, keyword_extraction
 
18
  from api.db.services.knowledgebase_service import KnowledgebaseService
19
  from api.db.services.llm_service import TenantLLMService
20
  from api.db.services.user_service import UserTenantService
21
+ from api.utils.api_utils import server_error_response, get_error_data_result, validate_request
22
  from api.db.services.document_service import DocumentService
23
  from api.settings import RetCode, retrievaler, kg_retrievaler
24
+ from api.utils.api_utils import get_result
25
  import hashlib
26
  import re
27
+ from api.utils.api_utils import get_result, token_required, get_error_data_result
28
 
29
  from api.db.db_models import Task, File
30
 
31
  from api.db.services.task_service import TaskService, queue_tasks
32
  from api.db.services.user_service import TenantService, UserTenantService
33
 
34
+ from api.utils.api_utils import server_error_response, get_error_data_result, validate_request
35
 
36
+ from api.utils.api_utils import get_result, get_result, get_error_data_result
37
 
38
  from functools import partial
39
  from io import BytesIO
 
61
  MAXIMUM_OF_UPLOADING_FILES = 256
62
 
63
 
64
@manager.route('/dataset/<dataset_id>/document', methods=['POST'])
@token_required
def upload(dataset_id, tenant_id):
    """Upload one or more files into a dataset.

    Args:
        dataset_id: ID of the target dataset, taken from the URL.
        tenant_id: Tenant the request acts for (presumably injected by
            ``token_required`` -- confirm against the decorator).

    Returns:
        A ``get_result`` / ``get_error_data_result`` response: an argument
        error when the ``file`` form field is missing or a file has an empty
        name, a server error carrying the joined upload messages when
        ``FileService.upload_document`` reports errors, and an empty success
        result otherwise.

    Raises:
        LookupError: if the dataset cannot be loaded by ID.
    """
    if 'file' not in request.files:
        return get_error_data_result(
            retmsg='No file part!', retcode=RetCode.ARGUMENT_ERROR)

    file_objs = request.files.getlist('file')
    if any(file_obj.filename == '' for file_obj in file_objs):
        return get_result(
            retmsg='No file selected!', retcode=RetCode.ARGUMENT_ERROR)

    # Same ownership guard the other dataset routes in this module apply;
    # without it a token holder could upload into another tenant's dataset.
    if not KnowledgebaseService.query(id=dataset_id, tenant_id=tenant_id):
        return get_error_data_result(
            retmsg=f"You don't own the dataset {dataset_id}.")

    found, kb = KnowledgebaseService.get_by_id(dataset_id)
    if not found:
        raise LookupError(f"Can't find the knowledgebase with ID {dataset_id}!")

    err, _ = FileService.upload_document(kb, file_objs, tenant_id)
    if err:
        return get_result(
            retmsg="\n".join(err), retcode=RetCode.SERVER_ERROR)
    return get_result()
83
 
84
 
85
@manager.route('/dataset/<dataset_id>/info/<document_id>', methods=['PUT'])
@token_required
def update_doc(tenant_id, dataset_id, document_id):
    """Rename a document and/or change its parser method and parser config.

    The JSON body may contain ``name``, ``parser_method`` and
    ``parser_config``. ``chunk_count``, ``token_count`` and ``progress`` are
    accepted only when they equal the stored values; any other value is
    rejected.

    Args:
        tenant_id: Tenant the request acts for (presumably injected by
            ``token_required`` -- confirm against the decorator).
        dataset_id: ID of the dataset, taken from the URL.
        document_id: ID of the document, taken from the URL.

    Returns:
        A ``get_result`` / ``get_error_data_result`` response.
    """
    req = request.json
    if not KnowledgebaseService.query(id=dataset_id, tenant_id=tenant_id):
        return get_error_data_result(retmsg='You do not own the dataset.')
    matches = DocumentService.query(kb_id=dataset_id, id=document_id)
    if not matches:
        return get_error_data_result(retmsg='The dataset not own the document.')
    doc = matches[0]

    # These fields may be echoed back by clients but must not be modified.
    if "chunk_count" in req and req["chunk_count"] != doc.chunk_num:
        return get_error_data_result(retmsg="Can't change chunk_count.")
    if "token_count" in req and req["token_count"] != doc.token_num:
        return get_error_data_result(retmsg="Can't change token_count.")
    if "progress" in req and req['progress'] != doc.progress:
        return get_error_data_result(retmsg="Can't change progress.")

    if "name" in req and req["name"] != doc.name:
        new_name = req["name"]
        if pathlib.Path(new_name.lower()).suffix != pathlib.Path(doc.name.lower()).suffix:
            return get_result(retmsg="The extension of file can't be changed", retcode=RetCode.ARGUMENT_ERROR)
        for d in DocumentService.query(name=new_name, kb_id=doc.kb_id):
            if d.name == new_name:
                return get_error_data_result(
                    retmsg="Duplicated document name in the same knowledgebase.")
        if not DocumentService.update_by_id(document_id, {"name": new_name}):
            return get_error_data_result(
                retmsg="Database error (Document rename)!")

        # Keep the linked file record's name in sync with the document.
        informs = File2DocumentService.get_by_document_id(document_id)
        if informs:
            found, linked_file = FileService.get_by_id(informs[0].file_id)
            # Only touch the file record when the lookup actually found one.
            if found:
                FileService.update_by_id(linked_file.id, {"name": new_name})

    if "parser_method" in req:
        if doc.parser_id.lower() == req["parser_method"].lower():
            # Same parser: nothing to do unless a different config is supplied.
            if "parser_config" in req:
                if req["parser_config"] == doc.parser_config:
                    return get_result(retcode=RetCode.SUCCESS)
            else:
                return get_result(retcode=RetCode.SUCCESS)

        if doc.type == FileType.VISUAL or re.search(
                r"\.(ppt|pptx|pages)$", doc.name):
            return get_error_data_result(retmsg="Not supported yet!")

        # Reset parsing state so the document can be processed again.
        e = DocumentService.update_by_id(doc.id,
                                         {"parser_id": req["parser_method"], "progress": 0, "progress_msg": "",
                                          "run": TaskStatus.UNSTART.value})
        if not e:
            return get_error_data_result(retmsg="Document not found!")
        if doc.token_num > 0:
            # Subtract this document's counters, then drop its indexed chunks.
            e = DocumentService.increment_chunk_num(doc.id, doc.kb_id, doc.token_num * -1, doc.chunk_num * -1,
                                                    doc.process_duation * -1)
            if not e:
                return get_error_data_result(retmsg="Document not found!")
            # The document is identified by the URL; the body is not required
            # to carry an "id", so req["id"] could raise KeyError here.
            owner_tenant_id = DocumentService.get_tenant_id(doc.id)
            if not owner_tenant_id:
                return get_error_data_result(retmsg="Tenant not found!")
            ELASTICSEARCH.deleteByQuery(
                Q("match", doc_id=doc.id), idxnm=search.index_name(owner_tenant_id))
        if "parser_config" in req:
            DocumentService.update_parser_config(doc.id, req["parser_config"])

    return get_result()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
152
 
153
 
154
@manager.route('/dataset/<dataset_id>/document/<document_id>', methods=['GET'])
@token_required
def download(tenant_id, dataset_id, document_id):
    """Send a stored document back to the caller as an attachment.

    Args:
        tenant_id: Tenant the request acts for (presumably injected by
            ``token_required`` -- confirm against the decorator).
        dataset_id: ID of the dataset, taken from the URL.
        document_id: ID of the document, taken from the URL.

    Returns:
        The file as an ``application/octet-stream`` attachment named after the
        document, or an error result when the dataset is not owned by the
        tenant, the document is not in the dataset, or the stored file is
        empty.
    """
    if not KnowledgebaseService.query(id=dataset_id, tenant_id=tenant_id):
        return get_error_data_result(retmsg=f'You do not own the dataset {dataset_id}.')
    docs = DocumentService.query(kb_id=dataset_id, id=document_id)
    if not docs:
        # The query result is empty here, so there is no document object to
        # read an id from; report the id that was requested.
        return get_error_data_result(retmsg=f'The dataset not own the document {document_id}.')

    # Resolve the storage address and fetch the raw bytes.
    bucket, location = File2DocumentService.get_storage_address(doc_id=document_id)
    file_stream = STORAGE_IMPL.get(bucket, location)
    if not file_stream:
        # Use the result helper the rest of this module uses.
        return get_result(retmsg="This file is empty.", retcode=RetCode.DATA_ERROR)

    # Use send_file with a proper filename and a default MIME type.
    return send_file(
        BytesIO(file_stream),
        as_attachment=True,
        download_name=docs[0].name,
        mimetype='application/octet-stream'
    )
175
+
176
+
177
@manager.route('/dataset/<dataset_id>/info', methods=['GET'])
@token_required
def list_docs(dataset_id, tenant_id):
    """List the documents of a dataset, with optional filtering and paging.

    Query-string arguments read from the request:
        id: optional document id to restrict the listing to.
        keywords: optional filter, passed through to ``DocumentService.get_list``.
        offset: page number handed to ``DocumentService.get_list`` (default 1).
        limit: page size (default 1024).
        orderby: field to sort by (default ``create_time``).
        desc: the literal string ``"False"`` selects ascending order; any
            other value, or no value, selects descending order.

    Returns:
        A result whose data is ``{"total": <count>, "docs": [...]}`` with the
        storage field names mapped to their public API names, or an error
        result when ownership or argument validation fails.
    """
    if not KnowledgebaseService.query(id=dataset_id, tenant_id=tenant_id):
        return get_error_data_result(retmsg=f"You don't own the dataset {dataset_id}. ")

    # `id` is an optional filter: only verify membership when it was supplied,
    # otherwise a plain listing would be rejected with "document None".
    doc_id = request.args.get("id")
    if doc_id and not DocumentService.query(id=doc_id, kb_id=dataset_id):
        return get_error_data_result(retmsg=f"You don't own the document {doc_id}.")

    try:
        offset = int(request.args.get("offset", 1))
        limit = int(request.args.get("limit", 1024))
    except ValueError:
        return get_error_data_result(retmsg="`offset` and `limit` must be integers.")

    keywords = request.args.get("keywords", "")
    orderby = request.args.get("orderby", "create_time")
    desc = request.args.get("desc") != "False"

    docs, total = DocumentService.get_list(
        dataset_id, offset, limit, orderby, desc, keywords, doc_id)

    # Storage field name -> public API field name; unlisted keys pass through.
    key_mapping = {
        "chunk_num": "chunk_count",
        "kb_id": "knowledgebase_id",
        "token_num": "token_count",
        "parser_id": "parser_method",
    }
    renamed_doc_list = [
        {key_mapping.get(key, key): value for key, value in doc.items()}
        for doc in docs
    ]
    return get_result(data={"total": total, "docs": renamed_doc_list})
 
 
210
 
211
 
212
+ @manager.route('/dataset/<dataset_id>/document', methods=['DELETE'])
213
  @token_required
214
+ def delete(tenant_id,dataset_id):
215
+ if not KnowledgebaseService.query(id=dataset_id, tenant_id=tenant_id):
216
+ return get_error_data_result(retmsg=f"You don't own the dataset {dataset_id}. ")
217
+ req = request.json
218
+ if not req.get("ids"):
219
+ return get_error_data_result(retmsg="ids is required")
220
+ doc_ids = req["ids"]
221
  root_folder = FileService.get_root_folder(tenant_id)
222
  pf_id = root_folder["id"]
223
  FileService.init_knowledgebase_docs(pf_id, tenant_id)
 
226
  try:
227
  e, doc = DocumentService.get_by_id(doc_id)
228
  if not e:
229
+ return get_error_data_result(retmsg="Document not found!")
230
  tenant_id = DocumentService.get_tenant_id(doc_id)
231
  if not tenant_id:
232
+ return get_error_data_result(retmsg="Tenant not found!")
233
 
234
  b, n = File2DocumentService.get_storage_address(doc_id=doc_id)
235
 
236
  if not DocumentService.remove_document(doc, tenant_id):
237
+ return get_error_data_result(
238
  retmsg="Database error (Document removal)!")
239
 
240
  f2d = File2DocumentService.get_by_document_id(doc_id)
 
246
  errors += str(e)
247
 
248
  if errors:
249
+ return get_result(retmsg=errors, retcode=RetCode.SERVER_ERROR)
250
 
251
+ return get_result()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
252
 
253
 
254
@manager.route('/dataset/<dataset_id>/chunk', methods=['POST'])
@token_required
def parse(tenant_id, dataset_id):
    """Reset the listed documents and queue them for parsing.

    The JSON body must carry ``document_ids``. Every id is validated against
    the dataset before any document is touched, so a bad id cannot leave the
    request half-applied.

    Args:
        tenant_id: Tenant supplied by ``token_required``.
        dataset_id: Dataset id taken from the URL.

    Returns:
        An empty success result, or an error result when the dataset is not
        owned by the tenant, ``document_ids`` is missing or empty, or a
        document does not belong to the dataset.
    """
    if not KnowledgebaseService.query(id=dataset_id, tenant_id=tenant_id):
        return get_error_data_result(retmsg=f"You don't own the dataset {dataset_id}.")

    req = request.json
    doc_ids = req.get("document_ids")
    if not doc_ids:
        return get_error_data_result(retmsg="`document_ids` is required")

    # Validate everything first; mutation starts only once all ids are known good.
    for doc_id in doc_ids:
        if not DocumentService.query(id=doc_id, kb_id=dataset_id):
            return get_error_data_result(retmsg=f"You don't own the document {doc_id}.")

    for doc_id in doc_ids:
        # NOTE(review): "1" is presumably the running TaskStatus value - confirm.
        info = {
            "run": "1",
            "progress": 0,
            "progress_msg": "",
            "chunk_num": 0,
            "token_num": 0,
        }
        DocumentService.update_by_id(doc_id, info)

        # Drop previously indexed chunks and pending tasks before re-queueing.
        ELASTICSEARCH.deleteByQuery(
            Q("match", doc_id=doc_id), idxnm=search.index_name(tenant_id))
        TaskService.filter_delete([Task.doc_id == doc_id])

        found, doc = DocumentService.get_by_id(doc_id)
        if not found:
            return get_error_data_result(retmsg="Document not found!")
        doc = doc.to_dict()
        doc["tenant_id"] = tenant_id
        bucket, name = File2DocumentService.get_storage_address(doc_id=doc["id"])
        queue_tasks(doc, bucket, name)
    return get_result()
278
+
279
@manager.route('/dataset/<dataset_id>/chunk', methods=['DELETE'])
@token_required
def stop_parsing(tenant_id, dataset_id):
    """Mark the listed documents as stopped and remove their indexed chunks.

    The JSON body must carry ``document_ids``. Every id is validated against
    the dataset before any document is touched.

    Args:
        tenant_id: Tenant supplied by ``token_required``.
        dataset_id: Dataset id taken from the URL.

    Returns:
        An empty success result, or an error result when the dataset is not
        owned by the tenant, ``document_ids`` is missing or empty, a document
        does not belong to the dataset, or its tenant cannot be resolved.
    """
    if not KnowledgebaseService.query(id=dataset_id, tenant_id=tenant_id):
        return get_error_data_result(retmsg=f"You don't own the dataset {dataset_id}.")

    req = request.json
    doc_ids = req.get("document_ids")
    if not doc_ids:
        return get_error_data_result(retmsg="`document_ids` is required")

    # Validate everything first; mutation starts only once all ids are known good.
    for doc_id in doc_ids:
        if not DocumentService.query(id=doc_id, kb_id=dataset_id):
            return get_error_data_result(retmsg=f"You don't own the document {doc_id}.")

    for doc_id in doc_ids:
        # NOTE(review): "2" is presumably the cancel TaskStatus value - confirm.
        DocumentService.update_by_id(doc_id, {"run": "2", "progress": 0})

        # Kept in its own local so the authenticated tenant_id is not clobbered.
        doc_tenant_id = DocumentService.get_tenant_id(doc_id)
        if not doc_tenant_id:
            return get_error_data_result(retmsg="Tenant not found!")
        ELASTICSEARCH.deleteByQuery(
            Q("match", doc_id=doc_id), idxnm=search.index_name(doc_tenant_id))
    return get_result()
295
+
296
+
297
+ @manager.route('/dataset/{dataset_id}/document/{document_id}/chunk', methods=['GET'])
298
+ @token_required
299
+ def list_chunk(tenant_id,dataset_id,document_id):
300
+ if not KnowledgebaseService.query(id=dataset_id, tenant_id=tenant_id):
301
+ return get_error_data_result(retmsg=f"You don't own the dataset {dataset_id}.")
302
+ doc=DocumentService.query(id=document_id, kb_id=dataset_id)
303
+ if not doc:
304
+ return get_error_data_result(retmsg=f"You don't own the document {document_id}.")
305
+ doc=doc[0]
306
+ req = request.args
307
+ doc_id = document_id
308
+ page = int(req.get("offset", 1))
309
+ size = int(req.get("limit", 30))
310
  question = req.get("keywords", "")
311
  try:
 
 
 
 
 
 
312
  query = {
313
  "doc_ids": [doc_id], "page": page, "size": size, "question": question, "sort": True
314
  }
 
317
  sres = retrievaler.search(query, search.index_name(tenant_id), highlight=True)
318
  res = {"total": sres.total, "chunks": [], "doc": doc.to_dict()}
319
 
320
+ origin_chunks = []
321
  for id in sres.ids:
322
  d = {
323
  "chunk_id": id,
 
337
  poss.append([float(d["positions"][i]), float(d["positions"][i + 1]), float(d["positions"][i + 2]),
338
  float(d["positions"][i + 3]), float(d["positions"][i + 4])])
339
  d["positions"] = poss
340
+
341
  origin_chunks.append(d)
342
  ##rename keys
343
  for chunk in origin_chunks:
 
346
  "content_with_weight": "content",
347
  "doc_id": "document_id",
348
  "important_kwd": "important_keywords",
349
+ "img_id": "image_id",
350
  }
351
  renamed_chunk = {}
352
  for key, value in chunk.items():
353
  new_key = key_mapping.get(key, key)
354
  renamed_chunk[new_key] = value
355
  res["chunks"].append(renamed_chunk)
356
+ return get_result(data=res)
357
  except Exception as e:
358
  if str(e).find("not_found") > 0:
359
+ return get_result(retmsg=f'No chunk found!',
360
  retcode=RetCode.DATA_ERROR)
361
  return server_error_response(e)
362
 
363
 
364
+ @manager.route('/dataset/{dataset_id}/document/{document_id}/chunk', methods=['POST'])
365
  @token_required
366
+ def create(tenant_id,dataset_id,document_id):
367
+ if not KnowledgebaseService.query(id=dataset_id, tenant_id=tenant_id):
368
+ return get_error_data_result(retmsg=f"You don't own the dataset {dataset_id}.")
369
+ doc = DocumentService.query(id=document_id, kb_id=dataset_id)
370
+ if not doc:
371
+ return get_error_data_result(retmsg=f"You don't own the document {document_id}.")
372
  req = request.json
373
+ if not req.get("content"):
374
+ return get_error_data_result(retmsg="`content` is required")
375
  md5 = hashlib.md5()
376
+ md5.update((req["content"] + document_id).encode("utf-8"))
377
 
378
  chunk_id = md5.hexdigest()
379
  d = {"id": chunk_id, "content_ltks": rag_tokenizer.tokenize(req["content"]),
 
383
  d["important_tks"] = rag_tokenizer.tokenize(" ".join(req.get("important_kwd", [])))
384
  d["create_time"] = str(datetime.datetime.now()).replace("T", " ")[:19]
385
  d["create_timestamp_flt"] = datetime.datetime.now().timestamp()
386
+ d["kb_id"] = [doc.kb_id]
387
+ d["docnm_kwd"] = doc.name
388
+ d["doc_id"] = doc.id
389
+ embd_id = DocumentService.get_embd_id(document_id)
390
+ embd_mdl = TenantLLMService.model_instance(
391
+ tenant_id, LLMType.EMBEDDING.value, embd_id)
392
+
393
+ v, c = embd_mdl.encode([doc.name, req["content"]])
394
+ v = 0.1 * v[0] + 0.9 * v[1]
395
+ d["q_%d_vec" % len(v)] = v.tolist()
396
+ ELASTICSEARCH.upsert([d], search.index_name(tenant_id))
397
+
398
+ DocumentService.increment_chunk_num(
399
+ doc.id, doc.kb_id, c, 1, 0)
400
+ d["chunk_id"] = chunk_id
401
+ # rename keys
402
+ key_mapping = {
403
+ "chunk_id": "id",
404
+ "content_with_weight": "content",
405
+ "doc_id": "document_id",
406
+ "important_kwd": "important_keywords",
407
+ "kb_id": "dataset_id",
408
+ "create_timestamp_flt": "create_timestamp",
409
+ "create_time": "create_time",
410
+ "document_keyword": "document",
411
+ }
412
+ renamed_chunk = {}
413
+ for key, value in d.items():
414
+ if key in key_mapping:
415
+ new_key = key_mapping.get(key, key)
416
+ renamed_chunk[new_key] = value
417
+ return get_result(data={"chunk": renamed_chunk})
418
+ # return get_result(data={"chunk_id": chunk_id})
419
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
420
 
421
@manager.route('/dataset/<dataset_id>/document/<document_id>/chunk', methods=['DELETE'])
@token_required
def rm_chunk(tenant_id, dataset_id, document_id):
    """Delete chunks of a document from the index and adjust its chunk count.

    The JSON body must carry ``chunk_ids``.

    Args:
        tenant_id: Tenant supplied by ``token_required``.
        dataset_id: Dataset id taken from the URL.
        document_id: Document id taken from the URL.

    Returns:
        An empty success result, or an error result when the dataset or
        document check fails, ``chunk_ids`` is missing or empty, or the index
        deletion reports failure.
    """
    if not KnowledgebaseService.query(id=dataset_id, tenant_id=tenant_id):
        return get_error_data_result(retmsg=f"You don't own the dataset {dataset_id}.")

    docs = DocumentService.query(id=document_id, kb_id=dataset_id)
    if not docs:
        return get_error_data_result(retmsg=f"You don't own the document {document_id}.")
    # query() yields a collection of matches; the document itself is the first one.
    doc = docs[0]

    req = request.json
    chunk_ids = req.get("chunk_ids")
    if not chunk_ids:
        return get_error_data_result(retmsg="`chunk_ids` is required")

    if not ELASTICSEARCH.deleteByQuery(
            Q("ids", values=chunk_ids), search.index_name(tenant_id)):
        return get_error_data_result(retmsg="Index updating failure")

    # NOTE(review): the literal 1 is the token-count argument carried over from
    # the original call; confirm it should not be the real token total.
    DocumentService.decrement_chunk_num(doc.id, doc.kb_id, 1, len(chunk_ids), 0)
    return get_result()
439
+
440
+
 
 
441
 
442
+ @manager.route('/dataset/{dataset_id}/document/{document_id}/chunk/{chunk_id}', methods=['PUT'])
443
  @token_required
444
+ def set(tenant_id,dataset_id,document_id,chunk_id):
445
+ if not KnowledgebaseService.query(id=dataset_id, tenant_id=tenant_id):
446
+ return get_error_data_result(retmsg=f"You don't own the dataset {dataset_id}.")
447
+ doc = DocumentService.query(id=document_id, kb_id=dataset_id)
448
+ if not doc:
449
+ return get_error_data_result(retmsg=f"You don't own the document {document_id}.")
450
  req = request.json
451
+ if not req.get("content"):
452
+ return get_error_data_result("`content` is required")
453
+ if not req.get("important_keywords"):
454
+ return get_error_data_result("`important_keywords` is required")
455
  d = {
456
+ "id": chunk_id,
457
  "content_with_weight": req["content"]}
458
  d["content_ltks"] = rag_tokenizer.tokenize(req["content"])
459
  d["content_sm_ltks"] = rag_tokenizer.fine_grained_tokenize(d["content_ltks"])
 
461
  d["important_tks"] = rag_tokenizer.tokenize(" ".join(req["important_keywords"]))
462
  if "available" in req:
463
  d["available_int"] = req["available"]
464
+ embd_id = DocumentService.get_embd_id(document_id)
465
+ embd_mdl = TenantLLMService.model_instance(
466
+ tenant_id, LLMType.EMBEDDING.value, embd_id)
467
+ if doc.parser_id == ParserType.QA:
468
+ arr = [
469
+ t for t in re.split(
470
+ r"[\n\t]",
471
+ req["content"]) if len(t) > 1]
472
+ if len(arr) != 2:
473
+ return get_error_data_result(
474
+ retmsg="Q&A must be separated by TAB/ENTER key.")
475
+ q, a = rmPrefix(arr[0]), rmPrefix(arr[1])
476
+ d = beAdoc(d, arr[0], arr[1], not any(
477
+ [rag_tokenizer.is_chinese(t) for t in q + a]))
478
+
479
+ v, c = embd_mdl.encode([doc.name, req["content"]])
480
+ v = 0.1 * v[0] + 0.9 * v[1] if doc.parser_id != ParserType.QA else v[1]
481
+ d["q_%d_vec" % len(v)] = v.tolist()
482
+ ELASTICSEARCH.upsert([d], search.index_name(tenant_id))
483
+ return get_result()
484
+
485
+
486
+
487
+ @manager.route('/retrieval', methods=['GET'])
 
 
 
 
 
 
 
 
 
 
 
488
  @token_required
 
489
  def retrieval_test(tenant_id):
490
+ req = request.args
491
+ if not req.get("datasets"):
492
+ return get_error_data_result("`datasets` is required.")
493
+ for id in req.get("datasets"):
494
+ if not KnowledgebaseService.query(id=id,tenant_id=tenant_id):
495
+ return get_error_data_result(f"You don't own the dataset {id}.")
496
+ if not req.get("question"):
497
+ return get_error_data_result("`question` is required.")
498
+ page = int(req.get("offset", 1))
499
+ size = int(req.get("limit", 30))
500
  question = req["question"]
501
+ kb_id = req["datasets"]
502
  if isinstance(kb_id, str): kb_id = [kb_id]
503
+ doc_ids = req.get("documents", [])
504
  similarity_threshold = float(req.get("similarity_threshold", 0.2))
505
  vector_similarity_weight = float(req.get("vector_similarity_weight", 0.3))
506
  top = int(req.get("top_k", 1024))
507
 
508
  try:
 
 
 
 
 
 
 
 
 
 
 
509
  e, kb = KnowledgebaseService.get_by_id(kb_id[0])
510
  if not e:
511
+ return get_error_data_result(retmsg="Knowledgebase not found!")
 
512
  embd_mdl = TenantLLMService.model_instance(
513
  kb.tenant_id, LLMType.EMBEDDING.value, llm_name=kb.embd_id)
514
 
 
530
  del c["vector"]
531
 
532
  ##rename keys
533
+ renamed_chunks = []
534
  for chunk in ranks["chunks"]:
535
  key_mapping = {
536
  "chunk_id": "id",
537
  "content_with_weight": "content",
538
  "doc_id": "document_id",
539
  "important_kwd": "important_keywords",
540
+ "docnm_kwd": "document_keyword"
541
  }
542
+ rename_chunk = {}
543
  for key, value in chunk.items():
544
  new_key = key_mapping.get(key, key)
545
  rename_chunk[new_key] = value
546
  renamed_chunks.append(rename_chunk)
547
  ranks["chunks"] = renamed_chunks
548
+ return get_result(data=ranks)
549
  except Exception as e:
550
  if str(e).find("not_found") > 0:
551
+ return get_result(retmsg=f'No chunk found! Check the chunk status please!',
552
  retcode=RetCode.DATA_ERROR)
553
  return server_error_response(e)
api/apps/sdk/session.py CHANGED
@@ -20,47 +20,18 @@ from flask import request, Response
20
 
21
  from api.db import StatusEnum
22
  from api.db.services.dialog_service import DialogService, ConversationService, chat
23
- from api.settings import RetCode
24
  from api.utils import get_uuid
25
- from api.utils.api_utils import get_data_error_result
26
- from api.utils.api_utils import get_json_result, token_required
27
 
28
-
29
- @manager.route('/save', methods=['POST'])
30
  @token_required
31
- def set_conversation(tenant_id):
32
  req = request.json
33
- conv_id = req.get("id")
34
- if "assistant_id" in req:
35
- req["dialog_id"] = req.pop("assistant_id")
36
- if "id" in req:
37
- del req["id"]
38
- conv = ConversationService.query(id=conv_id)
39
- if not conv:
40
- return get_data_error_result(retmsg="Session does not exist")
41
- if not DialogService.query(id=conv[0].dialog_id, tenant_id=tenant_id, status=StatusEnum.VALID.value):
42
- return get_data_error_result(retmsg="You do not own the session")
43
- if req.get("dialog_id"):
44
- dia = DialogService.query(tenant_id=tenant_id, id=req["dialog_id"], status=StatusEnum.VALID.value)
45
- if not dia:
46
- return get_data_error_result(retmsg="You do not own the assistant")
47
- if "dialog_id" in req and not req.get("dialog_id"):
48
- return get_data_error_result(retmsg="assistant_id can not be empty.")
49
- if "message" in req:
50
- return get_data_error_result(retmsg="message can not be change")
51
- if "reference" in req:
52
- return get_data_error_result(retmsg="reference can not be change")
53
- if "name" in req and not req.get("name"):
54
- return get_data_error_result(retmsg="name can not be empty.")
55
- if not ConversationService.update_by_id(conv_id, req):
56
- return get_data_error_result(retmsg="Session updates error")
57
- return get_json_result(data=True)
58
-
59
- if not req.get("dialog_id"):
60
- return get_data_error_result(retmsg="assistant_id is required.")
61
  dia = DialogService.query(tenant_id=tenant_id, id=req["dialog_id"], status=StatusEnum.VALID.value)
62
  if not dia:
63
- return get_data_error_result(retmsg="You do not own the assistant")
64
  conv = {
65
  "id": get_uuid(),
66
  "dialog_id": req["dialog_id"],
@@ -68,33 +39,58 @@ def set_conversation(tenant_id):
68
  "message": [{"role": "assistant", "content": "Hi! I am your assistant,can I help you?"}]
69
  }
70
  if not conv.get("name"):
71
- return get_data_error_result(retmsg="name can not be empty.")
72
  ConversationService.save(**conv)
73
  e, conv = ConversationService.get_by_id(conv["id"])
74
  if not e:
75
- return get_data_error_result(retmsg="Fail to new session!")
76
  conv = conv.to_dict()
77
  conv['messages'] = conv.pop("message")
78
- conv["assistant_id"] = conv.pop("dialog_id")
79
  del conv["reference"]
80
- return get_json_result(data=conv)
81
 
82
-
83
- @manager.route('/completion', methods=['POST'])
84
  @token_required
85
- def completion(tenant_id):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
  req = request.json
87
  # req = {"conversation_id": "9aaaca4c11d311efa461fa163e197198", "messages": [
88
  # {"role": "user", "content": "上海有吗?"}
89
  # ]}
90
- if "session_id" not in req:
91
- return get_data_error_result(retmsg="session_id is required")
92
- conv = ConversationService.query(id=req["session_id"])
93
  if not conv:
94
- return get_data_error_result(retmsg="Session does not exist")
95
  conv = conv[0]
96
- if not DialogService.query(id=conv.dialog_id, tenant_id=tenant_id, status=StatusEnum.VALID.value):
97
- return get_data_error_result(retmsg="You do not own the session")
98
  msg = []
99
  question = {
100
  "content": req.get("question"),
@@ -108,7 +104,6 @@ def completion(tenant_id):
108
  msg.append(m)
109
  message_id = msg[-1].get("id")
110
  e, dia = DialogService.get_by_id(conv.dialog_id)
111
- del req["session_id"]
112
 
113
  if not conv.reference:
114
  conv.reference = []
@@ -130,13 +125,13 @@ def completion(tenant_id):
130
  try:
131
  for ans in chat(dia, msg, **req):
132
  fillin_conv(ans)
133
- yield "data:" + json.dumps({"retcode": 0, "retmsg": "", "data": ans}, ensure_ascii=False) + "\n\n"
134
  ConversationService.update_by_id(conv.id, conv.to_dict())
135
  except Exception as e:
136
- yield "data:" + json.dumps({"retcode": 500, "retmsg": str(e),
137
  "data": {"answer": "**ERROR**: " + str(e), "reference": []}},
138
  ensure_ascii=False) + "\n\n"
139
- yield "data:" + json.dumps({"retcode": 0, "retmsg": "", "data": True}, ensure_ascii=False) + "\n\n"
140
 
141
  if req.get("stream", True):
142
  resp = Response(stream(), mimetype="text/event-stream")
@@ -153,73 +148,31 @@ def completion(tenant_id):
153
  fillin_conv(ans)
154
  ConversationService.update_by_id(conv.id, conv.to_dict())
155
  break
156
- return get_json_result(data=answer)
157
-
158
 
159
- @manager.route('/get', methods=['GET'])
160
  @token_required
161
- def get(tenant_id):
162
- req = request.args
163
- if "id" not in req:
164
- return get_data_error_result(retmsg="id is required")
165
- conv_id = req["id"]
166
- conv = ConversationService.query(id=conv_id)
167
- if not conv:
168
- return get_data_error_result(retmsg="Session does not exist")
169
- if not DialogService.query(id=conv[0].dialog_id, tenant_id=tenant_id, status=StatusEnum.VALID.value):
170
- return get_data_error_result(retmsg="You do not own the session")
171
- if "assistant_id" in req:
172
- if req["assistant_id"] != conv[0].dialog_id:
173
- return get_data_error_result(retmsg="The session doesn't belong to the assistant")
174
- conv = conv[0].to_dict()
175
- conv['messages'] = conv.pop("message")
176
- conv["assistant_id"] = conv.pop("dialog_id")
177
- if conv["reference"]:
178
- messages = conv["messages"]
179
- message_num = 0
180
- chunk_num = 0
181
- while message_num < len(messages):
182
- if message_num != 0 and messages[message_num]["role"] != "user":
183
- chunk_list = []
184
- if "chunks" in conv["reference"][chunk_num]:
185
- chunks = conv["reference"][chunk_num]["chunks"]
186
- for chunk in chunks:
187
- new_chunk = {
188
- "id": chunk["chunk_id"],
189
- "content": chunk["content_with_weight"],
190
- "document_id": chunk["doc_id"],
191
- "document_name": chunk["docnm_kwd"],
192
- "knowledgebase_id": chunk["kb_id"],
193
- "image_id": chunk["img_id"],
194
- "similarity": chunk["similarity"],
195
- "vector_similarity": chunk["vector_similarity"],
196
- "term_similarity": chunk["term_similarity"],
197
- "positions": chunk["positions"],
198
- }
199
- chunk_list.append(new_chunk)
200
- chunk_num += 1
201
- messages[message_num]["reference"] = chunk_list
202
- message_num += 1
203
- del conv["reference"]
204
- return get_json_result(data=conv)
205
-
206
-
207
- @manager.route('/list', methods=["GET"])
208
- @token_required
209
- def list(tenant_id):
210
- assistant_id = request.args["assistant_id"]
211
- if not DialogService.query(tenant_id=tenant_id, id=assistant_id, status=StatusEnum.VALID.value):
212
- return get_json_result(
213
- data=False, retmsg=f"You don't own the assistant.",
214
- retcode=RetCode.OPERATING_ERROR)
215
- convs = ConversationService.query(
216
- dialog_id=assistant_id,
217
- order_by=ConversationService.model.create_time,
218
- reverse=True)
219
- convs = [d.to_dict() for d in convs]
220
  for conv in convs:
221
  conv['messages'] = conv.pop("message")
222
- conv["assistant_id"] = conv.pop("dialog_id")
223
  if conv["reference"]:
224
  messages = conv["messages"]
225
  message_num = 0
@@ -247,20 +200,19 @@ def list(tenant_id):
247
  messages[message_num]["reference"] = chunk_list
248
  message_num += 1
249
  del conv["reference"]
250
- return get_json_result(data=convs)
251
 
252
-
253
- @manager.route('/delete', methods=["DELETE"])
254
  @token_required
255
- def delete(tenant_id):
256
- id = request.args.get("id")
257
- if not id:
258
- return get_data_error_result(retmsg="`id` is required in deleting operation")
259
- conv = ConversationService.query(id=id)
260
- if not conv:
261
- return get_data_error_result(retmsg="Session doesn't exist")
262
- conv = conv[0]
263
- if not DialogService.query(id=conv.dialog_id, tenant_id=tenant_id, status=StatusEnum.VALID.value):
264
- return get_data_error_result(retmsg="You don't own the session")
265
- ConversationService.delete_by_id(id)
266
- return get_json_result(data=True)
 
20
 
21
  from api.db import StatusEnum
22
  from api.db.services.dialog_service import DialogService, ConversationService, chat
 
23
  from api.utils import get_uuid
24
+ from api.utils.api_utils import get_error_data_result
25
+ from api.utils.api_utils import get_result, token_required
26
 
27
+ @manager.route('/chat/<chat_id>/session', methods=['POST'])
 
28
  @token_required
29
+ def create(tenant_id,chat_id):
30
  req = request.json
31
+ req["dialog_id"] = chat_id
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  dia = DialogService.query(tenant_id=tenant_id, id=req["dialog_id"], status=StatusEnum.VALID.value)
33
  if not dia:
34
+ return get_error_data_result(retmsg="You do not own the assistant")
35
  conv = {
36
  "id": get_uuid(),
37
  "dialog_id": req["dialog_id"],
 
39
  "message": [{"role": "assistant", "content": "Hi! I am your assistant,can I help you?"}]
40
  }
41
  if not conv.get("name"):
42
+ return get_error_data_result(retmsg="Name can not be empty.")
43
  ConversationService.save(**conv)
44
  e, conv = ConversationService.get_by_id(conv["id"])
45
  if not e:
46
+ return get_error_data_result(retmsg="Fail to create a session!")
47
  conv = conv.to_dict()
48
  conv['messages'] = conv.pop("message")
49
+ conv["chat_id"] = conv.pop("dialog_id")
50
  del conv["reference"]
51
+ return get_result(data=conv)
52
 
53
@manager.route('/chat/<chat_id>/session/<session_id>', methods=['PUT'])
@token_required
def update(tenant_id, chat_id, session_id):
    """Apply the JSON body as an update to one session of a chat.

    The chat a session belongs to, its messages and its reference are not
    editable through this endpoint, and a supplied name must be non-empty.

    Args:
        tenant_id: Tenant supplied by ``token_required``.
        chat_id: Chat id taken from the URL.
        session_id: Session id taken from the URL.

    Returns:
        An empty success result, or an error result describing the first
        check that failed.
    """
    payload = request.json

    # Either spelling of the owning chat may be present, but only if it
    # agrees with the chat in the URL.
    for chat_key in ("dialog_id", "chat_id"):
        if chat_key in payload and payload.get(chat_key) != chat_id:
            return get_error_data_result(retmsg="Can't change chat_id")
    payload["dialog_id"] = chat_id

    existing = ConversationService.query(id=session_id, dialog_id=chat_id)
    if not existing:
        return get_error_data_result(retmsg="Session does not exist")

    owned = DialogService.query(id=chat_id, tenant_id=tenant_id, status=StatusEnum.VALID.value)
    if not owned:
        return get_error_data_result(retmsg="You do not own the session")

    if any(field in payload for field in ("message", "messages")):
        return get_error_data_result(retmsg="Message can not be change")
    if "reference" in payload:
        return get_error_data_result(retmsg="Reference can not be change")
    if "name" in payload and not payload.get("name"):
        return get_error_data_result(retmsg="Name can not be empty.")

    updated = ConversationService.update_by_id(session_id, payload)
    if not updated:
        return get_error_data_result(retmsg="Session updates error")
    return get_result()
77
+
78
+
79
+ @manager.route('/chat/<chat_id>/session/<session_id>/completion', methods=['POST'])
80
+ @token_required
81
+ def completion(tenant_id,chat_id,session_id):
82
  req = request.json
83
  # req = {"conversation_id": "9aaaca4c11d311efa461fa163e197198", "messages": [
84
  # {"role": "user", "content": "上海有吗?"}
85
  # ]}
86
+ if not req.get("question"):
87
+ return get_error_data_result(retmsg="Please input your question.")
88
+ conv = ConversationService.query(id=session_id,dialog_id=chat_id)
89
  if not conv:
90
+ return get_error_data_result(retmsg="Session does not exist")
91
  conv = conv[0]
92
+ if not DialogService.query(id=chat_id, tenant_id=tenant_id, status=StatusEnum.VALID.value):
93
+ return get_error_data_result(retmsg="You do not own the session")
94
  msg = []
95
  question = {
96
  "content": req.get("question"),
 
104
  msg.append(m)
105
  message_id = msg[-1].get("id")
106
  e, dia = DialogService.get_by_id(conv.dialog_id)
 
107
 
108
  if not conv.reference:
109
  conv.reference = []
 
125
  try:
126
  for ans in chat(dia, msg, **req):
127
  fillin_conv(ans)
128
+ yield "data:" + json.dumps({"code": 0, "data": ans}, ensure_ascii=False) + "\n\n"
129
  ConversationService.update_by_id(conv.id, conv.to_dict())
130
  except Exception as e:
131
+ yield "data:" + json.dumps({"code": 500, "message": str(e),
132
  "data": {"answer": "**ERROR**: " + str(e), "reference": []}},
133
  ensure_ascii=False) + "\n\n"
134
+ yield "data:" + json.dumps({"code": 0, "data": True}, ensure_ascii=False) + "\n\n"
135
 
136
  if req.get("stream", True):
137
  resp = Response(stream(), mimetype="text/event-stream")
 
148
  fillin_conv(ans)
149
  ConversationService.update_by_id(conv.id, conv.to_dict())
150
  break
151
+ return get_result(data=answer)
 
152
 
153
+ @manager.route('/chat/<chat_id>/session', methods=['GET'])
154
  @token_required
155
+ def list(chat_id,tenant_id):
156
+ if not DialogService.query(tenant_id=tenant_id, id=chat_id, status=StatusEnum.VALID.value):
157
+ return get_error_data_result(retmsg=f"You don't own the assistant {chat_id}.")
158
+ id = request.args.get("id")
159
+ name = request.args.get("name")
160
+ session = ConversationService.query(id=id,name=name,dialog_id=chat_id)
161
+ if not session:
162
+ return get_error_data_result(retmsg="The session doesn't exist")
163
+ page_number = int(request.args.get("page", 1))
164
+ items_per_page = int(request.args.get("page_size", 1024))
165
+ orderby = request.args.get("orderby", "create_time")
166
+ if request.args.get("desc") == "False":
167
+ desc = False
168
+ else:
169
+ desc = True
170
+ convs = ConversationService.get_list(chat_id,page_number,items_per_page,orderby,desc,id,name)
171
+ if not convs:
172
+ return get_result(data=[])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
173
  for conv in convs:
174
  conv['messages'] = conv.pop("message")
175
+ conv["chat"] = conv.pop("dialog_id")
176
  if conv["reference"]:
177
  messages = conv["messages"]
178
  message_num = 0
 
200
  messages[message_num]["reference"] = chunk_list
201
  message_num += 1
202
  del conv["reference"]
203
+ return get_result(data=convs)
204
 
205
@manager.route('/chat/<chat_id>/session', methods=["DELETE"])
@token_required
def delete(tenant_id, chat_id):
    """Delete the sessions listed in the JSON body's ``ids`` from a chat.

    All ids are checked against the chat before anything is removed, so a
    request containing a foreign or unknown id deletes nothing.

    Args:
        tenant_id: Tenant supplied by ``token_required``.
        chat_id: Chat id taken from the URL.

    Returns:
        An empty success result, or an error result when the chat is not
        owned by the tenant, ``ids`` is missing or empty, or a session does
        not belong to the chat.
    """
    if not DialogService.query(id=chat_id, tenant_id=tenant_id, status=StatusEnum.VALID.value):
        return get_error_data_result(retmsg="You don't own the chat")

    session_ids = request.json.get("ids")
    if not session_ids:
        return get_error_data_result(retmsg="`ids` is required in deleting operation")

    # First pass: validation only, to avoid a partial deletion on error.
    for session_id in session_ids:
        if not ConversationService.query(id=session_id, dialog_id=chat_id):
            return get_error_data_result(retmsg="The chat doesn't own the session")

    # Second pass: every id is known to belong to this chat.
    for session_id in session_ids:
        ConversationService.delete_by_id(session_id)
    return get_result()
api/db/services/dialog_service.py CHANGED
@@ -19,6 +19,8 @@ import json
19
  import re
20
  from copy import deepcopy
21
  from timeit import default_timer as timer
 
 
22
  from api.db import LLMType, ParserType,StatusEnum
23
  from api.db.db_models import Dialog, Conversation,DB
24
  from api.db.services.common_service import CommonService
@@ -61,6 +63,22 @@ class DialogService(CommonService):
61
  class ConversationService(CommonService):
62
  model = Conversation
63
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
 
65
  def message_fit_in(msg, max_length=4000):
66
  def count():
 
19
  import re
20
  from copy import deepcopy
21
  from timeit import default_timer as timer
22
+
23
+
24
  from api.db import LLMType, ParserType,StatusEnum
25
  from api.db.db_models import Dialog, Conversation,DB
26
  from api.db.services.common_service import CommonService
 
63
  class ConversationService(CommonService):
64
  model = Conversation
65
 
66
@classmethod
@DB.connection_context()
def get_list(cls, dialog_id, page_number, items_per_page, orderby, desc, id, name):
    """Return one page of a dialog's sessions as a list of dicts.

    Args:
        dialog_id: Dialog whose sessions are selected.
        page_number: Page passed to ``paginate``.
        items_per_page: Page size passed to ``paginate``.
        orderby: Field name resolved through ``cls.model.getter_by``.
        desc: Truthy for descending order, falsy for ascending.
        id: Optional session id; applied as a filter only when truthy.
        name: Optional session name; applied as a filter only when truthy.
    """
    model = cls.model
    query = model.select().where(model.dialog_id == dialog_id)

    # Optional filters are added only when a value was supplied.
    if id:
        query = query.where(model.id == id)
    if name:
        query = query.where(model.name == name)

    sort_field = model.getter_by(orderby)
    ordering = sort_field.desc() if desc else sort_field.asc()
    page = query.order_by(ordering).paginate(page_number, items_per_page)
    return list(page.dicts())
82
 
83
  def message_fit_in(msg, max_length=4000):
84
  def count():
api/db/services/document_service.py CHANGED
@@ -49,6 +49,29 @@ from rag.utils.redis_conn import REDIS_CONN
49
  class DocumentService(CommonService):
50
  model = Document
51
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  @classmethod
53
  @DB.connection_context()
54
  def get_by_kb_id(cls, kb_id, page_number, items_per_page,
@@ -268,7 +291,7 @@ class DocumentService(CommonService):
268
  @classmethod
269
  @DB.connection_context()
270
  def get_thumbnails(cls, docids):
271
- fields = [cls.model.id, cls.model.kb_id, cls.model.thumbnail]
272
  return list(cls.model.select(
273
  *fields).where(cls.model.id.in_(docids)).dicts())
274
 
 
49
  class DocumentService(CommonService):
50
  model = Document
51
 
52
@classmethod
@DB.connection_context()
def get_list(cls, kb_id, page_number, items_per_page,
             orderby, desc, keywords, id):
    """Return one page of a knowledge base's documents plus the match count.

    Args:
        kb_id: Knowledge base whose documents are selected.
        page_number: Page passed to ``paginate``.
        items_per_page: Page size passed to ``paginate``.
        orderby: Field name resolved through ``cls.model.getter_by``.
        desc: Truthy for descending order, falsy for ascending.
        keywords: Optional text matched against the lower-cased document
            name with ``contains``; applied only when truthy.
        id: Optional document id; applied as a filter only when truthy.

    Returns:
        ``(documents, count)`` where ``documents`` is the requested page as a
        list of dicts and ``count`` is taken before ordering and pagination.
    """
    model = cls.model
    query = model.select().where(model.kb_id == kb_id)

    # Optional filters are added only when a value was supplied.
    if id:
        query = query.where(model.id == id)
    if keywords:
        query = query.where(fn.LOWER(model.name).contains(keywords.lower()))

    # Counted on the filtered query, i.e. across all pages.
    count = query.count()

    sort_field = model.getter_by(orderby)
    ordering = sort_field.desc() if desc else sort_field.asc()
    page = query.order_by(ordering).paginate(page_number, items_per_page)
    return list(page.dicts()), count
73
+
74
+
75
  @classmethod
76
  @DB.connection_context()
77
  def get_by_kb_id(cls, kb_id, page_number, items_per_page,
 
291
  @classmethod
292
  @DB.connection_context()
293
  def get_thumbnails(cls, docids):
294
+ fields = [cls.model.id, cls.model.thumbnail]
295
  return list(cls.model.select(
296
  *fields).where(cls.model.id.in_(docids)).dicts())
297
 
api/http_api.md CHANGED
@@ -1441,60 +1441,196 @@ Create a chat session
1441
  ### Request
1442
 
1443
  - Method: POST
1444
- - URL: `/api/v1/chat/{chat_id}/session`
1445
  - Headers:
1446
  - `content-Type: application/json`
1447
- - 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
 
 
1448
 
1449
  #### Request example
 
1450
  curl --request POST \
1451
  --url http://{address}/api/v1/chat/{chat_id}/session \
1452
  --header 'Content-Type: application/json' \
1453
  --header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}' \
1454
- --data-binary '{
1455
  "name": "new session"
1456
  }'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1457
 
1458
  ## List the sessions of a chat
1459
 
1460
- **GET** `/api/v1/chat/{chat_id}/session`
1461
 
1462
- List all the session of a chat
1463
 
1464
  ### Request
1465
 
1466
  - Method: GET
1467
- - URL: `/api/v1/chat/{chat_id}/session`
1468
  - Headers:
1469
- - `content-Type: application/json`
1470
  - 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
1471
 
1472
  #### Request example
 
1473
  curl --request GET \
1474
- --url http://{address}/api/v1/chat/554e96746aaa11efb06b0242ac120005/session \
1475
- --header 'Content-Type: application/json' \
1476
  --header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1477
 
1478
- ## Delete a chat session
 
 
1479
 
1480
- **DELETE** `/api/v1/chat/{chat_id}/session/{session_id}`
 
 
1481
 
1482
- Delete a chat session
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1483
 
1484
  ### Request
1485
 
1486
  - Method: DELETE
1487
- - URL: `/api/v1/chat/{chat_id}/session/{session_id}`
1488
  - Headers:
1489
  - `content-Type: application/json`
1490
  - 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
 
 
1491
 
1492
  #### Request example
 
 
1493
  curl --request DELETE \
1494
- --url http://{address}/api/v1/chat/554e96746aaa11efb06b0242ac120005/session/791aed9670ea11efbb7e0242ac120007 \
1495
- --header 'Content-Type: application/json' \
1496
- --header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
 
 
 
 
1497
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1498
  ## Update a chat session
1499
 
1500
  **PUT** `/api/v1/chat/{chat_id}/session/{session_id}`
@@ -1504,20 +1640,45 @@ Update a chat session
1504
  ### Request
1505
 
1506
  - Method: PUT
1507
- - URL: `/api/v1/chat/{chat_id}/session/{session_id}`
1508
  - Headers:
1509
  - `content-Type: application/json`
1510
  - 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
 
 
1511
 
1512
  #### Request example
 
1513
  curl --request PUT \
1514
- --url http://{address}/api/v1/chat/554e96746aaa11efb06b0242ac120005/session/791aed9670ea11efbb7e0242ac120007 \
1515
  --header 'Content-Type: application/json' \
1516
- --header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
1517
- --data-binary '{
1518
  "name": "Updated session"
1519
  }'
1520
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1521
  ## Chat with a chat session
1522
 
1523
  **POST** `/api/v1/chat/{chat_id}/session/{session_id}/completion`
@@ -1527,17 +1688,139 @@ Chat with a chat session
1527
  ### Request
1528
 
1529
  - Method: POST
1530
- - URL: `/api/v1/chat/{chat_id}/session/{session_id}/completion`
1531
  - Headers:
1532
  - `content-Type: application/json`
1533
  - 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
 
 
 
 
1534
 
1535
  #### Request example
 
1536
  curl --request POST \
1537
- --url http://{address}/api/v1/chat/554e96746aaa11efb06b0242ac120005/session/791aed9670ea11efbb7e0242ac120007/completion \
1538
  --header 'Content-Type: application/json' \
1539
- --header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
1540
  --data-binary '{
1541
- "question": "Hello!",
1542
- "stream": true,
1543
  }'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1441
  ### Request
1442
 
1443
  - Method: POST
1444
+ - URL: `http://{address}/api/v1/chat/{chat_id}/session`
1445
  - Headers:
1446
  - `content-Type: application/json`
1447
+ - 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
1448
+ - Body:
1449
+ - name: `string`
1450
 
1451
  #### Request example
1452
+ ```bash
1453
  curl --request POST \
1454
  --url http://{address}/api/v1/chat/{chat_id}/session \
1455
  --header 'Content-Type: application/json' \
1456
  --header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}' \
1457
+ --data '{
1458
  "name": "new session"
1459
  }'
1460
+ ```
1461
+ #### Request parameters
1462
+ - `"id"`: (*Body parameter*)
1463
+ The ID of the created session used to identify different sessions.
1464
+ - `None`
1465
+ - `id` cannot be provided when creating.
1466
+
1467
+ - `"name"`: (*Body parameter*)
1468
+ The name of the created session.
1469
+ - `"New session"`
1470
+
1471
+ - `"messages"`: (*Body parameter*)
1472
+ The messages of the created session.
1473
+ - `[{"role": "assistant", "content": "Hi! I am your assistant, can I help you?"}]`
1474
+ - `messages` cannot be provided when creating.
1475
+
1476
+ - `"chat_id"`: (*Path parameter*)
1477
+ The ID of the associated chat.
1478
+ - `""`
1479
+ - `chat_id` cannot be changed.
1480
+
1481
+ ### Response
1482
+ Success
1483
+ ```json
1484
+ {
1485
+ "code": 0,
1486
+ "data": {
1487
+ "chat_id": "2ca4b22e878011ef88fe0242ac120005",
1488
+ "create_date": "Fri, 11 Oct 2024 08:46:14 GMT",
1489
+ "create_time": 1728636374571,
1490
+ "id": "4606b4ec87ad11efbc4f0242ac120006",
1491
+ "messages": [
1492
+ {
1493
+ "content": "Hi! I am your assistant,can I help you?",
1494
+ "role": "assistant"
1495
+ }
1496
+ ],
1497
+ "name": "new session",
1498
+ "update_date": "Fri, 11 Oct 2024 08:46:14 GMT",
1499
+ "update_time": 1728636374571
1500
+ }
1501
+ }
1502
+ ```
1503
+ Error
1504
+ ```json
1505
+ {
1506
+ "code": 102,
1507
+ "message": "Name can not be empty."
1508
+ }
1509
+ ```
1510
 
1511
  ## List the sessions of a chat
1512
 
1513
+ **GET** `/api/v1/chat/{chat_id}/session?page={page}&page_size={page_size}&orderby={orderby}&desc={desc}&name={session_name}&id={session_id}`
1514
 
1515
+ List all sessions under the chat based on the filtering criteria.
1516
 
1517
  ### Request
1518
 
1519
  - Method: GET
1520
+ - URL: `http://{address}/api/v1/chat/{chat_id}/session?page={page}&page_size={page_size}&orderby={orderby}&desc={desc}&name={session_name}&id={session_id}`
1521
  - Headers:
 
1522
  - 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
1523
 
1524
  #### Request example
1525
+ ```bash
1526
  curl --request GET \
1527
+ --url 'http://{address}/api/v1/chat/{chat_id}/session?page={page}&page_size={page_size}&orderby={orderby}&desc={desc}&name={session_name}&id={session_id}' \
 
1528
  --header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
1529
+ ```
1530
+
1531
+ #### Request Parameters
1532
+ - `"page"`: (*Query parameter*)
1533
+ The current page number to retrieve from the paginated data. This parameter determines which set of records will be fetched.
1534
+ - `1`
1535
+
1536
+ - `"page_size"`: (*Query parameter*)
1537
+ The number of records to retrieve per page. This controls how many records will be included in each page.
1538
+ - `1024`
1539
+
1540
+ - `"orderby"`: (*Query parameter*)
1541
+ The field by which the records should be sorted. This specifies the attribute or column used to order the results.
1542
+ - `"create_time"`
1543
 
1544
+ - `"desc"`: (*Query parameter*)
1545
+ A boolean flag indicating whether the sorting should be in descending order.
1546
+ - `True`
1547
 
1548
+ - `"id"`: (*Query parameter*)
1549
+ The ID of the session to be retrieved.
1550
+ - `None`
1551
 
1552
+ - `"name"`: (*Query parameter*)
1553
+ The name of the session to be retrieved.
1554
+ - `None`
1555
+ ### Response
1556
+ Success
1557
+ ```json
1558
+ {
1559
+ "code": 0,
1560
+ "data": [
1561
+ {
1562
+ "chat": "2ca4b22e878011ef88fe0242ac120005",
1563
+ "create_date": "Fri, 11 Oct 2024 08:46:43 GMT",
1564
+ "create_time": 1728636403974,
1565
+ "id": "578d541e87ad11ef96b90242ac120006",
1566
+ "messages": [
1567
+ {
1568
+ "content": "Hi! I am your assistant,can I help you?",
1569
+ "role": "assistant"
1570
+ }
1571
+ ],
1572
+ "name": "new session",
1573
+ "update_date": "Fri, 11 Oct 2024 08:46:43 GMT",
1574
+ "update_time": 1728636403974
1575
+ }
1576
+ ]
1577
+ }
1578
+ ```
1579
+ Error
1580
+ ```json
1581
+ {
1582
+ "code": 102,
1583
+ "message": "The session doesn't exist"
1584
+ }
1585
+ ```
1586
+
1587
+
1588
+ ## Delete chat sessions
1589
+
1590
+ **DELETE** `/api/v1/chat/{chat_id}/session`
1591
+
1592
+ Delete chat sessions
1593
 
1594
  ### Request
1595
 
1596
  - Method: DELETE
1597
+ - URL: `http://{address}/api/v1/chat/{chat_id}/session`
1598
  - Headers:
1599
  - `content-Type: application/json`
1600
  - 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
1601
+ - Body:
1602
+ - `ids`: List[string]
1603
 
1604
  #### Request example
1605
+ ```bash
1606
+ # Pass the IDs of the sessions to delete in the request body.
1607
  curl --request DELETE \
1608
+ --url http://{address}/api/v1/chat/{chat_id}/session \
1609
+ --header 'Content-Type: application/json' \
1610
+ --header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}' \
1611
+ --data '{
1612
+ "ids": ["test_1", "test_2"]
1613
+ }'
1614
+ ```
1615
 
1616
+ #### Request Parameters
1617
+ - `ids`: (*Body Parameter*)
1618
+ IDs of the sessions to be deleted.
1619
+ - `None`
1620
+ ### Response
1621
+ Success
1622
+ ```json
1623
+ {
1624
+ "code": 0
1625
+ }
1626
+ ```
1627
+ Error
1628
+ ```json
1629
+ {
1630
+ "code": 102,
1631
+ "message": "The chat doesn't own the session"
1632
+ }
1633
+ ```
1634
  ## Update a chat session
1635
 
1636
  **PUT** `/api/v1/chat/{chat_id}/session/{session_id}`
 
1640
  ### Request
1641
 
1642
  - Method: PUT
1643
+ - URL: `http://{address}/api/v1/chat/{chat_id}/session/{session_id}`
1644
  - Headers:
1645
  - `content-Type: application/json`
1646
  - 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
1647
+ - Body:
1648
+ - `name`: string
1649
 
1650
  #### Request example
1651
+ ```bash
1652
  curl --request PUT \
1653
+ --url http://{address}/api/v1/chat/{chat_id}/session/{session_id} \
1654
  --header 'Content-Type: application/json' \
1655
+ --header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}' \
1656
+ --data '{
1657
  "name": "Updated session"
1658
  }'
1659
 
1660
+ ```
1661
+
1662
+ #### Request Parameter
1663
+ - `name`: (*Body Parameter*)
1664
+ The new name of the session.
1665
+ - `None`
1666
+
1667
+ ### Response
1668
+ Success
1669
+ ```json
1670
+ {
1671
+ "code": 0
1672
+ }
1673
+ ```
1674
+ Error
1675
+ ```json
1676
+ {
1677
+ "code": 102,
1678
+ "message": "Name can not be empty."
1679
+ }
1680
+ ```
1681
+
1682
  ## Chat with a chat session
1683
 
1684
  **POST** `/api/v1/chat/{chat_id}/session/{session_id}/completion`
 
1688
  ### Request
1689
 
1690
  - Method: POST
1691
+ - URL: `http://{address}/api/v1/chat/{chat_id}/session/{session_id}/completion`
1692
  - Headers:
1693
  - `content-Type: application/json`
1694
  - 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
1695
+ - Body:
1696
+ - `question`: string
1697
+ - `stream`: bool
1698
+
1699
 
1700
  #### Request example
1701
+ ```bash
1702
  curl --request POST \
1703
+ --url http://{address}/api/v1/chat/{chat_id}/session/{session_id}/completion \
1704
  --header 'Content-Type: application/json' \
1705
+ --header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}' \
1706
  --data-binary '{
1707
+ "question": "你好!",
1708
+ "stream": true
1709
  }'
1710
+ ```
1711
+ #### Request Parameters
1712
+ - `question`:(*Body Parameter*)
1713
+ The question you want to ask.
1714
+ - question is required.
1715
+ `None`
1716
+ - `stream`: (*Body Parameter*)
1717
+ The approach of streaming text generation.
1718
+ `False`
1719
+ ### Response
1720
+ Success
1721
+ ```json
1722
+ data: {
1723
+ "code": 0,
1724
+ "data": {
1725
+ "answer": "您好!有什么具体的问题或者需要的帮助",
1726
+ "reference": {},
1727
+ "audio_binary": null,
1728
+ "id": "31153052-7bac-4741-a513-ed07d853f29e"
1729
+ }
1730
+ }
1731
+
1732
+ data: {
1733
+ "code": 0,
1734
+ "data": {
1735
+ "answer": "您好!有什么具体的问题或者需要的帮助可以告诉我吗?我在这里是为了帮助",
1736
+ "reference": {},
1737
+ "audio_binary": null,
1738
+ "id": "31153052-7bac-4741-a513-ed07d853f29e"
1739
+ }
1740
+ }
1741
+
1742
+ data: {
1743
+ "code": 0,
1744
+ "data": {
1745
+ "answer": "您好!有什么具体的问题或者需要的帮助可以告诉我吗?我在这里是为了帮助您的。如果您有任何疑问或是需要获取",
1746
+ "reference": {},
1747
+ "audio_binary": null,
1748
+ "id": "31153052-7bac-4741-a513-ed07d853f29e"
1749
+ }
1750
+ }
1751
+
1752
+ data: {
1753
+ "code": 0,
1754
+ "data": {
1755
+ "answer": "您好!有什么具体的问题或者需要的帮助可以告诉我吗?我在这里是为了帮助您的。如果您有任何疑问或是需要获取某些信息,请随时提出。",
1756
+ "reference": {},
1757
+ "audio_binary": null,
1758
+ "id": "31153052-7bac-4741-a513-ed07d853f29e"
1759
+ }
1760
+ }
1761
+
1762
+ data: {
1763
+ "code": 0,
1764
+ "data": {
1765
+ "answer": "您好!有什么具体的问题或者需要的帮助可以告诉我吗 ##0$$?我在这里是为了帮助您的。如果您有任何疑问或是需要获取某些信息,请随时提出。",
1766
+ "reference": {
1767
+ "total": 19,
1768
+ "chunks": [
1769
+ {
1770
+ "chunk_id": "9d87f9d70a0d8a7565694a81fd4c5d5f",
1771
+ "content_ltks": "当所有知识库内容都与问题无关时 ,你的回答必须包括“知识库中未找到您要的答案!”这句话。回答需要考虑聊天历史。\r\n以下���知识库:\r\n{knowledg}\r\n以上是知识库\r\n\"\"\"\r\n 1\r\n 2\r\n 3\r\n 4\r\n 5\r\n 6\r\n总结\r\n通过上面的介绍,可以对开源的 ragflow有了一个大致的了解,与前面的有道qanyth整体流程还是比较类似的。 ",
1772
+ "content_with_weight": "当所有知识库内容都与问题无关时,你的回答必须包括“知识库中未找到您要的答案!”这句话。回答需要考虑聊天历史。\r\n 以下是知识库:\r\n {knowledge}\r\n 以上是知识库\r\n\"\"\"\r\n1\r\n2\r\n3\r\n4\r\n5\r\n6\r\n总结\r\n通过上面的介绍,可以对开源的 RagFlow 有了一个大致的了解,与前面的 有道 QAnything 整体流程还是比较类似的。",
1773
+ "doc_id": "5c5999ec7be811ef9cab0242ac120005",
1774
+ "docnm_kwd": "1.txt",
1775
+ "kb_id": "c7ee74067a2c11efb21c0242ac120006",
1776
+ "important_kwd": [],
1777
+ "img_id": "",
1778
+ "similarity": 0.38337178633282265,
1779
+ "vector_similarity": 0.3321336754679629,
1780
+ "term_similarity": 0.4053309767034769,
1781
+ "positions": [
1782
+ ""
1783
+ ]
1784
+ },
1785
+ {
1786
+ "chunk_id": "895d34de762e674b43e8613c6fb54c6d",
1787
+ "content_ltks": "\r\n\r\n实际内容可能会超过大模型的输入token数量,因此在调用大模型前会调用api/db/servic/dialog_service.py文件中 messag_fit_in ()根据大模型可用的 token数量进行过滤。这部分与有道的 qanyth的实现大同小异,就不额外展开了。\r\n\r\n将检索的内容,历史聊天记录以及问题构造为 prompt ,即可作为大模型的输入了 ,默认的英文prompt如下所示:\r\n\r\n\"\"\"\r\nyou are an intellig assistant. pleas summar the content of the knowledg base to answer the question. pleas list thedata in the knowledg base and answer in detail. when all knowledg base content is irrelev to the question , your answer must includ the sentenc\"the answer you are lookfor isnot found in the knowledg base!\" answer needto consid chat history.\r\n here is the knowledg base:\r\n{ knowledg}\r\nthe abov is the knowledg base.\r\n\"\"\"\r\n1\r\n 2\r\n 3\r\n 4\r\n 5\r\n 6\r\n对应的中文prompt如下所示:\r\n\r\n\"\"\"\r\n你是一个智能助手,请总结知识库的内容来回答问题,请列举知识库中的数据详细回答。 ",
1788
+ "content_with_weight": "\r\n\r\n实际内容可能会超过大模型的输入 token 数量,因此在调用大模型前会调用 api/db/services/dialog_service.py 文件中 message_fit_in() 根据大模型可用的 token 数量进行过滤。这部分与有道的 QAnything 的实现大同小异,就不额外展开了。\r\n\r\n将检索的内容,历史聊天记录以及问题构造为 prompt,即可作为大模型的输入了,默认的英文 prompt 如下所示:\r\n\r\n\"\"\"\r\nYou are an intelligent assistant. Please summarize the content of the knowledge base to answer the question. Please list the data in the knowledge base and answer in detail. When all knowledge base content is irrelevant to the question, your answer must include the sentence \"The answer you are looking for is not found in the knowledge base!\" Answers need to consider chat history.\r\n Here is the knowledge base:\r\n {knowledge}\r\n The above is the knowledge base.\r\n\"\"\"\r\n1\r\n2\r\n3\r\n4\r\n5\r\n6\r\n对应的中文 prompt 如下所示:\r\n\r\n\"\"\"\r\n你是一个智能助手,请总结知识库的内容来回答问题,请列举知识库中的数据详细回答。",
1789
+ "doc_id": "5c5999ec7be811ef9cab0242ac120005",
1790
+ "docnm_kwd": "1.txt",
1791
+ "kb_id": "c7ee74067a2c11efb21c0242ac120006",
1792
+ "important_kwd": [],
1793
+ "img_id": "",
1794
+ "similarity": 0.2788204323926715,
1795
+ "vector_similarity": 0.35489427679953667,
1796
+ "term_similarity": 0.2462173562183008,
1797
+ "positions": [
1798
+ ""
1799
+ ]
1800
+ }
1801
+ ],
1802
+ "doc_aggs": [
1803
+ {
1804
+ "doc_name": "1.txt",
1805
+ "doc_id": "5c5999ec7be811ef9cab0242ac120005",
1806
+ "count": 2
1807
+ }
1808
+ ]
1809
+ },
1810
+ "prompt": "你是一个智能助手,请总结知识库的内容来回答问题,请列举知识库中的数据详细回答。当所有知识库内容都与问题无关时,你的回答必须包括“知识库中未找到您要的答案!”这句话。回答需要考虑聊天历史。\n 以下是知识库:\n 当所有知识库内容都与问题无关时,你的回答必须包括“知识库中未找到您要的答案!”这句话。回答需要考虑聊天历史。\r\n 以下是知识库:\r\n {knowledge}\r\n 以上是知识库\r\n\"\"\"\r\n1\r\n2\r\n3\r\n4\r\n5\r\n6\r\n总结\r\n通过上面的介绍,可以对开源的 RagFlow 有了一个大致的了解,与前面的 有道 QAnything 整体流程还是比较类似的。\n\n------\n\n\r\n\r\n实际内容可能会超过大模型的输入 token 数量,因此在调用大模型前会调用 api/db/services/dialog_service.py 文件中 message_fit_in() 根据大模型可用的 token 数量进行过滤。这部分与有道的 QAnything 的实现大同小异,就不额外展开了。\r\n\r\n将检索的内容,历史聊天记录以及问题构造为 prompt,即可作为大模型的输入了,默认的英文 prompt 如下所示:\r\n\r\n\"\"\"\r\nYou are an intelligent assistant. Please summarize the content of the knowledge base to answer the question. Please list the data in the knowledge base and answer in detail. When all knowledge base content is irrelevant to the question, your answer must include the sentence \"The answer you are looking for is not found in the knowledge base!\" Answers need to consider chat history.\r\n Here is the knowledge base:\r\n {knowledge}\r\n The above is the knowledge base.\r\n\"\"\"\r\n1\r\n2\r\n3\r\n4\r\n5\r\n6\r\n对应的中文 prompt 如下所示:\r\n\r\n\"\"\"\r\n你是一个智能助手,请总结知识库的内容来回答问题,请列举知识库中的数据详细回答。\n 以上是知识库。\n\n### Query:\n你好,请问有什么问题需要我帮忙解答吗?\n\n### Elapsed\n - Retrieval: 9131.1 ms\n - LLM: 12802.6 ms",
1811
+ "id": "31153052-7bac-4741-a513-ed07d853f29e"
1812
+ }
1813
+ }
1814
+
1815
+ data:{
1816
+ "code": 0,
1817
+ "data": true
1818
+ }
1819
+ ```
1820
+ Error
1821
+ ```json
1822
+ {
1823
+ "code": 102,
1824
+ "message": "Please input your question."
1825
+ }
1826
+ ```
api/python_api_reference.md CHANGED
@@ -906,7 +906,7 @@ Chat-session APIs
906
  ## Create session
907
 
908
  ```python
909
- assistant_1.create_session(name: str = "New session") -> Session
910
  ```
911
 
912
  ### Returns
@@ -916,8 +916,7 @@ A `session` object.
916
  #### id: `str`
917
 
918
  The id of the created session is used to identify different sessions.
919
- - `id` cannot be provided in creating
920
- - `id` is required in updating
921
 
922
  #### name: `str`
923
 
@@ -936,10 +935,10 @@ Defaults:
936
  [{"role": "assistant", "content": "Hi! I am your assistant,can I help you?"}]
937
  ```
938
 
939
- #### assistant_id: `str`
940
 
941
- The id of associated assistant. Defaults to `""`.
942
- - `assistant_id` is required in creating if you use HTTP API.
943
 
944
  ### Examples
945
 
@@ -947,81 +946,21 @@ The id of associated assistant. Defaults to `""`.
947
  from ragflow import RAGFlow
948
 
949
  rag = RAGFlow(api_key="xxxxxx", base_url="http://xxx.xx.xx.xxx:9380")
950
- assi = rag.get_assistant(name="Miss R")
 
951
  sess = assi.create_session()
952
  ```
953
 
954
- ## Retrieve session
955
 
956
- ```python
957
- Assistant.get_session(id: str) -> Session
958
- ```
959
-
960
- ### Parameters
961
-
962
- #### id: `str`, *Required*
963
-
964
- ???????????????????????????????
965
-
966
- ### Returns
967
-
968
- ### Returns
969
-
970
- A `session` object.
971
-
972
- #### id: `str`
973
-
974
- The id of the created session is used to identify different sessions.
975
- - `id` cannot be provided in creating
976
- - `id` is required in updating
977
-
978
- #### name: `str`
979
-
980
- The name of the created session. Defaults to `"New session"`.
981
-
982
- #### messages: `List[Message]`
983
-
984
- The messages of the created session.
985
- - messages cannot be provided.
986
-
987
- Defaults:
988
-
989
- ??????????????????????????????????????????????????????????????????????????????????????????????
990
-
991
- ```
992
- [{"role": "assistant", "content": "Hi! I am your assistant,can I help you?"}]
993
- ```
994
-
995
- #### assistant_id: `str`
996
-
997
-
998
- ???????????????????????????????????????How to get
999
-
1000
- The id of associated assistant. Defaults to `""`.
1001
- - `assistant_id` is required in creating if you use HTTP API.
1002
-
1003
- ### Examples
1004
-
1005
- ```python
1006
- from ragflow import RAGFlow
1007
-
1008
- rag = RAGFlow(api_key="xxxxxx", base_url="http://xxx.xx.xx.xxx:9380")
1009
- assi = rag.get_assistant(name="Miss R")
1010
- sess = assi.get_session(id="d5c55d2270dd11ef9bd90242ac120007")
1011
- ```
1012
-
1013
- ---
1014
-
1015
- ## Save session settings
1016
 
1017
  ```python
1018
- Session.save() -> bool
1019
  ```
1020
 
1021
  ### Returns
1022
 
1023
- bool
1024
- description:the case of updating a session, True or False.
1025
 
1026
  ### Examples
1027
 
@@ -1029,10 +968,10 @@ description:the case of updating a session, True or False.
1029
  from ragflow import RAGFlow
1030
 
1031
  rag = RAGFlow(api_key="xxxxxx", base_url="http://xxx.xx.xx.xxx:9380")
1032
- assi = rag.get_assistant(name="Miss R")
1033
- sess = assi.get_session(id="d5c55d2270dd11ef9bd90242ac120007")
1034
- sess.name = "Updated session"
1035
- sess.save()
1036
  ```
1037
 
1038
  ---
@@ -1040,7 +979,7 @@ sess.save()
1040
  ## Chat
1041
 
1042
  ```python
1043
- Session.chat(question: str, stream: bool = False) -> Optional[Message, iter[Message]]
1044
  ```
1045
 
1046
  ### Parameters
@@ -1053,7 +992,6 @@ The question to start an AI chat. Defaults to `None`. ???????????????????
1053
 
1054
  The approach of streaming text generation. When stream is True, it outputs results in a streaming fashion; otherwise, it outputs the complete result after the model has finished generating.
1055
 
1056
- #### session_id: `str` ??????????????????
1057
 
1058
  ### Returns
1059
 
@@ -1098,7 +1036,8 @@ The auto-generated reference of the message. Each `chunk` object includes the fo
1098
  from ragflow import RAGFlow
1099
 
1100
  rag = RAGFlow(api_key="xxxxxx", base_url="http://xxx.xx.xx.xxx:9380")
1101
- assi = rag.get_assistant(name="Miss R")
 
1102
  sess = assi.create_session()
1103
 
1104
  print("\n==================== Miss R =====================\n")
@@ -1109,9 +1048,10 @@ while True:
1109
  print("\n==================== Miss R =====================\n")
1110
 
1111
  cont = ""
1112
- for ans in sess.chat(question, stream=True):
1113
  print(ans.content[len(cont):], end='', flush=True)
1114
  cont = ans.content
 
1115
  ```
1116
 
1117
  ---
@@ -1119,7 +1059,14 @@ while True:
1119
  ## List sessions
1120
 
1121
  ```python
1122
- Assistant.list_session() -> List[Session]
 
 
 
 
 
 
 
1123
  ```
1124
 
1125
  ### Returns
@@ -1133,24 +1080,54 @@ description: the List contains information about multiple assistant object, with
1133
  from ragflow import RAGFlow
1134
 
1135
  rag = RAGFlow(api_key="xxxxxx", base_url="http://xxx.xx.xx.xxx:9380")
1136
- assi = rag.get_assistant(name="Miss R")
1137
-
1138
- for sess in assi.list_session():
1139
  print(sess)
1140
  ```
1141
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1142
  ---
1143
 
1144
  ## Delete session
1145
 
1146
  ```python
1147
- Session.delete() -> bool
1148
  ```
1149
 
1150
  ### Returns
1151
 
1152
- bool
1153
- description:the case of deleting a session, True or False.
1154
 
1155
  ### Examples
1156
 
@@ -1158,7 +1135,12 @@ description:the case of deleting a session, True or False.
1158
  from ragflow import RAGFlow
1159
 
1160
  rag = RAGFlow(api_key="xxxxxx", base_url="http://xxx.xx.xx.xxx:9380")
1161
- assi = rag.get_assistant(name="Miss R")
1162
- sess = assi.create_session()
1163
- sess.delete()
1164
- ```
 
 
 
 
 
 
906
  ## Create session
907
 
908
  ```python
909
+ Chat.create_session(name: str = "New session") -> Session
910
  ```
911
 
912
  ### Returns
 
916
  #### id: `str`
917
 
918
  The id of the created session is used to identify different sessions.
919
+ - `id` cannot be provided when creating a session.
 
920
 
921
  #### name: `str`
922
 
 
935
  [{"role": "assistant", "content": "Hi! I am your assistant,can I help you?"}]
936
  ```
937
 
938
+ #### chat_id: `str`
939
 
940
+ The ID of the associated chat.
941
+ - `chat_id` can't be changed
942
 
943
  ### Examples
944
 
 
946
  from ragflow import RAGFlow
947
 
948
  rag = RAGFlow(api_key="xxxxxx", base_url="http://xxx.xx.xx.xxx:9380")
949
+ assi = rag.list_chats(name="Miss R")
950
+ assi = assi[0]
951
  sess = assi.create_session()
952
  ```
953
 
 
954
 
955
+ ## Update session
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
956
 
957
  ```python
958
+ Session.update(update_message:dict)
959
  ```
960
 
961
  ### Returns
962
 
963
+ no return
 
964
 
965
  ### Examples
966
 
 
968
  from ragflow import RAGFlow
969
 
970
  rag = RAGFlow(api_key="xxxxxx", base_url="http://xxx.xx.xx.xxx:9380")
971
+ assi = rag.list_chats(name="Miss R")
972
+ assi = assi[0]
973
+ sess = assi.create_session("new_session")
974
+ sess.update({"name": "Updated session"})
975
  ```
976
 
977
  ---
 
979
  ## Chat
980
 
981
  ```python
982
+ Session.ask(question: str, stream: bool = False) -> Union[Message, Iterator[Message]]
983
  ```
984
 
985
  ### Parameters
 
992
 
993
  The approach of streaming text generation. When stream is True, it outputs results in a streaming fashion; otherwise, it outputs the complete result after the model has finished generating.
994
 
 
995
 
996
  ### Returns
997
 
 
1036
  from ragflow import RAGFlow
1037
 
1038
  rag = RAGFlow(api_key="xxxxxx", base_url="http://xxx.xx.xx.xxx:9380")
1039
+ assi = rag.list_chats(name="Miss R")
1040
+ assi = assi[0]
1041
  sess = assi.create_session()
1042
 
1043
  print("\n==================== Miss R =====================\n")
 
1048
  print("\n==================== Miss R =====================\n")
1049
 
1050
  cont = ""
1051
+ for ans in sess.ask(question, stream=True):
1052
  print(ans.content[len(cont):], end='', flush=True)
1053
  cont = ans.content
1054
+
1055
  ```
1056
 
1057
  ---
 
1059
  ## List sessions
1060
 
1061
  ```python
1062
+ Chat.list_sessions(
1063
+ page: int = 1,
1064
+ page_size: int = 1024,
1065
+ orderby: str = "create_time",
1066
+ desc: bool = True,
1067
+ id: str = None,
1068
+ name: str = None
1069
+ ) -> List[Session]
1070
  ```
1071
 
1072
  ### Returns
 
1080
  from ragflow import RAGFlow
1081
 
1082
  rag = RAGFlow(api_key="xxxxxx", base_url="http://xxx.xx.xx.xxx:9380")
1083
+ assi = rag.list_chats(name="Miss R")
1084
+ assi = assi[0]
1085
+ for sess in assi.list_sessions():
1086
  print(sess)
1087
  ```
1088
 
1089
+ ### Parameters
1090
+
1091
+ #### page: `int`
1092
+
1093
+ The current page number to retrieve from the paginated data. This parameter determines which set of records will be fetched.
1094
+ - `1`
1095
+
1096
+ #### page_size: `int`
1097
+
1098
+ The number of records to retrieve per page. This controls how many records will be included in each page.
1099
+ - `1024`
1100
+
1101
+ #### orderby: `string`
1102
+
1103
+ The field by which the records should be sorted. This specifies the attribute or column used to order the results.
1104
+ - `"create_time"`
1105
+
1106
+ #### desc: `bool`
1107
+
1108
+ A boolean flag indicating whether the sorting should be in descending order.
1109
+ - `True`
1110
+
1111
+ #### id: `string`
1112
+
1113
+ The ID of the session to be retrieved.
1114
+ - `None`
1115
+
1116
+ #### name: `string`
1117
+
1118
+ The name of the session to be retrieved.
1119
+ - `None`
1120
  ---
1121
 
1122
  ## Delete session
1123
 
1124
  ```python
1125
+ Chat.delete_sessions(ids:List[str] = None)
1126
  ```
1127
 
1128
  ### Returns
1129
 
1130
+ no return
 
1131
 
1132
  ### Examples
1133
 
 
1135
  from ragflow import RAGFlow
1136
 
1137
  rag = RAGFlow(api_key="xxxxxx", base_url="http://xxx.xx.xx.xxx:9380")
1138
+ assi = rag.list_chats(name="Miss R")
1139
+ assi = assi[0]
1140
+ assi.delete_sessions(ids=["id_1","id_2"])
1141
+ ```
1142
+ ### Parameters
1143
+ #### ids: `List[string]`
1144
+ IDs of the sessions to be deleted.
1145
+ - `None`
1146
+
sdk/python/ragflow/modules/chat.py CHANGED
@@ -51,28 +51,28 @@ class Chat(Base):
51
 
52
 
53
  def create_session(self, name: str = "New session") -> Session:
54
- res = self.post("/session/save", {"name": name, "assistant_id": self.id})
55
  res = res.json()
56
- if res.get("retmsg") == "success":
57
  return Session(self.rag, res['data'])
58
- raise Exception(res["retmsg"])
59
 
60
- def list_session(self) -> List[Session]:
61
- res = self.get('/session/list', {"assistant_id": self.id})
 
62
  res = res.json()
63
- if res.get("retmsg") == "success":
64
  result_list = []
65
  for data in res["data"]:
66
  result_list.append(Session(self.rag, data))
67
  return result_list
68
- raise Exception(res["retmsg"])
69
 
70
- def get_session(self, id) -> Session:
71
- res = self.get("/session/get", {"id": id,"assistant_id":self.id})
72
  res = res.json()
73
- if res.get("retmsg") == "success":
74
- return Session(self.rag, res["data"])
75
- raise Exception(res["retmsg"])
76
 
77
  def get_prologue(self):
78
  return self.prompt.opener
 
51
 
52
 
53
  def create_session(self, name: str = "New session") -> Session:
54
+ res = self.post(f"/chat/{self.id}/session", {"name": name})
55
  res = res.json()
56
+ if res.get("code") == 0:
57
  return Session(self.rag, res['data'])
58
+ raise Exception(res["message"])
59
 
60
+ def list_sessions(self,page: int = 1, page_size: int = 1024, orderby: str = "create_time", desc: bool = True,
61
+ id: str = None, name: str = None) -> List[Session]:
62
+ res = self.get(f'/chat/{self.id}/session',{"page": page, "page_size": page_size, "orderby": orderby, "desc": desc, "id": id, "name": name} )
63
  res = res.json()
64
+ if res.get("code") == 0:
65
  result_list = []
66
  for data in res["data"]:
67
  result_list.append(Session(self.rag, data))
68
  return result_list
69
+ raise Exception(res["message"])
70
 
71
+ def delete_sessions(self,ids):
72
+ res = self.rm(f"/chat/{self.id}/session", {"ids": ids})
73
  res = res.json()
74
+ if res.get("code") != 0:
75
+ raise Exception(res.get("message"))
 
76
 
77
  def get_prologue(self):
78
  return self.prompt.opener
sdk/python/ragflow/modules/session.py CHANGED
@@ -8,20 +8,20 @@ class Session(Base):
8
  self.id = None
9
  self.name = "New session"
10
  self.messages = [{"role": "assistant", "content": "Hi! I am your assistant,can I help you?"}]
11
- self.assistant_id = None
12
  super().__init__(rag, res_dict)
13
 
14
- def chat(self, question: str, stream: bool = False):
15
  for message in self.messages:
16
  if "reference" in message:
17
  message.pop("reference")
18
- res = self.post("/session/completion",
19
- {"session_id": self.id, "question": question, "stream": True}, stream=stream)
20
  for line in res.iter_lines():
21
  line = line.decode("utf-8")
22
  if line.startswith("{"):
23
  json_data = json.loads(line)
24
- raise Exception(json_data["retmsg"])
25
  if line.startswith("data:"):
26
  json_data = json.loads(line[5:])
27
  if json_data["data"] != True:
@@ -52,19 +52,12 @@ class Session(Base):
52
  message = Message(self.rag, temp_dict)
53
  yield message
54
 
55
- def save(self):
56
- res = self.post("/session/save",
57
- {"id": self.id, "assistant_id": self.assistant_id, "name": self.name})
58
  res = res.json()
59
- if res.get("retmsg") == "success": return True
60
- raise Exception(res.get("retmsg"))
61
-
62
- def delete(self):
63
- res = self.rm("/session/delete", {"id": self.id})
64
- res = res.json()
65
- if res.get("retmsg") == "success": return True
66
- raise Exception(res.get("retmsg"))
67
-
68
 
69
  class Message(Base):
70
  def __init__(self, rag, res_dict):
 
8
  self.id = None
9
  self.name = "New session"
10
  self.messages = [{"role": "assistant", "content": "Hi! I am your assistant,can I help you?"}]
11
+ self.chat_id = None
12
  super().__init__(rag, res_dict)
13
 
14
+ def ask(self, question: str, stream: bool = False):
15
  for message in self.messages:
16
  if "reference" in message:
17
  message.pop("reference")
18
+ res = self.post(f"/chat/{self.chat_id}/session/{self.id}/completion",
19
+ {"question": question, "stream": True}, stream=stream)
20
  for line in res.iter_lines():
21
  line = line.decode("utf-8")
22
  if line.startswith("{"):
23
  json_data = json.loads(line)
24
+ raise Exception(json_data["message"])
25
  if line.startswith("data:"):
26
  json_data = json.loads(line[5:])
27
  if json_data["data"] != True:
 
52
  message = Message(self.rag, temp_dict)
53
  yield message
54
 
55
+ def update(self,update_message):
56
+ res = self.put(f"/chat/{self.chat_id}/session/{self.id}",
57
+ update_message)
58
  res = res.json()
59
+ if res.get("code") != 0:
60
+ raise Exception(res.get("message"))
 
 
 
 
 
 
 
61
 
62
  class Message(Base):
63
  def __init__(self, rag, res_dict):
sdk/python/test/t_session.py CHANGED
@@ -7,52 +7,44 @@ class TestSession:
7
  def test_create_session(self):
8
  rag = RAGFlow(API_KEY, HOST_ADDRESS)
9
  kb = rag.create_dataset(name="test_create_session")
10
- assistant = rag.create_assistant(name="test_create_session", knowledgebases=[kb])
11
  session = assistant.create_session()
12
  assert isinstance(session,Session), "Failed to create a session."
13
 
14
  def test_create_chat_with_success(self):
15
  rag = RAGFlow(API_KEY, HOST_ADDRESS)
16
  kb = rag.create_dataset(name="test_create_chat")
17
- assistant = rag.create_assistant(name="test_create_chat", knowledgebases=[kb])
18
  session = assistant.create_session()
19
  question = "What is AI"
20
- for ans in session.chat(question, stream=True):
21
  pass
22
  assert not ans.content.startswith("**ERROR**"), "Please check this error."
23
 
24
- def test_delete_session_with_success(self):
25
  rag = RAGFlow(API_KEY, HOST_ADDRESS)
26
  kb = rag.create_dataset(name="test_delete_session")
27
- assistant = rag.create_assistant(name="test_delete_session",knowledgebases=[kb])
28
  session=assistant.create_session()
29
- res=session.delete()
30
- assert res, "Failed to delete the dataset."
31
 
32
  def test_update_session_with_success(self):
33
  rag=RAGFlow(API_KEY,HOST_ADDRESS)
34
  kb=rag.create_dataset(name="test_update_session")
35
- assistant = rag.create_assistant(name="test_update_session",knowledgebases=[kb])
36
  session=assistant.create_session(name="old session")
37
- session.name="new session"
38
- res=session.save()
39
- assert res,"Failed to update the session"
40
 
41
- def test_get_session_with_success(self):
42
- rag=RAGFlow(API_KEY,HOST_ADDRESS)
43
- kb=rag.create_dataset(name="test_get_session")
44
- assistant = rag.create_assistant(name="test_get_session",knowledgebases=[kb])
45
- session = assistant.create_session()
46
- session_2= assistant.get_session(id=session.id)
47
- assert session.to_json()==session_2.to_json(),"Failed to get the session"
48
 
49
- def test_list_session_with_success(self):
50
  rag=RAGFlow(API_KEY,HOST_ADDRESS)
51
  kb=rag.create_dataset(name="test_list_session")
52
- assistant=rag.create_assistant(name="test_list_session",knowledgebases=[kb])
53
  assistant.create_session("test_1")
54
  assistant.create_session("test_2")
55
- sessions=assistant.list_session()
56
  if isinstance(sessions,list):
57
  for session in sessions:
58
  assert isinstance(session,Session),"Non-Session elements exist in the list"
 
7
  def test_create_session(self):
8
  rag = RAGFlow(API_KEY, HOST_ADDRESS)
9
  kb = rag.create_dataset(name="test_create_session")
10
+ assistant = rag.create_chat(name="test_create_session", knowledgebases=[kb])
11
  session = assistant.create_session()
12
  assert isinstance(session,Session), "Failed to create a session."
13
 
14
  def test_create_chat_with_success(self):
15
  rag = RAGFlow(API_KEY, HOST_ADDRESS)
16
  kb = rag.create_dataset(name="test_create_chat")
17
+ assistant = rag.create_chat(name="test_create_chat", knowledgebases=[kb])
18
  session = assistant.create_session()
19
  question = "What is AI"
20
+ for ans in session.ask(question, stream=True):
21
  pass
22
  assert not ans.content.startswith("**ERROR**"), "Please check this error."
23
 
24
+ def test_delete_sessions_with_success(self):
25
  rag = RAGFlow(API_KEY, HOST_ADDRESS)
26
  kb = rag.create_dataset(name="test_delete_session")
27
+ assistant = rag.create_chat(name="test_delete_session",knowledgebases=[kb])
28
  session=assistant.create_session()
29
+ res=assistant.delete_sessions(ids=[session.id])
30
+ assert res is None, "Failed to delete the dataset."
31
 
32
  def test_update_session_with_success(self):
33
  rag=RAGFlow(API_KEY,HOST_ADDRESS)
34
  kb=rag.create_dataset(name="test_update_session")
35
+ assistant = rag.create_chat(name="test_update_session",knowledgebases=[kb])
36
  session=assistant.create_session(name="old session")
37
+ res=session.update({"name":"new session"})
38
+ assert res is None,"Failed to update the session"
 
39
 
 
 
 
 
 
 
 
40
 
41
+ def test_list_sessions_with_success(self):
42
  rag=RAGFlow(API_KEY,HOST_ADDRESS)
43
  kb=rag.create_dataset(name="test_list_session")
44
+ assistant=rag.create_chat(name="test_list_session",knowledgebases=[kb])
45
  assistant.create_session("test_1")
46
  assistant.create_session("test_2")
47
+ sessions=assistant.list_sessions()
48
  if isinstance(sessions,list):
49
  for session in sessions:
50
  assert isinstance(session,Session),"Non-Session elements exist in the list"