liuhua committed · Commit cd7d2b9 · 1 Parent(s): 43b4969

Refactor API for document and session (#2819)

### What problem does this PR solve?

Refactor API for document and session.

### Type of change

- [x] Refactoring

---------

Co-authored-by: liuhua <[email protected]>
- api/apps/sdk/doc.py +290 -457
- api/apps/sdk/session.py +82 -130
- api/db/services/dialog_service.py +18 -0
- api/db/services/document_service.py +24 -1
- api/http_api.md +308 -25
- api/python_api_reference.md +72 -90
- sdk/python/ragflow/modules/chat.py +12 -12
- sdk/python/ragflow/modules/session.py +10 -17
- sdk/python/test/t_session.py +13 -21
api/apps/sdk/doc.py
CHANGED
Old version (lines removed in this commit are prefixed with "-"; several removed lines are cut short in the capture and are kept as they appear):

@@ -4,9 +4,11 @@ import datetime
 import json
 import traceback

 from flask import request
 from flask_login import login_required, current_user
 from elasticsearch_dsl import Q

 from rag.app.qa import rmPrefix, beAdoc
 from rag.nlp import search, rag_tokenizer, keyword_extraction

@@ -16,22 +18,22 @@ from api.db import LLMType, ParserType
 from api.db.services.knowledgebase_service import KnowledgebaseService
 from api.db.services.llm_service import TenantLLMService
 from api.db.services.user_service import UserTenantService
-from api.utils.api_utils import server_error_response,
 from api.db.services.document_service import DocumentService
 from api.settings import RetCode, retrievaler, kg_retrievaler
-from api.utils.api_utils import
 import hashlib
 import re
-from api.utils.api_utils import

 from api.db.db_models import Task, File

 from api.db.services.task_service import TaskService, queue_tasks
 from api.db.services.user_service import TenantService, UserTenantService

-from api.utils.api_utils import server_error_response,
-from api.utils.api_utils import

 from functools import partial
 from io import BytesIO

@@ -59,307 +61,163 @@ MAXIMUM_OF_UPLOADING_FILES = 256
 MAXIMUM_OF_UPLOADING_FILES = 256

-@manager.route('/dataset/<dataset_id>/
 @token_required
 def upload(dataset_id, tenant_id):
     if 'file' not in request.files:
-        return
     file_objs = request.files.getlist('file')
     for file_obj in file_objs:
         if file_obj.filename == '':
-            return
     e, kb = KnowledgebaseService.get_by_id(dataset_id)
     if not e:
         raise LookupError(f"Can't find the knowledgebase with ID {dataset_id}!")
     err, _ = FileService.upload_document(kb, file_objs, tenant_id)
     if err:
-        return
-    return

-@manager.route('/
 @token_required
-def
-    req = request.args
-    if "id" not in req and "name" not in req:
-        return get_data_error_result(
-            retmsg="Id or name should be provided")
-    doc_id = None
-    if "id" in req:
-        doc_id = req["id"]
-    if "name" in req:
-        doc_name = req["name"]
-        doc_id = DocumentService.get_doc_id_by_doc_name(doc_name)
-    e, doc = DocumentService.get_by_id(doc_id)
-    # rename key's name
-    key_mapping = {
-        "chunk_num": "chunk_count",
-        "kb_id": "knowledgebase_id",
-        "token_num": "token_count",
-        "parser_id": "parser_method",
-    }
-    renamed_doc = {}
-    for key, value in doc.to_dict().items():
-        new_key = key_mapping.get(key, key)
-        renamed_doc[new_key] = value
-    return get_json_result(data=renamed_doc)

-@manager.route('/save', methods=['POST'])
-@token_required
-def save_doc(tenant_id):
     req = request.json
-    doc_id = DocumentService.get_doc_id_by_doc_name(doc_name)
-    if not doc_id:
-        return get_json_result(retcode=400, retmsg="Document ID or name is required")
-    e, doc = DocumentService.get_by_id(doc_id)
-    if not e:
-        return get_data_error_result(retmsg="Document not found!")
-    # other value can't be changed
     if "chunk_count" in req:
         if req["chunk_count"] != doc.chunk_num:
-            return
-                retmsg="Can't change chunk_count.")
     if "token_count" in req:
         if req["token_count"] != doc.token_num:
-            return
-                retmsg="Can't change token_count.")
     if "progress" in req:
         if req['progress'] != doc.progress:
-            return
-                retmsg="Can't change progress.")
-    # change name or parse_method
-    if "name" in req and req["name"] != doc.name:
-        try:
-            if pathlib.Path(req["name"].lower()).suffix != pathlib.Path(
-                    doc.name.lower()).suffix:
-                return get_json_result(
-                    data=False,
-                    retmsg="The extension of file can't be changed",
-                    retcode=RetCode.ARGUMENT_ERROR)
-            for d in DocumentService.query(name=req["name"], kb_id=doc.kb_id):
-                if d.name == req["name"]:
-                    return get_data_error_result(
-                        retmsg="Duplicated document name in the same knowledgebase.")
-            if not DocumentService.update_by_id(
-                    doc_id, {"name": req["name"]}):
-                return get_data_error_result(
-                    retmsg="Database error (Document rename)!")
-            informs = File2DocumentService.get_by_document_id(doc_id)
-            if informs:
-                e, file = FileService.get_by_id(informs[0].file_id)
-                FileService.update_by_id(file.id, {"name": req["name"]})
-        except Exception as e:
-            return server_error_response(e)
-    if "parser_method" in req:
-        try:
-            if doc.parser_id.lower() == req["parser_method"].lower():
-                if "parser_config" in req:
-                    if req["parser_config"] == doc.parser_config:
-                        return get_json_result(data=True)
-                else:
-                    return get_json_result(data=True)
-            if doc.type == FileType.VISUAL or re.search(
-                    r"\.(ppt|pptx|pages)$", doc.name):
-                return get_data_error_result(retmsg="Not supported yet!")
-            e = DocumentService.update_by_id(doc.id,
-                                             {"parser_id": req["parser_method"], "progress": 0, "progress_msg": "",
-                                              "run": TaskStatus.UNSTART.value})
-            if not e:
-                return get_data_error_result(retmsg="Document not found!")
-            if "parser_config" in req:
-                DocumentService.update_parser_config(doc.id, req["parser_config"])
-            if doc.token_num > 0:
-                e = DocumentService.increment_chunk_num(doc.id, doc.kb_id, doc.token_num * -1, doc.chunk_num * -1,
-                                                        doc.process_duation * -1)
-                if not e:
-                    return get_data_error_result(retmsg="Document not found!")
-            tenant_id = DocumentService.get_tenant_id(req["id"])
-            if not tenant_id:
-                return get_data_error_result(retmsg="Tenant not found!")
-            ELASTICSEARCH.deleteByQuery(
-                Q("match", doc_id=doc.id), idxnm=search.index_name(tenant_id))
-        except Exception as e:
-            return server_error_response(e)
-    return get_json_result(data=True)

-        if not e:
-            return get_data_error_result(retmsg="Document not found!")
-        if doc.parser_id.lower() == req["parser_id"].lower():
             if "parser_config" in req:
                 if req["parser_config"] == doc.parser_config:
-                    return
             else:
-                return
         if doc.type == FileType.VISUAL or re.search(
                 r"\.(ppt|pptx|pages)$", doc.name):
-            return
         e = DocumentService.update_by_id(doc.id,
-            {"parser_id": req["
             "run": TaskStatus.UNSTART.value})
         if not e:
-            return
-        if "parser_config" in req:
-            DocumentService.update_parser_config(doc.id, req["parser_config"])
         if doc.token_num > 0:
             e = DocumentService.increment_chunk_num(doc.id, doc.kb_id, doc.token_num * -1, doc.chunk_num * -1,
                                                     doc.process_duation * -1)
             if not e:
-                return
-        tenant_id = DocumentService.get_tenant_id(req["
         if not tenant_id:
-            return
         ELASTICSEARCH.deleteByQuery(
             Q("match", doc_id=doc.id), idxnm=search.index_name(tenant_id))
-    except Exception as e:
-        return server_error_response(e)

-@manager.route('/rename', methods=['POST'])
-@login_required
-@validate_request("doc_id", "name")
-def rename():
-    req = request.json
-    try:
-        e, doc = DocumentService.get_by_id(req["doc_id"])
-        if not e:
-            return get_data_error_result(retmsg="Document not found!")
-        if pathlib.Path(req["name"].lower()).suffix != pathlib.Path(
-                doc.name.lower()).suffix:
-            return get_json_result(
-                data=False,
-                retmsg="The extension of file can't be changed",
-                retcode=RetCode.ARGUMENT_ERROR)
-        for d in DocumentService.query(name=req["name"], kb_id=doc.kb_id):
-            if d.name == req["name"]:
-                return get_data_error_result(
-                    retmsg="Duplicated document name in the same knowledgebase.")
-        if not DocumentService.update_by_id(
-                req["doc_id"], {"name": req["name"]}):
-            return get_data_error_result(
-                retmsg="Database error (Document rename)!")
-        informs = File2DocumentService.get_by_document_id(req["doc_id"])
-        if informs:
-            e, file = FileService.get_by_id(informs[0].file_id)
-            FileService.update_by_id(file.id, {"name": req["name"]})
-        return get_json_result(data=True)
-    except Exception as e:
-        return server_error_response(e)

-@manager.route(
 @token_required
-def
-        file
-        )
-    # Error
-    except Exception as e:
-        return construct_error_response(e)

-@manager.route('/dataset/<dataset_id>/documents', methods=['GET'])
 @token_required
 def list_docs(dataset_id, tenant_id):
-                break
-        else:
-            return get_json_result(
-                data=False, retmsg=f'Only owner of knowledgebase authorized for this operation.',
-                retcode=RetCode.OPERATING_ERROR)
-    keywords = request.args.get("keywords", "")
-    page_number = int(request.args.get("page", 1))
-    items_per_page = int(request.args.get("page_size", 15))
     orderby = request.args.get("orderby", "create_time")
-    except Exception as e:
-        return server_error_response(e)

-@manager.route('/
 @token_required
-def
     root_folder = FileService.get_root_folder(tenant_id)
     pf_id = root_folder["id"]
     FileService.init_knowledgebase_docs(pf_id, tenant_id)

@@ -368,15 +226,15 @@ def rm(tenant_id):
     try:
         e, doc = DocumentService.get_by_id(doc_id)
         if not e:
-            return
         tenant_id = DocumentService.get_tenant_id(doc_id)
         if not tenant_id:
-            return

         b, n = File2DocumentService.get_storage_address(doc_id=doc_id)

         if not DocumentService.remove_document(doc, tenant_id):
-            return
             retmsg="Database error (Document removal)!")

         f2d = File2DocumentService.get_by_document_id(doc_id)

@@ -388,80 +246,69 @@ def rm(tenant_id):
         errors += str(e)

     if errors:
-        return
-    return

-@manager.route("/<document_id>/status", methods=["GET"])
-@token_required
-def show_parsing_status(tenant_id, document_id):
-    try:
-        # valid document
-        exist, _ = DocumentService.get_by_id(document_id)
-        if not exist:
-            return construct_json_result(code=RetCode.DATA_ERROR,
-                                         message=f"This document: '{document_id}' is not a valid document.")
-        _, doc = DocumentService.get_by_id(document_id)  # get doc object
-        doc_attributes = doc.to_dict()
-        return construct_json_result(
-            data={"progress": doc_attributes["progress"], "status": TaskStatus(doc_attributes["status"]).name},
-            code=RetCode.SUCCESS
-        )
-    except Exception as e:
-        return construct_error_response(e)

-@manager.route('/run', methods=['POST'])
 @token_required
-def
     req = request.json
-        bucket, name = File2DocumentService.get_storage_address(doc_id=doc["id"])
-        queue_tasks(doc, bucket, name)
-        return get_json_result(data=True)
-    except Exception as e:
-        return server_error_response(e)

-@manager.route('/chunk/list', methods=['POST'])
 @token_required
     req = request.json
     question = req.get("keywords", "")
     try:
-        tenant_id = DocumentService.get_tenant_id(req["document_id"])
-        if not tenant_id:
-            return get_data_error_result(retmsg="Tenant not found!")
-        e, doc = DocumentService.get_by_id(doc_id)
-        if not e:
-            return get_data_error_result(retmsg="Document not found!")
     query = {
         "doc_ids": [doc_id], "page": page, "size": size, "question": question, "sort": True
     }

@@ -470,7 +317,7 @@ def list_chunk(tenant_id):
     sres = retrievaler.search(query, search.index_name(tenant_id), highlight=True)
     res = {"total": sres.total, "chunks": [], "doc": doc.to_dict()}
-    origin_chunks=[]
     for id in sres.ids:
         d = {
             "chunk_id": id,

@@ -490,7 +337,7 @@ def list_chunk(tenant_id):
             poss.append([float(d["positions"][i]), float(d["positions"][i + 1]), float(d["positions"][i + 2]),
                          float(d["positions"][i + 3]), float(d["positions"][i + 4])])
             d["positions"] = poss
         origin_chunks.append(d)
     ##rename keys
     for chunk in origin_chunks:

@@ -499,28 +346,34 @@ def list_chunk(tenant_id):
             "content_with_weight": "content",
             "doc_id": "document_id",
             "important_kwd": "important_keywords",
-            "img_id":"image_id",
         }
         renamed_chunk = {}
         for key, value in chunk.items():
             new_key = key_mapping.get(key, key)
             renamed_chunk[new_key] = value
         res["chunks"].append(renamed_chunk)
-    return
     except Exception as e:
         if str(e).find("not_found") > 0:
-            return
                 retcode=RetCode.DATA_ERROR)
         return server_error_response(e)

-@manager.route('/chunk
 @token_required
     req = request.json
     md5 = hashlib.md5()
-    md5.update((req["content"] +
     chunk_id = md5.hexdigest()
     d = {"id": chunk_id, "content_ltks": rag_tokenizer.tokenize(req["content"]),

@@ -530,80 +383,77 @@ def create(tenant_id):
     d["important_tks"] = rag_tokenizer.tokenize(" ".join(req.get("important_kwd", [])))
     d["create_time"] = str(datetime.datetime.now()).replace("T", " ")[:19]
     d["create_timestamp_flt"] = datetime.datetime.now().timestamp()
-    try:
-        e, doc = DocumentService.get_by_id(req["document_id"])
-        if not e:
-            return get_data_error_result(retmsg="Document not found!")
-        d["kb_id"] = [doc.kb_id]
-        d["docnm_kwd"] = doc.name
-        d["doc_id"] = doc.id
-        tenant_id = DocumentService.get_tenant_id(req["document_id"])
-        if not tenant_id:
-            return get_data_error_result(retmsg="Tenant not found!")
-        embd_id = DocumentService.get_embd_id(req["document_id"])
-        embd_mdl = TenantLLMService.model_instance(
-            tenant_id, LLMType.EMBEDDING.value, embd_id)
-        v, c = embd_mdl.encode([doc.name, req["content"]])
-        v = 0.1 * v[0] + 0.9 * v[1]
-        d["q_%d_vec" % len(v)] = v.tolist()
-        ELASTICSEARCH.upsert([d], search.index_name(tenant_id))
-        DocumentService.increment_chunk_num(
-            doc.id, doc.kb_id, c, 1, 0)
-        d["chunk_id"] = chunk_id
-        # rename keys
-        key_mapping = {
-            "chunk_id": "id",
-            "content_with_weight": "content",
-            "doc_id": "document_id",
-            "important_kwd": "important_keywords",
-            "kb_id": "dataset_id",
-            "create_timestamp_flt": "create_timestamp",
-            "create_time": "create_time",
-            "document_keyword": "document",
-        }
-        renamed_chunk = {}
-        for key, value in d.items():
-            if key in key_mapping:
-                new_key = key_mapping.get(key, key)
-                renamed_chunk[new_key] = value
-        # return get_json_result(data={"chunk_id": chunk_id})
-    except Exception as e:
-        return server_error_response(e)

-@manager.route('/chunk/rm', methods=['POST'])
 @token_required
     req = request.json
-    except Exception as e:
-        return server_error_response(e)

-@manager.route('/chunk/
 @token_required
     req = request.json
     d = {
-        "id":
         "content_with_weight": req["content"]}
     d["content_ltks"] = rag_tokenizer.tokenize(req["content"])
     d["content_sm_ltks"] = rag_tokenizer.fine_grained_tokenize(d["content_ltks"])

@@ -611,71 +461,54 @@ def set(tenant_id):
     d["important_tks"] = rag_tokenizer.tokenize(" ".join(req["important_keywords"]))
     if "available" in req:
         d["available_int"] = req["available"]
-    tenant_id
-            [rag_tokenizer.is_chinese(t) for t in q + a]))
-        v, c = embd_mdl.encode([doc.name, req["content"]])
-        v = 0.1 * v[0] + 0.9 * v[1] if doc.parser_id != ParserType.QA else v[1]
-        d["q_%d_vec" % len(v)] = v.tolist()
-        ELASTICSEARCH.upsert([d], search.index_name(tenant_id))
-        return get_json_result(data=True)
-    except Exception as e:
-        return server_error_response(e)

-@manager.route('/retrieval_test', methods=['POST'])
 @token_required
-@validate_request("knowledgebase_id", "question")
 def retrieval_test(tenant_id):
-    req = request.
     question = req["question"]
-    kb_id = req["
     if isinstance(kb_id, str): kb_id = [kb_id]
-    doc_ids = req.get("
     similarity_threshold = float(req.get("similarity_threshold", 0.2))
     vector_similarity_weight = float(req.get("vector_similarity_weight", 0.3))
     top = int(req.get("top_k", 1024))

     try:
-        tenants = UserTenantService.query(user_id=tenant_id)
-        for kid in kb_id:
-            for tenant in tenants:
-                if KnowledgebaseService.query(
-                        tenant_id=tenant.tenant_id, id=kid):
-                    break
-            else:
-                return get_json_result(
-                    data=False, retmsg=f'Only owner of knowledgebase authorized for this operation.',
-                    retcode=RetCode.OPERATING_ERROR)
     e, kb = KnowledgebaseService.get_by_id(kb_id[0])
     if not e:
-        return
     embd_mdl = TenantLLMService.model_instance(
         kb.tenant_id, LLMType.EMBEDDING.value, llm_name=kb.embd_id)

@@ -697,24 +530,24 @@ def retrieval_test(tenant_id):
         del c["vector"]

     ##rename keys
-    renamed_chunks=[]
     for chunk in ranks["chunks"]:
         key_mapping = {
             "chunk_id": "id",
             "content_with_weight": "content",
             "doc_id": "document_id",
             "important_kwd": "important_keywords",
-            "docnm_kwd":"document_keyword"
         }
-        rename_chunk={}
         for key, value in chunk.items():
             new_key = key_mapping.get(key, key)
             rename_chunk[new_key] = value
         renamed_chunks.append(rename_chunk)
     ranks["chunks"] = renamed_chunks
-    return
     except Exception as e:
         if str(e).find("not_found") > 0:
-            return
                 retcode=RetCode.DATA_ERROR)
     return server_error_response(e)
New version (lines added in this commit are prefixed with "+"):

 import json
 import traceback

+from botocore.docs.method import document_model_driven_method
 from flask import request
 from flask_login import login_required, current_user
 from elasticsearch_dsl import Q
+from sphinx.addnodes import document

 from rag.app.qa import rmPrefix, beAdoc
 from rag.nlp import search, rag_tokenizer, keyword_extraction

 from api.db.services.knowledgebase_service import KnowledgebaseService
 from api.db.services.llm_service import TenantLLMService
 from api.db.services.user_service import UserTenantService
+from api.utils.api_utils import server_error_response, get_error_data_result, validate_request
 from api.db.services.document_service import DocumentService
 from api.settings import RetCode, retrievaler, kg_retrievaler
+from api.utils.api_utils import get_result
 import hashlib
 import re
+from api.utils.api_utils import get_result, token_required, get_error_data_result

 from api.db.db_models import Task, File

 from api.db.services.task_service import TaskService, queue_tasks
 from api.db.services.user_service import TenantService, UserTenantService

+from api.utils.api_utils import server_error_response, get_error_data_result, validate_request

+from api.utils.api_utils import get_result, get_result, get_error_data_result

 from functools import partial
 from io import BytesIO

 MAXIMUM_OF_UPLOADING_FILES = 256


+@manager.route('/dataset/<dataset_id>/document', methods=['POST'])
 @token_required
 def upload(dataset_id, tenant_id):
     if 'file' not in request.files:
+        return get_error_data_result(
+            retmsg='No file part!', retcode=RetCode.ARGUMENT_ERROR)
     file_objs = request.files.getlist('file')
     for file_obj in file_objs:
         if file_obj.filename == '':
+            return get_result(
+                retmsg='No file selected!', retcode=RetCode.ARGUMENT_ERROR)
     e, kb = KnowledgebaseService.get_by_id(dataset_id)
     if not e:
         raise LookupError(f"Can't find the knowledgebase with ID {dataset_id}!")
     err, _ = FileService.upload_document(kb, file_objs, tenant_id)
     if err:
+        return get_result(
+            retmsg="\n".join(err), retcode=RetCode.SERVER_ERROR)
+    return get_result()
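For reference, a minimal sketch of calling the new upload route with the `requests` library. The host, the `/api/v1` prefix, and the Bearer-token `Authorization` header are assumptions about the deployment, not something this diff confirms:

```python
import requests

# Assumed values: adjust host, API key and dataset ID for your deployment.
BASE = "http://localhost:9380/api/v1"
HEADERS = {"Authorization": "Bearer <YOUR_API_KEY>"}
DATASET_ID = "<dataset_id>"

# POST /dataset/<dataset_id>/document with a multipart 'file' field,
# matching the request.files handling in the route above.
with open("manual.pdf", "rb") as f:
    resp = requests.post(f"{BASE}/dataset/{DATASET_ID}/document",
                         headers=HEADERS, files={"file": f})
print(resp.json())
```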

+@manager.route('/dataset/<dataset_id>/info/<document_id>', methods=['PUT'])
 @token_required
+def update_doc(tenant_id, dataset_id, document_id):
     req = request.json
+    if not KnowledgebaseService.query(id=dataset_id, tenant_id=tenant_id):
+        return get_error_data_result(retmsg='You do not own the dataset.')
+    doc = DocumentService.query(kb_id=dataset_id, id=document_id)
+    if not doc:
+        return get_error_data_result(retmsg='The dataset not own the document.')
+    doc = doc[0]
     if "chunk_count" in req:
         if req["chunk_count"] != doc.chunk_num:
+            return get_error_data_result(retmsg="Can't change chunk_count.")
     if "token_count" in req:
         if req["token_count"] != doc.token_num:
+            return get_error_data_result(retmsg="Can't change token_count.")
     if "progress" in req:
         if req['progress'] != doc.progress:
+            return get_error_data_result(retmsg="Can't change progress.")

+    if "name" in req and req["name"] != doc.name:
+        if pathlib.Path(req["name"].lower()).suffix != pathlib.Path(doc.name.lower()).suffix:
+            return get_result(retmsg="The extension of file can't be changed", retcode=RetCode.ARGUMENT_ERROR)
+        for d in DocumentService.query(name=req["name"], kb_id=doc.kb_id):
+            if d.name == req["name"]:
+                return get_error_data_result(
+                    retmsg="Duplicated document name in the same knowledgebase.")
+        if not DocumentService.update_by_id(
+                document_id, {"name": req["name"]}):
+            return get_error_data_result(
+                retmsg="Database error (Document rename)!")

+        informs = File2DocumentService.get_by_document_id(document_id)
+        if informs:
+            e, file = FileService.get_by_id(informs[0].file_id)
+            FileService.update_by_id(file.id, {"name": req["name"]})
+    if "parser_method" in req:
+        if doc.parser_id.lower() == req["parser_method"].lower():
             if "parser_config" in req:
                 if req["parser_config"] == doc.parser_config:
+                    return get_result(retcode=RetCode.SUCCESS)
             else:
+                return get_result(retcode=RetCode.SUCCESS)

         if doc.type == FileType.VISUAL or re.search(
                 r"\.(ppt|pptx|pages)$", doc.name):
+            return get_error_data_result(retmsg="Not supported yet!")

         e = DocumentService.update_by_id(doc.id,
+                                         {"parser_id": req["parser_method"], "progress": 0, "progress_msg": "",
                                           "run": TaskStatus.UNSTART.value})
         if not e:
+            return get_error_data_result(retmsg="Document not found!")
         if doc.token_num > 0:
             e = DocumentService.increment_chunk_num(doc.id, doc.kb_id, doc.token_num * -1, doc.chunk_num * -1,
                                                     doc.process_duation * -1)
             if not e:
+                return get_error_data_result(retmsg="Document not found!")
+        tenant_id = DocumentService.get_tenant_id(req["id"])
         if not tenant_id:
+            return get_error_data_result(retmsg="Tenant not found!")
         ELASTICSEARCH.deleteByQuery(
             Q("match", doc_id=doc.id), idxnm=search.index_name(tenant_id))
+        if "parser_config" in req:
+            DocumentService.update_parser_config(doc.id, req["parser_config"])

+    return get_result()
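A sketch of updating a document through the new route, under the same assumed base URL and Bearer-token header; the dataset and document IDs are placeholders:

```python
import requests

BASE = "http://localhost:9380/api/v1"                 # assumed host/prefix
HEADERS = {"Authorization": "Bearer <YOUR_API_KEY>"}  # assumed auth scheme

# PUT /dataset/<dataset_id>/info/<document_id>: rename a document and switch
# its parser; chunk_count, token_count and progress are rejected by the route.
payload = {"name": "manual_v2.pdf", "parser_method": "naive"}
resp = requests.put(f"{BASE}/dataset/<dataset_id>/info/<document_id>",
                    headers=HEADERS, json=payload)
print(resp.json())
```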

+@manager.route('/dataset/<dataset_id>/document/<document_id>', methods=['GET'])
 @token_required
+def download(tenant_id, dataset_id, document_id):
+    if not KnowledgebaseService.query(id=dataset_id, tenant_id=tenant_id):
+        return get_error_data_result(retmsg=f'You do not own the dataset {dataset_id}.')
+    doc = DocumentService.query(kb_id=dataset_id, id=document_id)
+    if not doc:
+        return get_error_data_result(retmsg=f'The dataset not own the document {doc.id}.')
+    # The process of downloading
+    doc_id, doc_location = File2DocumentService.get_storage_address(doc_id=document_id)  # minio address
+    file_stream = STORAGE_IMPL.get(doc_id, doc_location)
+    if not file_stream:
+        return construct_json_result(message="This file is empty.", code=RetCode.DATA_ERROR)
+    file = BytesIO(file_stream)
+    # Use send_file with a proper filename and MIME type
+    return send_file(
+        file,
+        as_attachment=True,
+        download_name=doc[0].name,
+        mimetype='application/octet-stream'  # Set a default MIME type
+    )
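A sketch of downloading the stored file through this route; host, prefix and auth header are assumptions as before:

```python
import requests

BASE = "http://localhost:9380/api/v1"                 # assumed host/prefix
HEADERS = {"Authorization": "Bearer <YOUR_API_KEY>"}  # assumed auth scheme

# GET /dataset/<dataset_id>/document/<document_id> returns the raw file bytes
# as an attachment with an application/octet-stream MIME type.
resp = requests.get(f"{BASE}/dataset/<dataset_id>/document/<document_id>", headers=HEADERS)
with open("downloaded.pdf", "wb") as f:
    f.write(resp.content)
```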
+@manager.route('/dataset/<dataset_id>/info', methods=['GET'])
 @token_required
 def list_docs(dataset_id, tenant_id):
+    if not KnowledgebaseService.query(id=dataset_id, tenant_id=tenant_id):
+        return get_error_data_result(retmsg=f"You don't own the dataset {dataset_id}. ")
+    id = request.args.get("id")
+    if not DocumentService.query(id=id, kb_id=dataset_id):
+        return get_error_data_result(retmsg=f"You don't own the document {id}.")
+    offset = int(request.args.get("offset", 1))
+    keywords = request.args.get("keywords", "")
+    limit = int(request.args.get("limit", 1024))
     orderby = request.args.get("orderby", "create_time")
+    if request.args.get("desc") == "False":
+        desc = False
+    else:
+        desc = True
+    docs, tol = DocumentService.get_list(dataset_id, offset, limit, orderby, desc, keywords, id)

+    # rename key's name
+    renamed_doc_list = []
+    for doc in docs:
+        key_mapping = {
+            "chunk_num": "chunk_count",
+            "kb_id": "knowledgebase_id",
+            "token_num": "token_count",
+            "parser_id": "parser_method"
+        }
+        renamed_doc = {}
+        for key, value in doc.items():
+            new_key = key_mapping.get(key, key)
+            renamed_doc[new_key] = value
+        renamed_doc_list.append(renamed_doc)
+    return get_result(data={"total": tol, "docs": renamed_doc_list})
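A sketch of listing documents with the paging and filter parameters read by the route above; the base URL, auth header, and the exact fields present on each returned document are assumptions:

```python
import requests

BASE = "http://localhost:9380/api/v1"                 # assumed host/prefix
HEADERS = {"Authorization": "Bearer <YOUR_API_KEY>"}  # assumed auth scheme

# GET /dataset/<dataset_id>/info with offset/limit/keywords/orderby/desc.
params = {"offset": 1, "limit": 30, "keywords": "invoice",
          "orderby": "create_time", "desc": "True"}
resp = requests.get(f"{BASE}/dataset/<dataset_id>/info", headers=HEADERS, params=params)
data = resp.json()["data"]
print(data["total"], len(data["docs"]))
```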

+@manager.route('/dataset/<dataset_id>/document', methods=['DELETE'])
 @token_required
+def delete(tenant_id, dataset_id):
+    if not KnowledgebaseService.query(id=dataset_id, tenant_id=tenant_id):
+        return get_error_data_result(retmsg=f"You don't own the dataset {dataset_id}. ")
+    req = request.json
+    if not req.get("ids"):
+        return get_error_data_result(retmsg="ids is required")
+    doc_ids = req["ids"]
     root_folder = FileService.get_root_folder(tenant_id)
     pf_id = root_folder["id"]
     FileService.init_knowledgebase_docs(pf_id, tenant_id)
     try:
         e, doc = DocumentService.get_by_id(doc_id)
         if not e:
+            return get_error_data_result(retmsg="Document not found!")
         tenant_id = DocumentService.get_tenant_id(doc_id)
         if not tenant_id:
+            return get_error_data_result(retmsg="Tenant not found!")

         b, n = File2DocumentService.get_storage_address(doc_id=doc_id)

         if not DocumentService.remove_document(doc, tenant_id):
+            return get_error_data_result(
                 retmsg="Database error (Document removal)!")

         f2d = File2DocumentService.get_by_document_id(doc_id)
         errors += str(e)

     if errors:
+        return get_result(retmsg=errors, retcode=RetCode.SERVER_ERROR)

+    return get_result()
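A sketch of deleting documents by ID through this route; the IDs, host and auth header are placeholders and assumptions:

```python
import requests

BASE = "http://localhost:9380/api/v1"                 # assumed host/prefix
HEADERS = {"Authorization": "Bearer <YOUR_API_KEY>"}  # assumed auth scheme

# DELETE /dataset/<dataset_id>/document with the document IDs in the JSON body.
resp = requests.delete(f"{BASE}/dataset/<dataset_id>/document",
                       headers=HEADERS,
                       json={"ids": ["<document_id_1>", "<document_id_2>"]})
print(resp.json())
```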

+@manager.route('/dataset/<dataset_id>/chunk', methods=['POST'])
 @token_required
+def parse(tenant_id, dataset_id):
+    if not KnowledgebaseService.query(id=dataset_id, tenant_id=tenant_id):
+        return get_error_data_result(retmsg=f"You don't own the dataset {dataset_id}.")
     req = request.json
+    for id in req["document_ids"]:
+        if not DocumentService.query(id=id, kb_id=dataset_id):
+            return get_error_data_result(retmsg=f"You don't own the document {id}.")
+        info = {"run": "1", "progress": 0}
+        info["progress_msg"] = ""
+        info["chunk_num"] = 0
+        info["token_num"] = 0
+        DocumentService.update_by_id(id, info)
+        # if str(req["run"]) == TaskStatus.CANCEL.value:
+        ELASTICSEARCH.deleteByQuery(
+            Q("match", doc_id=id), idxnm=search.index_name(tenant_id))
+        TaskService.filter_delete([Task.doc_id == id])
+        e, doc = DocumentService.get_by_id(id)
+        doc = doc.to_dict()
+        doc["tenant_id"] = tenant_id
+        bucket, name = File2DocumentService.get_storage_address(doc_id=doc["id"])
+        queue_tasks(doc, bucket, name)
+    return get_result()

+@manager.route('/dataset/<dataset_id>/chunk', methods=['DELETE'])
 @token_required
+def stop_parsing(tenant_id, dataset_id):
+    if not KnowledgebaseService.query(id=dataset_id, tenant_id=tenant_id):
+        return get_error_data_result(retmsg=f"You don't own the dataset {dataset_id}.")
     req = request.json
+    for id in req["document_ids"]:
+        if not DocumentService.query(id=id, kb_id=dataset_id):
+            return get_error_data_result(retmsg=f"You don't own the document {id}.")
+        info = {"run": "2", "progress": 0}
+        DocumentService.update_by_id(id, info)
+        # if str(req["run"]) == TaskStatus.CANCEL.value:
+        tenant_id = DocumentService.get_tenant_id(id)
+        ELASTICSEARCH.deleteByQuery(
+            Q("match", doc_id=id), idxnm=search.index_name(tenant_id))
+    return get_result()
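A sketch of starting and cancelling parsing for a set of documents via the paired POST/DELETE verbs on the chunk route; host, prefix and auth header remain assumptions:

```python
import requests

BASE = "http://localhost:9380/api/v1"                 # assumed host/prefix
HEADERS = {"Authorization": "Bearer <YOUR_API_KEY>"}  # assumed auth scheme
DOC_IDS = {"document_ids": ["<document_id>"]}

# Start chunking the listed documents.
requests.post(f"{BASE}/dataset/<dataset_id>/chunk", headers=HEADERS, json=DOC_IDS)
# Cancel parsing with the DELETE verb on the same route.
requests.delete(f"{BASE}/dataset/<dataset_id>/chunk", headers=HEADERS, json=DOC_IDS)
```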

+@manager.route('/dataset/{dataset_id}/document/{document_id}/chunk', methods=['GET'])
+@token_required
+def list_chunk(tenant_id, dataset_id, document_id):
+    if not KnowledgebaseService.query(id=dataset_id, tenant_id=tenant_id):
+        return get_error_data_result(retmsg=f"You don't own the dataset {dataset_id}.")
+    doc = DocumentService.query(id=document_id, kb_id=dataset_id)
+    if not doc:
+        return get_error_data_result(retmsg=f"You don't own the document {document_id}.")
+    doc = doc[0]
+    req = request.args
+    doc_id = document_id
+    page = int(req.get("offset", 1))
+    size = int(req.get("limit", 30))
     question = req.get("keywords", "")
     try:
         query = {
             "doc_ids": [doc_id], "page": page, "size": size, "question": question, "sort": True
         }
         sres = retrievaler.search(query, search.index_name(tenant_id), highlight=True)
         res = {"total": sres.total, "chunks": [], "doc": doc.to_dict()}
+        origin_chunks = []
         for id in sres.ids:
             d = {
                 "chunk_id": id,
                 poss.append([float(d["positions"][i]), float(d["positions"][i + 1]), float(d["positions"][i + 2]),
                             float(d["positions"][i + 3]), float(d["positions"][i + 4])])
                 d["positions"] = poss
             origin_chunks.append(d)
         ##rename keys
         for chunk in origin_chunks:
                 "content_with_weight": "content",
                 "doc_id": "document_id",
                 "important_kwd": "important_keywords",
+                "img_id": "image_id",
             }
             renamed_chunk = {}
             for key, value in chunk.items():
                 new_key = key_mapping.get(key, key)
                 renamed_chunk[new_key] = value
             res["chunks"].append(renamed_chunk)
+        return get_result(data=res)
     except Exception as e:
         if str(e).find("not_found") > 0:
+            return get_result(retmsg=f'No chunk found!',
                               retcode=RetCode.DATA_ERROR)
         return server_error_response(e)
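A sketch of listing the chunks of a document with the offset/limit/keywords parameters this route reads; URLs and auth are assumptions as above:

```python
import requests

BASE = "http://localhost:9380/api/v1"                 # assumed host/prefix
HEADERS = {"Authorization": "Bearer <YOUR_API_KEY>"}  # assumed auth scheme

# GET the chunk list of one document, optionally filtered by keywords.
resp = requests.get(f"{BASE}/dataset/<dataset_id>/document/<document_id>/chunk",
                    headers=HEADERS,
                    params={"offset": 1, "limit": 30, "keywords": "warranty"})
data = resp.json()["data"]
print(data["total"], len(data["chunks"]))
```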

+@manager.route('/dataset/{dataset_id}/document/{document_id}/chunk', methods=['POST'])
 @token_required
+def create(tenant_id, dataset_id, document_id):
+    if not KnowledgebaseService.query(id=dataset_id, tenant_id=tenant_id):
+        return get_error_data_result(retmsg=f"You don't own the dataset {dataset_id}.")
+    doc = DocumentService.query(id=document_id, kb_id=dataset_id)
+    if not doc:
+        return get_error_data_result(retmsg=f"You don't own the document {document_id}.")
     req = request.json
+    if not req.get("content"):
+        return get_error_data_result(retmsg="`content` is required")
     md5 = hashlib.md5()
+    md5.update((req["content"] + document_id).encode("utf-8"))

     chunk_id = md5.hexdigest()
     d = {"id": chunk_id, "content_ltks": rag_tokenizer.tokenize(req["content"]),
     d["important_tks"] = rag_tokenizer.tokenize(" ".join(req.get("important_kwd", [])))
     d["create_time"] = str(datetime.datetime.now()).replace("T", " ")[:19]
     d["create_timestamp_flt"] = datetime.datetime.now().timestamp()
+    d["kb_id"] = [doc.kb_id]
+    d["docnm_kwd"] = doc.name
+    d["doc_id"] = doc.id
+    embd_id = DocumentService.get_embd_id(document_id)
+    embd_mdl = TenantLLMService.model_instance(
+        tenant_id, LLMType.EMBEDDING.value, embd_id)

+    v, c = embd_mdl.encode([doc.name, req["content"]])
+    v = 0.1 * v[0] + 0.9 * v[1]
+    d["q_%d_vec" % len(v)] = v.tolist()
+    ELASTICSEARCH.upsert([d], search.index_name(tenant_id))

+    DocumentService.increment_chunk_num(
+        doc.id, doc.kb_id, c, 1, 0)
+    d["chunk_id"] = chunk_id
+    # rename keys
+    key_mapping = {
+        "chunk_id": "id",
+        "content_with_weight": "content",
+        "doc_id": "document_id",
+        "important_kwd": "important_keywords",
+        "kb_id": "dataset_id",
+        "create_timestamp_flt": "create_timestamp",
+        "create_time": "create_time",
+        "document_keyword": "document",
+    }
+    renamed_chunk = {}
+    for key, value in d.items():
+        if key in key_mapping:
+            new_key = key_mapping.get(key, key)
+            renamed_chunk[new_key] = value
+    return get_result(data={"chunk": renamed_chunk})
+    # return get_result(data={"chunk_id": chunk_id})
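A sketch of adding one chunk to a document; `content` is the only mandatory field according to the route above, and the URL and auth header are assumptions:

```python
import requests

BASE = "http://localhost:9380/api/v1"                 # assumed host/prefix
HEADERS = {"Authorization": "Bearer <YOUR_API_KEY>"}  # assumed auth scheme

# POST one chunk; important_kwd is optional.
payload = {"content": "The warranty period is 24 months.",
           "important_kwd": ["warranty"]}
resp = requests.post(f"{BASE}/dataset/<dataset_id>/document/<document_id>/chunk",
                     headers=HEADERS, json=payload)
print(resp.json()["data"]["chunk"]["id"])
```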

+@manager.route('dataset/{dataset_id}/document/{document_id}/chunk', methods=['DELETE'])
 @token_required
+def rm_chunk(tenant_id, dataset_id, document_id):
+    if not KnowledgebaseService.query(id=dataset_id, tenant_id=tenant_id):
+        return get_error_data_result(retmsg=f"You don't own the dataset {dataset_id}.")
+    doc = DocumentService.query(id=document_id, kb_id=dataset_id)
+    if not doc:
+        return get_error_data_result(retmsg=f"You don't own the document {document_id}.")
     req = request.json
+    if not req.get("chunk_ids"):
+        return get_error_data_result("`chunk_ids` is required")
+    if not ELASTICSEARCH.deleteByQuery(
+            Q("ids", values=req["chunk_ids"]), search.index_name(tenant_id)):
+        return get_error_data_result(retmsg="Index updating failure")
+    deleted_chunk_ids = req["chunk_ids"]
+    chunk_number = len(deleted_chunk_ids)
+    DocumentService.decrement_chunk_num(doc.id, doc.kb_id, 1, chunk_number, 0)
+    return get_result()


+@manager.route('/dataset/{dataset_id}/document/{document_id}/chunk/{chunk_id}', methods=['PUT'])
 @token_required
+def set(tenant_id, dataset_id, document_id, chunk_id):
+    if not KnowledgebaseService.query(id=dataset_id, tenant_id=tenant_id):
+        return get_error_data_result(retmsg=f"You don't own the dataset {dataset_id}.")
+    doc = DocumentService.query(id=document_id, kb_id=dataset_id)
+    if not doc:
+        return get_error_data_result(retmsg=f"You don't own the document {document_id}.")
     req = request.json
+    if not req.get("content"):
+        return get_error_data_result("`content` is required")
+    if not req.get("important_keywords"):
+        return get_error_data_result("`important_keywords` is required")
     d = {
+        "id": chunk_id,
         "content_with_weight": req["content"]}
     d["content_ltks"] = rag_tokenizer.tokenize(req["content"])
     d["content_sm_ltks"] = rag_tokenizer.fine_grained_tokenize(d["content_ltks"])
     d["important_tks"] = rag_tokenizer.tokenize(" ".join(req["important_keywords"]))
     if "available" in req:
         d["available_int"] = req["available"]
+    embd_id = DocumentService.get_embd_id(document_id)
+    embd_mdl = TenantLLMService.model_instance(
+        tenant_id, LLMType.EMBEDDING.value, embd_id)
+    if doc.parser_id == ParserType.QA:
+        arr = [
+            t for t in re.split(
+                r"[\n\t]",
+                req["content"]) if len(t) > 1]
+        if len(arr) != 2:
+            return get_error_data_result(
+                retmsg="Q&A must be separated by TAB/ENTER key.")
+        q, a = rmPrefix(arr[0]), rmPrefix(arr[1])
+        d = beAdoc(d, arr[0], arr[1], not any(
+            [rag_tokenizer.is_chinese(t) for t in q + a]))

+    v, c = embd_mdl.encode([doc.name, req["content"]])
+    v = 0.1 * v[0] + 0.9 * v[1] if doc.parser_id != ParserType.QA else v[1]
+    d["q_%d_vec" % len(v)] = v.tolist()
+    ELASTICSEARCH.upsert([d], search.index_name(tenant_id))
+    return get_result()
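A sketch of updating and deleting chunks through the two routes above; the route requires both `content` and `important_keywords` on update, and all URLs, IDs and the auth header are assumptions:

```python
import requests

BASE = "http://localhost:9380/api/v1"                 # assumed host/prefix
HEADERS = {"Authorization": "Bearer <YOUR_API_KEY>"}  # assumed auth scheme
CHUNK_URL = f"{BASE}/dataset/<dataset_id>/document/<document_id>/chunk"

# PUT one chunk with new content and keywords.
requests.put(f"{CHUNK_URL}/<chunk_id>", headers=HEADERS,
             json={"content": "Updated text.",
                   "important_keywords": ["warranty"],
                   "available": 1})
# DELETE chunks by ID.
requests.delete(CHUNK_URL, headers=HEADERS, json={"chunk_ids": ["<chunk_id>"]})
```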

+@manager.route('/retrieval', methods=['GET'])
 @token_required
 def retrieval_test(tenant_id):
+    req = request.args
+    if not req.get("datasets"):
+        return get_error_data_result("`datasets` is required.")
+    for id in req.get("datasets"):
+        if not KnowledgebaseService.query(id=id, tenant_id=tenant_id):
+            return get_error_data_result(f"You don't own the dataset {id}.")
+    if not req.get("question"):
+        return get_error_data_result("`question` is required.")
+    page = int(req.get("offset", 1))
+    size = int(req.get("limit", 30))
     question = req["question"]
+    kb_id = req["datasets"]
     if isinstance(kb_id, str): kb_id = [kb_id]
+    doc_ids = req.get("documents", [])
     similarity_threshold = float(req.get("similarity_threshold", 0.2))
     vector_similarity_weight = float(req.get("vector_similarity_weight", 0.3))
     top = int(req.get("top_k", 1024))

     try:
         e, kb = KnowledgebaseService.get_by_id(kb_id[0])
         if not e:
+            return get_error_data_result(retmsg="Knowledgebase not found!")
         embd_mdl = TenantLLMService.model_instance(
             kb.tenant_id, LLMType.EMBEDDING.value, llm_name=kb.embd_id)
             del c["vector"]

         ##rename keys
+        renamed_chunks = []
         for chunk in ranks["chunks"]:
             key_mapping = {
                 "chunk_id": "id",
                 "content_with_weight": "content",
                 "doc_id": "document_id",
                 "important_kwd": "important_keywords",
+                "docnm_kwd": "document_keyword"
             }
+            rename_chunk = {}
             for key, value in chunk.items():
                 new_key = key_mapping.get(key, key)
                 rename_chunk[new_key] = value
             renamed_chunks.append(rename_chunk)
         ranks["chunks"] = renamed_chunks
+        return get_result(data=ranks)
     except Exception as e:
         if str(e).find("not_found") > 0:
+            return get_result(retmsg=f'No chunk found! Check the chunk status please!',
                               retcode=RetCode.DATA_ERROR)
         return server_error_response(e)
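A sketch of running a retrieval test against the new `/retrieval` route; the query parameters mirror the defaults read by the route, while the host, prefix, auth header, and the way a list of dataset IDs is encoded in the query string are assumptions:

```python
import requests

BASE = "http://localhost:9380/api/v1"                 # assumed host/prefix
HEADERS = {"Authorization": "Bearer <YOUR_API_KEY>"}  # assumed auth scheme

params = {"datasets": ["<dataset_id>"], "question": "How long is the warranty?",
          "offset": 1, "limit": 30, "similarity_threshold": 0.2,
          "vector_similarity_weight": 0.3, "top_k": 1024}
resp = requests.get(f"{BASE}/retrieval", headers=HEADERS, params=params)
for chunk in resp.json()["data"]["chunks"]:
    print(chunk["document_keyword"], chunk["content"][:80])
```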
api/apps/sdk/session.py
CHANGED
Old version (removed lines are prefixed with "-"; several removed lines are cut short in the capture and are kept as they appear):

@@ -20,47 +20,18 @@ from flask import request, Response
 from api.db import StatusEnum
 from api.db.services.dialog_service import DialogService, ConversationService, chat
-from api.settings import RetCode
 from api.utils import get_uuid
-from api.utils.api_utils import
-from api.utils.api_utils import

-@manager.route('/save', methods=['POST'])
 @token_required
-def
     req = request.json
-    if "assistant_id" in req:
-        req["dialog_id"] = req.pop("assistant_id")
-    if "id" in req:
-        del req["id"]
-        conv = ConversationService.query(id=conv_id)
-        if not conv:
-            return get_data_error_result(retmsg="Session does not exist")
-        if not DialogService.query(id=conv[0].dialog_id, tenant_id=tenant_id, status=StatusEnum.VALID.value):
-            return get_data_error_result(retmsg="You do not own the session")
-        if req.get("dialog_id"):
-            dia = DialogService.query(tenant_id=tenant_id, id=req["dialog_id"], status=StatusEnum.VALID.value)
-            if not dia:
-                return get_data_error_result(retmsg="You do not own the assistant")
-        if "dialog_id" in req and not req.get("dialog_id"):
-            return get_data_error_result(retmsg="assistant_id can not be empty.")
-        if "message" in req:
-            return get_data_error_result(retmsg="message can not be change")
-        if "reference" in req:
-            return get_data_error_result(retmsg="reference can not be change")
-        if "name" in req and not req.get("name"):
-            return get_data_error_result(retmsg="name can not be empty.")
-        if not ConversationService.update_by_id(conv_id, req):
-            return get_data_error_result(retmsg="Session updates error")
-        return get_json_result(data=True)

-    if not req.get("dialog_id"):
-        return get_data_error_result(retmsg="assistant_id is required.")
     dia = DialogService.query(tenant_id=tenant_id, id=req["dialog_id"], status=StatusEnum.VALID.value)
     if not dia:
-        return
     conv = {
         "id": get_uuid(),
         "dialog_id": req["dialog_id"],

@@ -68,33 +39,58 @@ def set_conversation(tenant_id):
         "message": [{"role": "assistant", "content": "Hi! I am your assistant,can I help you?"}]
     }
     if not conv.get("name"):
-        return
     ConversationService.save(**conv)
     e, conv = ConversationService.get_by_id(conv["id"])
     if not e:
-        return
     conv = conv.to_dict()
     conv['messages'] = conv.pop("message")
-    conv["
     del conv["reference"]
-    return

-@manager.route('/completion', methods=['POST'])
 @token_required
-def
     req = request.json
     # req = {"conversation_id": "9aaaca4c11d311efa461fa163e197198", "messages": [
     #     {"role": "user", "content": "上海有吗?"}
     # ]}
-    if
-        return
-    conv = ConversationService.query(id=
     if not conv:
-        return
     conv = conv[0]
-    if not DialogService.query(id=
-        return
     msg = []
     question = {
         "content": req.get("question"),

@@ -108,7 +104,6 @@ def completion(tenant_id):
         msg.append(m)
     message_id = msg[-1].get("id")
     e, dia = DialogService.get_by_id(conv.dialog_id)
-    del req["session_id"]

     if not conv.reference:
         conv.reference = []

@@ -130,13 +125,13 @@ def completion(tenant_id):
     try:
         for ans in chat(dia, msg, **req):
             fillin_conv(ans)
-            yield "data:" + json.dumps({"
             ConversationService.update_by_id(conv.id, conv.to_dict())
     except Exception as e:
-        yield "data:" + json.dumps({"
                                     "data": {"answer": "**ERROR**: " + str(e), "reference": []}},
                                     ensure_ascii=False) + "\n\n"
-    yield "data:" + json.dumps({"

     if req.get("stream", True):
         resp = Response(stream(), mimetype="text/event-stream")

@@ -153,73 +148,31 @@ def completion(tenant_id):
         fillin_conv(ans)
         ConversationService.update_by_id(conv.id, conv.to_dict())
         break
-    return

-@manager.route('/
 @token_required
-def
-    if not
-        return
-        message_num = 0
-        chunk_num = 0
-        while message_num < len(messages):
-            if message_num != 0 and messages[message_num]["role"] != "user":
-                chunk_list = []
-                if "chunks" in conv["reference"][chunk_num]:
-                    chunks = conv["reference"][chunk_num]["chunks"]
-                    for chunk in chunks:
-                        new_chunk = {
-                            "id": chunk["chunk_id"],
-                            "content": chunk["content_with_weight"],
-                            "document_id": chunk["doc_id"],
-                            "document_name": chunk["docnm_kwd"],
-                            "knowledgebase_id": chunk["kb_id"],
-                            "image_id": chunk["img_id"],
-                            "similarity": chunk["similarity"],
-                            "vector_similarity": chunk["vector_similarity"],
-                            "term_similarity": chunk["term_similarity"],
-                            "positions": chunk["positions"],
-                        }
-                        chunk_list.append(new_chunk)
-                chunk_num += 1
-                messages[message_num]["reference"] = chunk_list
-            message_num += 1
-        del conv["reference"]
-    return get_json_result(data=conv)

-@manager.route('/list', methods=["GET"])
-@token_required
-def list(tenant_id):
-    assistant_id = request.args["assistant_id"]
-    if not DialogService.query(tenant_id=tenant_id, id=assistant_id, status=StatusEnum.VALID.value):
-        return get_json_result(
-            data=False, retmsg=f"You don't own the assistant.",
-            retcode=RetCode.OPERATING_ERROR)
-    convs = ConversationService.query(
-        dialog_id=assistant_id,
-        order_by=ConversationService.model.create_time,
-        reverse=True)
-    convs = [d.to_dict() for d in convs]
     for conv in convs:
         conv['messages'] = conv.pop("message")
-        conv["
     if conv["reference"]:
         messages = conv["messages"]
         message_num = 0

@@ -247,20 +200,19 @@ def list(tenant_id):
             messages[message_num]["reference"] = chunk_list
             message_num += 1
         del conv["reference"]
-    return

-@manager.route('/delete', methods=["DELETE"])
 @token_required
-def delete(tenant_id):
-    id =
-    return
New version (added lines are prefixed with "+"):

 from api.db import StatusEnum
 from api.db.services.dialog_service import DialogService, ConversationService, chat
 from api.utils import get_uuid
+from api.utils.api_utils import get_error_data_result
+from api.utils.api_utils import get_result, token_required

+@manager.route('/chat/<chat_id>/session', methods=['POST'])
 @token_required
+def create(tenant_id, chat_id):
     req = request.json
+    req["dialog_id"] = chat_id
     dia = DialogService.query(tenant_id=tenant_id, id=req["dialog_id"], status=StatusEnum.VALID.value)
     if not dia:
+        return get_error_data_result(retmsg="You do not own the assistant")
     conv = {
         "id": get_uuid(),
         "dialog_id": req["dialog_id"],
         "message": [{"role": "assistant", "content": "Hi! I am your assistant,can I help you?"}]
     }
     if not conv.get("name"):
+        return get_error_data_result(retmsg="Name can not be empty.")
     ConversationService.save(**conv)
     e, conv = ConversationService.get_by_id(conv["id"])
     if not e:
+        return get_error_data_result(retmsg="Fail to create a session!")
     conv = conv.to_dict()
     conv['messages'] = conv.pop("message")
+    conv["chat_id"] = conv.pop("dialog_id")
     del conv["reference"]
+    return get_result(data=conv)
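A sketch of creating a session under an existing chat assistant through the new route; the host, prefix, auth header and chat ID are assumptions:

```python
import requests

BASE = "http://localhost:9380/api/v1"                 # assumed host/prefix
HEADERS = {"Authorization": "Bearer <YOUR_API_KEY>"}  # assumed auth scheme

# POST /chat/<chat_id>/session with a session name.
resp = requests.post(f"{BASE}/chat/<chat_id>/session",
                     headers=HEADERS, json={"name": "first session"})
session = resp.json()["data"]
print(session["id"], session["chat_id"])
```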

+@manager.route('/chat/<chat_id>/session/<session_id>', methods=['PUT'])
 @token_required
+def update(tenant_id, chat_id, session_id):
+    req = request.json
+    if "dialog_id" in req and req.get("dialog_id") != chat_id:
+        return get_error_data_result(retmsg="Can't change chat_id")
+    if "chat_id" in req and req.get("chat_id") != chat_id:
+        return get_error_data_result(retmsg="Can't change chat_id")
+    req["dialog_id"] = chat_id
+    conv_id = session_id
+    conv = ConversationService.query(id=conv_id, dialog_id=chat_id)
+    if not conv:
+        return get_error_data_result(retmsg="Session does not exist")
+    if not DialogService.query(id=chat_id, tenant_id=tenant_id, status=StatusEnum.VALID.value):
+        return get_error_data_result(retmsg="You do not own the session")
+    if "message" in req or "messages" in req:
+        return get_error_data_result(retmsg="Message can not be change")
+    if "reference" in req:
+        return get_error_data_result(retmsg="Reference can not be change")
+    if "name" in req and not req.get("name"):
+        return get_error_data_result(retmsg="Name can not be empty.")
+    if not ConversationService.update_by_id(conv_id, req):
+        return get_error_data_result(retmsg="Session updates error")
+    return get_result()


+@manager.route('/chat/<chat_id>/session/<session_id>/completion', methods=['POST'])
+@token_required
+def completion(tenant_id, chat_id, session_id):
     req = request.json
     # req = {"conversation_id": "9aaaca4c11d311efa461fa163e197198", "messages": [
     #     {"role": "user", "content": "上海有吗?"}
     # ]}
+    if not req.get("question"):
+        return get_error_data_result(retmsg="Please input your question.")
+    conv = ConversationService.query(id=session_id, dialog_id=chat_id)
     if not conv:
+        return get_error_data_result(retmsg="Session does not exist")
     conv = conv[0]
+    if not DialogService.query(id=chat_id, tenant_id=tenant_id, status=StatusEnum.VALID.value):
+        return get_error_data_result(retmsg="You do not own the session")
     msg = []
     question = {
         "content": req.get("question"),
         msg.append(m)
     message_id = msg[-1].get("id")
     e, dia = DialogService.get_by_id(conv.dialog_id)

     if not conv.reference:
         conv.reference = []
     try:
         for ans in chat(dia, msg, **req):
             fillin_conv(ans)
+            yield "data:" + json.dumps({"code": 0, "data": ans}, ensure_ascii=False) + "\n\n"
             ConversationService.update_by_id(conv.id, conv.to_dict())
     except Exception as e:
+        yield "data:" + json.dumps({"code": 500, "message": str(e),
                                     "data": {"answer": "**ERROR**: " + str(e), "reference": []}},
                                     ensure_ascii=False) + "\n\n"
+    yield "data:" + json.dumps({"code": 0, "data": True}, ensure_ascii=False) + "\n\n"

     if req.get("stream", True):
         resp = Response(stream(), mimetype="text/event-stream")
         fillin_conv(ans)
         ConversationService.update_by_id(conv.id, conv.to_dict())
         break
+    return get_result(data=answer)
153 |
+
@manager.route('/chat/<chat_id>/session', methods=['GET'])
|
154 |
@token_required
|
155 |
+
def list(chat_id,tenant_id):
|
156 |
+
if not DialogService.query(tenant_id=tenant_id, id=chat_id, status=StatusEnum.VALID.value):
|
157 |
+
return get_error_data_result(retmsg=f"You don't own the assistant {chat_id}.")
|
158 |
+
id = request.args.get("id")
|
159 |
+
name = request.args.get("name")
|
160 |
+
session = ConversationService.query(id=id,name=name,dialog_id=chat_id)
|
161 |
+
if not session:
|
162 |
+
return get_error_data_result(retmsg="The session doesn't exist")
|
163 |
+
page_number = int(request.args.get("page", 1))
|
164 |
+
items_per_page = int(request.args.get("page_size", 1024))
|
165 |
+
orderby = request.args.get("orderby", "create_time")
|
166 |
+
if request.args.get("desc") == "False":
|
167 |
+
desc = False
|
168 |
+
else:
|
169 |
+
desc = True
|
170 |
+
convs = ConversationService.get_list(chat_id,page_number,items_per_page,orderby,desc,id,name)
|
171 |
+
if not convs:
|
172 |
+
return get_result(data=[])
|
|
|
173 |
for conv in convs:
|
174 |
conv['messages'] = conv.pop("message")
|
175 |
+
conv["chat"] = conv.pop("dialog_id")
|
176 |
if conv["reference"]:
|
177 |
messages = conv["messages"]
|
178 |
message_num = 0
|
|
|
200 |
messages[message_num]["reference"] = chunk_list
|
201 |
message_num += 1
|
202 |
del conv["reference"]
|
203 |
+
return get_result(data=convs)
|
204 |
|
205 |
+
@manager.route('/chat/<chat_id>/session', methods=["DELETE"])
|
|
|
206 |
@token_required
|
207 |
+
def delete(tenant_id,chat_id):
|
208 |
+
if not DialogService.query(id=chat_id, tenant_id=tenant_id, status=StatusEnum.VALID.value):
|
209 |
+
return get_error_data_result(retmsg="You don't own the chat")
|
210 |
+
ids = request.json.get("ids")
|
211 |
+
if not ids:
|
212 |
+
return get_error_data_result(retmsg="`ids` is required in deleting operation")
|
213 |
+
for id in ids:
|
214 |
+
conv = ConversationService.query(id=id,dialog_id=chat_id)
|
215 |
+
if not conv:
|
216 |
+
return get_error_data_result(retmsg="The chat doesn't own the session")
|
217 |
+
ConversationService.delete_by_id(id)
|
218 |
+
return get_result()
|
api/db/services/dialog_service.py
CHANGED
@@ -19,6 +19,8 @@ import json
|
|
19 |
import re
|
20 |
from copy import deepcopy
|
21 |
from timeit import default_timer as timer
|
|
|
|
|
22 |
from api.db import LLMType, ParserType,StatusEnum
|
23 |
from api.db.db_models import Dialog, Conversation,DB
|
24 |
from api.db.services.common_service import CommonService
|
@@ -61,6 +63,22 @@ class DialogService(CommonService):
|
|
61 |
class ConversationService(CommonService):
|
62 |
model = Conversation
|
63 |
|
|
|
64 |
|
65 |
def message_fit_in(msg, max_length=4000):
|
66 |
def count():
|
|
|
19 |
import re
|
20 |
from copy import deepcopy
|
21 |
from timeit import default_timer as timer
|
22 |
+
|
23 |
+
|
24 |
from api.db import LLMType, ParserType,StatusEnum
|
25 |
from api.db.db_models import Dialog, Conversation,DB
|
26 |
from api.db.services.common_service import CommonService
|
|
|
63 |
class ConversationService(CommonService):
|
64 |
model = Conversation
|
65 |
|
66 |
+
@classmethod
|
67 |
+
@DB.connection_context()
|
68 |
+
def get_list(cls, dialog_id, page_number, items_per_page, orderby, desc, id, name):
|
69 |
+
sessions = cls.model.select().where(cls.model.dialog_id == dialog_id)
|
70 |
+
if id:
|
71 |
+
sessions = sessions.where(cls.model.id == id)
|
72 |
+
if name:
|
73 |
+
sessions = sessions.where(cls.model.name == name)
|
74 |
+
if desc:
|
75 |
+
sessions = sessions.order_by(cls.model.getter_by(orderby).desc())
|
76 |
+
else:
|
77 |
+
sessions = sessions.order_by(cls.model.getter_by(orderby).asc())
|
78 |
+
|
79 |
+
sessions = sessions.paginate(page_number, items_per_page)
|
80 |
+
|
81 |
+
return list(sessions.dicts())
|
82 |
|
83 |
def message_fit_in(msg, max_length=4000):
|
84 |
def count():
|
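The new `ConversationService.get_list` classmethod above combines optional `id`/`name` filters with dynamic ordering and peewee pagination. Below is a minimal, self-contained sketch of the same pattern against a throwaway in-memory SQLite database; the stand-in model, its fields, and the use of `getattr` in place of the project's `getter_by` helper are illustrative assumptions, not the actual RAGFlow code.

```python
# Illustrative sketch only: mirrors the filter/order/paginate pattern of
# ConversationService.get_list, using an in-memory SQLite database.
from peewee import SqliteDatabase, Model, CharField, BigIntegerField

db = SqliteDatabase(":memory:")

class Conversation(Model):                      # hypothetical stand-in model
    name = CharField()
    dialog_id = CharField()
    create_time = BigIntegerField()

    class Meta:
        database = db

db.create_tables([Conversation])
for i in range(5):
    Conversation.create(name=f"session {i}", dialog_id="chat-1", create_time=i)

def get_list(dialog_id, page_number, items_per_page, orderby, desc, id=None, name=None):
    sessions = Conversation.select().where(Conversation.dialog_id == dialog_id)
    if id:
        sessions = sessions.where(Conversation.id == id)
    if name:
        sessions = sessions.where(Conversation.name == name)
    # The real service resolves the column via its getter_by() helper;
    # plain getattr() is used here only for the sketch.
    field = getattr(Conversation, orderby)
    sessions = sessions.order_by(field.desc() if desc else field.asc())
    return list(sessions.paginate(page_number, items_per_page).dicts())

print(get_list("chat-1", page_number=1, items_per_page=2, orderby="create_time", desc=True))
```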
api/db/services/document_service.py
CHANGED
@@ -49,6 +49,29 @@ from rag.utils.redis_conn import REDIS_CONN
|
|
49 |
class DocumentService(CommonService):
|
50 |
model = Document
|
51 |
|
|
|
52 |
@classmethod
|
53 |
@DB.connection_context()
|
54 |
def get_by_kb_id(cls, kb_id, page_number, items_per_page,
|
@@ -268,7 +291,7 @@ class DocumentService(CommonService):
|
|
268 |
@classmethod
|
269 |
@DB.connection_context()
|
270 |
def get_thumbnails(cls, docids):
|
271 |
-
fields = [cls.model.id, cls.model.
|
272 |
return list(cls.model.select(
|
273 |
*fields).where(cls.model.id.in_(docids)).dicts())
|
274 |
|
|
|
49 |
class DocumentService(CommonService):
|
50 |
model = Document
|
51 |
|
52 |
+
@classmethod
|
53 |
+
@DB.connection_context()
|
54 |
+
def get_list(cls, kb_id, page_number, items_per_page,
|
55 |
+
orderby, desc, keywords, id):
|
56 |
+
docs = cls.model.select().where(cls.model.kb_id == kb_id)
|
57 |
+
if id:
|
58 |
+
docs = docs.where(
|
59 |
+
cls.model.id == id)
|
60 |
+
if keywords:
|
61 |
+
docs = docs.where(
|
62 |
+
fn.LOWER(cls.model.name).contains(keywords.lower())
|
63 |
+
)
|
64 |
+
count = docs.count()
|
65 |
+
if desc:
|
66 |
+
docs = docs.order_by(cls.model.getter_by(orderby).desc())
|
67 |
+
else:
|
68 |
+
docs = docs.order_by(cls.model.getter_by(orderby).asc())
|
69 |
+
|
70 |
+
docs = docs.paginate(page_number, items_per_page)
|
71 |
+
|
72 |
+
return list(docs.dicts()), count
|
73 |
+
|
74 |
+
|
75 |
@classmethod
|
76 |
@DB.connection_context()
|
77 |
def get_by_kb_id(cls, kb_id, page_number, items_per_page,
|
|
|
291 |
@classmethod
|
292 |
@DB.connection_context()
|
293 |
def get_thumbnails(cls, docids):
|
294 |
+
fields = [cls.model.id, cls.model.thumbnail]
|
295 |
return list(cls.model.select(
|
296 |
*fields).where(cls.model.id.in_(docids)).dicts())
|
297 |
|
api/http_api.md
CHANGED
@@ -1441,60 +1441,196 @@ Create a chat session
|
|
1441 |
### Request
|
1442 |
|
1443 |
- Method: POST
|
1444 |
-
- URL:
|
1445 |
- Headers:
|
1446 |
- `content-Type: application/json`
|
1447 |
-
- 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
|
|
|
|
|
1448 |
|
1449 |
#### Request example
|
|
|
1450 |
curl --request POST \
|
1451 |
--url http://{address}/api/v1/chat/{chat_id}/session \
|
1452 |
--header 'Content-Type: application/json' \
|
1453 |
--header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}' \
|
1454 |
-
--data
|
1455 |
"name": "new session"
|
1456 |
}'
|
|
|
|
|
1457 |
|
1458 |
## List the sessions of a chat
|
1459 |
|
1460 |
-
**GET** `/api/v1/chat/{chat_id}/session`
|
1461 |
|
1462 |
-
List all the
|
1463 |
|
1464 |
### Request
|
1465 |
|
1466 |
- Method: GET
|
1467 |
-
- URL:
|
1468 |
- Headers:
|
1469 |
-
- `content-Type: application/json`
|
1470 |
- 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
|
1471 |
|
1472 |
#### Request example
|
|
|
1473 |
curl --request GET \
|
1474 |
-
--url http://{address}/api/v1/chat/
|
1475 |
-
--header 'Content-Type: application/json' \
|
1476 |
--header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
|
|
|
|
1477 |
|
1478 |
-
|
|
|
|
|
1479 |
|
1480 |
-
|
|
|
|
|
1481 |
|
1482 |
-
|
|
|
|
|
1483 |
|
1484 |
### Request
|
1485 |
|
1486 |
- Method: DELETE
|
1487 |
-
- URL:
|
1488 |
- Headers:
|
1489 |
- `content-Type: application/json`
|
1490 |
- 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
|
|
|
|
|
1491 |
|
1492 |
#### Request example
|
|
|
|
|
1493 |
curl --request DELETE \
|
1494 |
-
|
1495 |
-
|
1496 |
-
|
|
|
|
|
|
|
|
|
1497 |
|
|
|
1498 |
## Update a chat session
|
1499 |
|
1500 |
**PUT** `/api/v1/chat/{chat_id}/session/{session_id}`
|
@@ -1504,20 +1640,45 @@ Update a chat session
|
|
1504 |
### Request
|
1505 |
|
1506 |
- Method: PUT
|
1507 |
-
- URL:
|
1508 |
- Headers:
|
1509 |
- `content-Type: application/json`
|
1510 |
- 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
|
|
|
|
|
1511 |
|
1512 |
#### Request example
|
|
|
1513 |
curl --request PUT \
|
1514 |
-
--url http://{address}/api/v1/chat/
|
1515 |
--header 'Content-Type: application/json' \
|
1516 |
-
--header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
|
1517 |
-
--data
|
1518 |
"name": "Updated session"
|
1519 |
}'
|
1520 |
|
|
|
1521 |
## Chat with a chat session
|
1522 |
|
1523 |
**POST** `/api/v1/chat/{chat_id}/session/{session_id}/completion`
|
@@ -1527,17 +1688,139 @@ Chat with a chat session
|
|
1527 |
### Request
|
1528 |
|
1529 |
- Method: POST
|
1530 |
-
- URL:
|
1531 |
- Headers:
|
1532 |
- `content-Type: application/json`
|
1533 |
- 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
|
|
|
|
|
|
|
|
|
1534 |
|
1535 |
#### Request example
|
|
|
1536 |
curl --request POST \
|
1537 |
-
--url http://{address}/api/v1/chat/
|
1538 |
--header 'Content-Type: application/json' \
|
1539 |
-
--header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
|
1540 |
--data-binary '{
|
1541 |
-
"question": "
|
1542 |
-
"stream": true
|
1543 |
}'
|
|
|
|
|
1441 |
### Request
|
1442 |
|
1443 |
- Method: POST
|
1444 |
+
- URL: `http://{address}/api/v1/chat/{chat_id}/session`
|
1445 |
- Headers:
|
1446 |
- `content-Type: application/json`
|
1447 |
+
- 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
|
1448 |
+
- Body:
|
1449 |
+
- name: `string`
|
1450 |
|
1451 |
#### Request example
|
1452 |
+
```bash
|
1453 |
curl --request POST \
|
1454 |
--url http://{address}/api/v1/chat/{chat_id}/session \
|
1455 |
--header 'Content-Type: application/json' \
|
1456 |
--header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}' \
|
1457 |
+
--data '{
|
1458 |
"name": "new session"
|
1459 |
}'
|
1460 |
+
```
|
1461 |
+
#### Request parameters
|
1462 |
+
- `"id"`: (*Body parameter*)
|
1463 |
+
The ID of the created session, used to identify different sessions.
|
1464 |
+
- `None`
|
1465 |
+
- `id` cannot be provided when creating a session.
|
1466 |
+
|
1467 |
+
- `"name"`: (*Body parameter*)
|
1468 |
+
The name of the created session.
|
1469 |
+
- `"New session"`
|
1470 |
+
|
1471 |
+
- `"messages"`: (*Body parameter*)
|
1472 |
+
The messages of the created session.
|
1473 |
+
- `[{"role": "assistant", "content": "Hi! I am your assistant, can I help you?"}]`
|
1474 |
+
- `messages` cannot be provided when creating.
|
1475 |
+
|
1476 |
+
- `"chat_id"`: (*Path parameter*)
|
1477 |
+
The ID of the associated chat.
|
1478 |
+
- `""`
|
1479 |
+
- `chat_id` cannot be changed.
|
1480 |
+
|
1481 |
+
### Response
|
1482 |
+
Success
|
1483 |
+
```json
|
1484 |
+
{
|
1485 |
+
"code": 0,
|
1486 |
+
"data": {
|
1487 |
+
"chat_id": "2ca4b22e878011ef88fe0242ac120005",
|
1488 |
+
"create_date": "Fri, 11 Oct 2024 08:46:14 GMT",
|
1489 |
+
"create_time": 1728636374571,
|
1490 |
+
"id": "4606b4ec87ad11efbc4f0242ac120006",
|
1491 |
+
"messages": [
|
1492 |
+
{
|
1493 |
+
"content": "Hi! I am your assistant,can I help you?",
|
1494 |
+
"role": "assistant"
|
1495 |
+
}
|
1496 |
+
],
|
1497 |
+
"name": "new session",
|
1498 |
+
"update_date": "Fri, 11 Oct 2024 08:46:14 GMT",
|
1499 |
+
"update_time": 1728636374571
|
1500 |
+
}
|
1501 |
+
}
|
1502 |
+
```
|
1503 |
+
Error
|
1504 |
+
```json
|
1505 |
+
{
|
1506 |
+
"code": 102,
|
1507 |
+
"message": "Name can not be empty."
|
1508 |
+
}
|
1509 |
+
```
|
1510 |
|
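For reference, the same request can be issued from Python. This is a minimal sketch assuming a reachable RAGFlow server; the `address`, `chat_id`, and token values below are placeholders, not real credentials.

```python
# Minimal sketch: create a chat session over the HTTP API (placeholder values).
import requests

address = "demo.ragflow.io"                          # assumption: your server address
chat_id = "2ca4b22e878011ef88fe0242ac120005"         # assumption: an existing chat ID
token = "YOUR_ACCESS_TOKEN"

resp = requests.post(
    f"http://{address}/api/v1/chat/{chat_id}/session",
    headers={"Authorization": f"Bearer {token}"},
    json={"name": "new session"},
)
body = resp.json()
if body.get("code") == 0:
    print("created session:", body["data"]["id"])
else:
    print("error:", body.get("message"))
```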
1511 |
## List the sessions of a chat
|
1512 |
|
1513 |
+
**GET** `/api/v1/chat/{chat_id}/session?page={page}&page_size={page_size}&orderby={orderby}&desc={desc}&name={session_name}&id={session_id}`
|
1514 |
|
1515 |
+
List all sessions under the chat based on the filtering criteria.
|
1516 |
|
1517 |
### Request
|
1518 |
|
1519 |
- Method: GET
|
1520 |
+
- URL: `http://{address}/api/v1/chat/{chat_id}/session?page={page}&page_size={page_size}&orderby={orderby}&desc={desc}&name={session_name}&id={session_id}`
|
1521 |
- Headers:
|
|
|
1522 |
- 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
|
1523 |
|
1524 |
#### Request example
|
1525 |
+
```bash
|
1526 |
curl --request GET \
|
1527 |
+
--url http://{address}/api/v1/chat/{chat_id}/session?page={page}&page_size={page_size}&orderby={orderby}&desc={desc}&name={session_name}&id={session_id} \
|
|
|
1528 |
--header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
|
1529 |
+
```
|
1530 |
+
|
1531 |
+
#### Request Parameters
|
1532 |
+
- `"page"`: (*Path parameter*)
|
1533 |
+
The current page number to retrieve from the paginated data. This parameter determines which set of records will be fetched.
|
1534 |
+
- `1`
|
1535 |
+
|
1536 |
+
- `"page_size"`: (*Path parameter*)
|
1537 |
+
The number of records to retrieve per page. This controls how many records will be included in each page.
|
1538 |
+
- `1024`
|
1539 |
+
|
1540 |
+
- `"orderby"`: (*Path parameter*)
|
1541 |
+
The field by which the records should be sorted. This specifies the attribute or column used to order the results.
|
1542 |
+
- `"create_time"`
|
1543 |
|
1544 |
+
- `"desc"`: (*Path parameter*)
|
1545 |
+
A boolean flag indicating whether the sorting should be in descending order.
|
1546 |
+
- `True`
|
1547 |
|
1548 |
+
- `"id"`: (*Path parameter*)
|
1549 |
+
The ID of the session to be retrieved.
|
1550 |
+
- `None`
|
1551 |
|
1552 |
+
- `"name"`: (*Path parameter*)
|
1553 |
+
The name of the session to be retrieved.
|
1554 |
+
- `None`
|
1555 |
+
### Response
|
1556 |
+
Success
|
1557 |
+
```json
|
1558 |
+
{
|
1559 |
+
"code": 0,
|
1560 |
+
"data": [
|
1561 |
+
{
|
1562 |
+
"chat": "2ca4b22e878011ef88fe0242ac120005",
|
1563 |
+
"create_date": "Fri, 11 Oct 2024 08:46:43 GMT",
|
1564 |
+
"create_time": 1728636403974,
|
1565 |
+
"id": "578d541e87ad11ef96b90242ac120006",
|
1566 |
+
"messages": [
|
1567 |
+
{
|
1568 |
+
"content": "Hi! I am your assistant,can I help you?",
|
1569 |
+
"role": "assistant"
|
1570 |
+
}
|
1571 |
+
],
|
1572 |
+
"name": "new session",
|
1573 |
+
"update_date": "Fri, 11 Oct 2024 08:46:43 GMT",
|
1574 |
+
"update_time": 1728636403974
|
1575 |
+
}
|
1576 |
+
]
|
1577 |
+
}
|
1578 |
+
```
|
1579 |
+
Error
|
1580 |
+
```json
|
1581 |
+
{
|
1582 |
+
"code": 102,
|
1583 |
+
"message": "The session doesn't exist"
|
1584 |
+
}
|
1585 |
+
```
|
1586 |
+
|
1587 |
+
|
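The filtering criteria map directly onto query-string parameters, so a Python client can pass them as a `params` dict. A minimal sketch with placeholder server, chat ID, and token values:

```python
# Minimal sketch: list the sessions of a chat with paging/sorting filters (placeholder values).
import requests

address, chat_id, token = "demo.ragflow.io", "2ca4b22e878011ef88fe0242ac120005", "YOUR_ACCESS_TOKEN"

resp = requests.get(
    f"http://{address}/api/v1/chat/{chat_id}/session",
    headers={"Authorization": f"Bearer {token}"},
    params={"page": 1, "page_size": 30, "orderby": "create_time", "desc": "True"},
)
for sess in resp.json().get("data", []):
    print(sess["id"], sess["name"])
```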
1588 |
+
## Delete chat sessions
|
1589 |
+
|
1590 |
+
**DELETE** `/api/v1/chat/{chat_id}/session`
|
1591 |
+
|
1592 |
+
Delete chat sessions
|
1593 |
|
1594 |
### Request
|
1595 |
|
1596 |
- Method: DELETE
|
1597 |
+
- URL: `http://{address}/api/v1/chat/{chat_id}/session`
|
1598 |
- Headers:
|
1599 |
- `content-Type: application/json`
|
1600 |
- 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
|
1601 |
+
- Body:
|
1602 |
+
- `ids`: List[string]
|
1603 |
|
1604 |
#### Request example
|
1605 |
+
```bash
|
1606 |
+
# The "ids" field in the request body lists the sessions to delete.
|
1607 |
curl --request DELETE \
|
1608 |
+
--url http://{address}/api/v1/chat/{chat_id}/session \
|
1609 |
+
--header 'Content-Type: application/json' \
|
1610 |
+
--header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}' \
|
1611 |
+
--data '{
|
1612 |
+
"ids": ["test_1", "test_2"]
|
1613 |
+
}'
|
1614 |
+
```
|
1615 |
|
1616 |
+
#### Request Parameters
|
1617 |
+
- `ids`: (*Body Parameter*)
|
1618 |
+
IDs of the sessions to be deleted.
|
1619 |
+
- `None`
|
1620 |
+
### Response
|
1621 |
+
Success
|
1622 |
+
```json
|
1623 |
+
{
|
1624 |
+
"code": 0
|
1625 |
+
}
|
1626 |
+
```
|
1627 |
+
Error
|
1628 |
+
```json
|
1629 |
+
{
|
1630 |
+
"code": 102,
|
1631 |
+
"message": "The chat doesn't own the session"
|
1632 |
+
}
|
1633 |
+
```
|
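Note that the session IDs travel in the body of the DELETE request, so a `requests`-based client has to pass `json=` explicitly. A minimal sketch with placeholder values:

```python
# Minimal sketch: delete sessions by ID; the IDs are sent in the DELETE body (placeholder values).
import requests

address, chat_id, token = "demo.ragflow.io", "2ca4b22e878011ef88fe0242ac120005", "YOUR_ACCESS_TOKEN"

resp = requests.delete(
    f"http://{address}/api/v1/chat/{chat_id}/session",
    headers={"Authorization": f"Bearer {token}"},
    json={"ids": ["578d541e87ad11ef96b90242ac120006"]},   # assumption: real session IDs
)
print(resp.json())   # {"code": 0} on success
```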
1634 |
## Update a chat session
|
1635 |
|
1636 |
**PUT** `/api/v1/chat/{chat_id}/session/{session_id}`
|
|
|
1640 |
### Request
|
1641 |
|
1642 |
- Method: PUT
|
1643 |
+
- URL: `http://{address}/api/v1/chat/{chat_id}/session/{session_id}`
|
1644 |
- Headers:
|
1645 |
- `content-Type: application/json`
|
1646 |
- 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
|
1647 |
+
- Body:
|
1648 |
+
- `name`: string
|
1649 |
|
1650 |
#### Request example
|
1651 |
+
```bash
|
1652 |
curl --request PUT \
|
1653 |
+
--url http://{address}/api/v1/chat/{chat_id}/session/{session_id} \
|
1654 |
--header 'Content-Type: application/json' \
|
1655 |
+
--header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}' \
|
1656 |
+
--data '{
|
1657 |
"name": "Updated session"
|
1658 |
}'
|
1659 |
|
1660 |
+
```
|
1661 |
+
|
1662 |
+
#### Request Parameter
|
1663 |
+
- `name`: (*Body parameter*)
|
1664 |
+
The revised name of the session.
|
1665 |
+
- `None`
|
1666 |
+
|
1667 |
+
### Response
|
1668 |
+
Success
|
1669 |
+
```json
|
1670 |
+
{
|
1671 |
+
"code": 0
|
1672 |
+
}
|
1673 |
+
```
|
1674 |
+
Error
|
1675 |
+
```json
|
1676 |
+
{
|
1677 |
+
"code": 102,
|
1678 |
+
"message": "Name can not be empty."
|
1679 |
+
}
|
1680 |
+
```
|
1681 |
+
|
1682 |
## Chat with a chat session
|
1683 |
|
1684 |
**POST** `/api/v1/chat/{chat_id}/session/{session_id}/completion`
|
|
|
1688 |
### Request
|
1689 |
|
1690 |
- Method: POST
|
1691 |
+
- URL: `http://{address}/api/v1/chat/{chat_id}/session/{session_id}/completion`
|
1692 |
- Headers:
|
1693 |
- `content-Type: application/json`
|
1694 |
- 'Authorization: Bearer {YOUR_ACCESS_TOKEN}'
|
1695 |
+
- Body:
|
1696 |
+
- `question`: string
|
1697 |
+
- `stream`: bool
|
1698 |
+
|
1699 |
|
1700 |
#### Request example
|
1701 |
+
```bash
|
1702 |
curl --request POST \
|
1703 |
+
--url http://{address}/api/v1/chat/{chat_id}/session/{session_id}/completion \
|
1704 |
--header 'Content-Type: application/json' \
|
1705 |
+
--header 'Authorization: Bearer {YOUR_ACCESS_TOKEN}' \
|
1706 |
--data-binary '{
|
1707 |
+
"question": "你好!",
|
1708 |
+
"stream": true
|
1709 |
}'
|
1710 |
+
```
|
1711 |
+
#### Request Parameters
|
1712 |
+
- `question`: (*Body parameter*)
|
1713 |
+
The question you want to ask.
|
1714 |
+
- question is required.
|
1715 |
+
`None`
|
1716 |
+
- `stream`: (*Body Parameter*)
|
1717 |
+
Whether to stream the generated answer. When `true`, partial answers are pushed back as they are generated; when `false`, the complete answer is returned in a single response.
|
1718 |
+
`False`
|
1719 |
+
### Response
|
1720 |
+
Success
|
1721 |
+
```json
|
1722 |
+
data: {
|
1723 |
+
"code": 0,
|
1724 |
+
"data": {
|
1725 |
+
"answer": "您好!有什么具体的问题或者需要的帮助",
|
1726 |
+
"reference": {},
|
1727 |
+
"audio_binary": null,
|
1728 |
+
"id": "31153052-7bac-4741-a513-ed07d853f29e"
|
1729 |
+
}
|
1730 |
+
}
|
1731 |
+
|
1732 |
+
data: {
|
1733 |
+
"code": 0,
|
1734 |
+
"data": {
|
1735 |
+
"answer": "您好!有什么具体的问题或者需要的帮助可以告诉我吗?我在这里是为了帮助",
|
1736 |
+
"reference": {},
|
1737 |
+
"audio_binary": null,
|
1738 |
+
"id": "31153052-7bac-4741-a513-ed07d853f29e"
|
1739 |
+
}
|
1740 |
+
}
|
1741 |
+
|
1742 |
+
data: {
|
1743 |
+
"code": 0,
|
1744 |
+
"data": {
|
1745 |
+
"answer": "您好!有什么具体的问题或者需要的帮助可以告诉我吗?我在这里是为了帮助您的。如果您有任何疑问或是需要获取",
|
1746 |
+
"reference": {},
|
1747 |
+
"audio_binary": null,
|
1748 |
+
"id": "31153052-7bac-4741-a513-ed07d853f29e"
|
1749 |
+
}
|
1750 |
+
}
|
1751 |
+
|
1752 |
+
data: {
|
1753 |
+
"code": 0,
|
1754 |
+
"data": {
|
1755 |
+
"answer": "您好!有什么具体的问题或者需要的帮助可以告诉我吗?我在这里是为了帮助您的。如果您有任何疑问或是需要获取某些信息,请随时提出。",
|
1756 |
+
"reference": {},
|
1757 |
+
"audio_binary": null,
|
1758 |
+
"id": "31153052-7bac-4741-a513-ed07d853f29e"
|
1759 |
+
}
|
1760 |
+
}
|
1761 |
+
|
1762 |
+
data: {
|
1763 |
+
"code": 0,
|
1764 |
+
"data": {
|
1765 |
+
"answer": "您好!有什么具体的问题或者需要的帮助可以告诉我吗 ##0$$?我在这里是为了帮助您的。如果您有任何疑问或是需要获取某些信息,请随时提出。",
|
1766 |
+
"reference": {
|
1767 |
+
"total": 19,
|
1768 |
+
"chunks": [
|
1769 |
+
{
|
1770 |
+
"chunk_id": "9d87f9d70a0d8a7565694a81fd4c5d5f",
|
1771 |
+
"content_ltks": "当所有知识库内容都与问题无关时 ,你的回答必须包括“知识库中未找到您要的答案!”这句话。回答需要考虑聊天历史。\r\n以下���知识库:\r\n{knowledg}\r\n以上是知识库\r\n\"\"\"\r\n 1\r\n 2\r\n 3\r\n 4\r\n 5\r\n 6\r\n总结\r\n通过上面的介绍,可以对开源的 ragflow有了一个大致的了解,与前面的有道qanyth整体流程还是比较类似的。 ",
|
1772 |
+
"content_with_weight": "当所有知识库内容都与问题无关时,你的回答必须包括“知识库中未找到您要的答案!”这句话。回答需要考虑聊天历史。\r\n 以下是知识库:\r\n {knowledge}\r\n 以上是知识库\r\n\"\"\"\r\n1\r\n2\r\n3\r\n4\r\n5\r\n6\r\n总结\r\n通过上面的介绍,可以对开源的 RagFlow 有了一个大致的了解,与前面的 有道 QAnything 整体流程还是比较类似的。",
|
1773 |
+
"doc_id": "5c5999ec7be811ef9cab0242ac120005",
|
1774 |
+
"docnm_kwd": "1.txt",
|
1775 |
+
"kb_id": "c7ee74067a2c11efb21c0242ac120006",
|
1776 |
+
"important_kwd": [],
|
1777 |
+
"img_id": "",
|
1778 |
+
"similarity": 0.38337178633282265,
|
1779 |
+
"vector_similarity": 0.3321336754679629,
|
1780 |
+
"term_similarity": 0.4053309767034769,
|
1781 |
+
"positions": [
|
1782 |
+
""
|
1783 |
+
]
|
1784 |
+
},
|
1785 |
+
{
|
1786 |
+
"chunk_id": "895d34de762e674b43e8613c6fb54c6d",
|
1787 |
+
"content_ltks": "\r\n\r\n实际内容可能会超过大模型的输入token数量,因此在调用大模型前会调用api/db/servic/dialog_service.py文件中 messag_fit_in ()根据大模型可用的 token数量进行过滤。这部分与有道的 qanyth的实现大同小异,就不额外展开了。\r\n\r\n将检索的内容,历史聊天记录以及问题构造为 prompt ,即可作为大模型的输入了 ,默认的英文prompt如下所示:\r\n\r\n\"\"\"\r\nyou are an intellig assistant. pleas summar the content of the knowledg base to answer the question. pleas list thedata in the knowledg base and answer in detail. when all knowledg base content is irrelev to the question , your answer must includ the sentenc\"the answer you are lookfor isnot found in the knowledg base!\" answer needto consid chat history.\r\n here is the knowledg base:\r\n{ knowledg}\r\nthe abov is the knowledg base.\r\n\"\"\"\r\n1\r\n 2\r\n 3\r\n 4\r\n 5\r\n 6\r\n对应的中文prompt如下所示:\r\n\r\n\"\"\"\r\n你是一个智能助手,请总结知识库的内容来回答问题,请列举知识库中的数据详细回答。 ",
|
1788 |
+
"content_with_weight": "\r\n\r\n实际内容可能会超过大模型的输入 token 数量,因此在调用大模型前会调用 api/db/services/dialog_service.py 文件中 message_fit_in() 根据大模型可用的 token 数量进行过滤。这部分与有道的 QAnything 的实现大同小异,就不额外展开了。\r\n\r\n将检索的内容,历史聊天记录以及问题构造为 prompt,即可作为大模型的输入了,默认的英文 prompt 如下所示:\r\n\r\n\"\"\"\r\nYou are an intelligent assistant. Please summarize the content of the knowledge base to answer the question. Please list the data in the knowledge base and answer in detail. When all knowledge base content is irrelevant to the question, your answer must include the sentence \"The answer you are looking for is not found in the knowledge base!\" Answers need to consider chat history.\r\n Here is the knowledge base:\r\n {knowledge}\r\n The above is the knowledge base.\r\n\"\"\"\r\n1\r\n2\r\n3\r\n4\r\n5\r\n6\r\n对应的中文 prompt 如下所示:\r\n\r\n\"\"\"\r\n你是一个智能助手,请总结知识库的内容来回答问题,请列举知识库中的数据详细回答。",
|
1789 |
+
"doc_id": "5c5999ec7be811ef9cab0242ac120005",
|
1790 |
+
"docnm_kwd": "1.txt",
|
1791 |
+
"kb_id": "c7ee74067a2c11efb21c0242ac120006",
|
1792 |
+
"important_kwd": [],
|
1793 |
+
"img_id": "",
|
1794 |
+
"similarity": 0.2788204323926715,
|
1795 |
+
"vector_similarity": 0.35489427679953667,
|
1796 |
+
"term_similarity": 0.2462173562183008,
|
1797 |
+
"positions": [
|
1798 |
+
""
|
1799 |
+
]
|
1800 |
+
}
|
1801 |
+
],
|
1802 |
+
"doc_aggs": [
|
1803 |
+
{
|
1804 |
+
"doc_name": "1.txt",
|
1805 |
+
"doc_id": "5c5999ec7be811ef9cab0242ac120005",
|
1806 |
+
"count": 2
|
1807 |
+
}
|
1808 |
+
]
|
1809 |
+
},
|
1810 |
+
"prompt": "你是一个智能助手,请总结知识库的内容来回答问题,请列举知识库中的数据详细回答。当所有知识库内容都与问题无关时,你的回答必须包括“知识库中未找到您要的答案!”这句话。回答需要考虑聊天历史。\n 以下是知识库:\n 当所有知识库内容都与问题无关时,你的回答必须包括“知识库中未找到您要的答案!”这句话。回答需要考虑聊天历史。\r\n 以下是知识库:\r\n {knowledge}\r\n 以上是知识库\r\n\"\"\"\r\n1\r\n2\r\n3\r\n4\r\n5\r\n6\r\n总结\r\n通过上面的介绍,可以对开源的 RagFlow 有了一个大致的了解,与前面的 有道 QAnything 整体流程还是比较类似的。\n\n------\n\n\r\n\r\n实际内容可能会超过大模型的输入 token 数量,因此在调用大模型前会调用 api/db/services/dialog_service.py 文件中 message_fit_in() 根据大模型可用的 token 数量进行过滤。这部分与有道的 QAnything 的实现大同小异,就不额外展开了。\r\n\r\n将检索的内容,历史聊天记录以及问题构造为 prompt,即可作为大模型的输入了,默认的英文 prompt 如下所示:\r\n\r\n\"\"\"\r\nYou are an intelligent assistant. Please summarize the content of the knowledge base to answer the question. Please list the data in the knowledge base and answer in detail. When all knowledge base content is irrelevant to the question, your answer must include the sentence \"The answer you are looking for is not found in the knowledge base!\" Answers need to consider chat history.\r\n Here is the knowledge base:\r\n {knowledge}\r\n The above is the knowledge base.\r\n\"\"\"\r\n1\r\n2\r\n3\r\n4\r\n5\r\n6\r\n对应的中文 prompt 如下所示:\r\n\r\n\"\"\"\r\n你是一个智能助手,请总结知识库的内容来回答问题,请列举知识库中的数据详细回答。\n 以上是知识库。\n\n### Query:\n你好,请问有什么问题需要我帮忙解答吗?\n\n### Elapsed\n - Retrieval: 9131.1 ms\n - LLM: 12802.6 ms",
|
1811 |
+
"id": "31153052-7bac-4741-a513-ed07d853f29e"
|
1812 |
+
}
|
1813 |
+
}
|
1814 |
+
|
1815 |
+
data:{
|
1816 |
+
"code": 0,
|
1817 |
+
"data": true
|
1818 |
+
}
|
1819 |
+
```
|
1820 |
+
Error
|
1821 |
+
```json
|
1822 |
+
{
|
1823 |
+
"code": 102,
|
1824 |
+
"message": "Please input your question."
|
1825 |
+
}
|
1826 |
+
```
|
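With `"stream": true` the endpoint replies as a server-sent-event style stream: each `data:` line carries a JSON object whose `data.answer` holds the answer accumulated so far, and the final event is `data: {"code": 0, "data": true}`. A minimal client sketch with placeholder address, IDs, and token:

```python
# Minimal sketch: stream a completion and print the growing answer (placeholder values).
import json
import requests

address = "demo.ragflow.io"                              # assumption: your server address
chat_id = "2ca4b22e878011ef88fe0242ac120005"             # assumption: an existing chat ID
session_id = "4606b4ec87ad11efbc4f0242ac120006"          # assumption: an existing session ID
token = "YOUR_ACCESS_TOKEN"

resp = requests.post(
    f"http://{address}/api/v1/chat/{chat_id}/session/{session_id}/completion",
    headers={"Authorization": f"Bearer {token}"},
    json={"question": "What is RAGFlow?", "stream": True},
    stream=True,
)
printed = ""
for raw in resp.iter_lines():
    line = raw.decode("utf-8")
    if not line.startswith("data:"):
        continue
    event = json.loads(line[5:])
    if event["data"] is True:                 # terminal event closes the stream
        break
    answer = event["data"]["answer"]
    print(answer[len(printed):], end="", flush=True)   # answers are cumulative
    printed = answer
```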
api/python_api_reference.md
CHANGED
@@ -906,7 +906,7 @@ Chat-session APIs
|
|
906 |
## Create session
|
907 |
|
908 |
```python
|
909 |
-
|
910 |
```
|
911 |
|
912 |
### Returns
|
@@ -916,8 +916,7 @@ A `session` object.
|
|
916 |
#### id: `str`
|
917 |
|
918 |
The id of the created session is used to identify different sessions.
|
919 |
-
-
|
920 |
-
- `id` is required in updating
|
921 |
|
922 |
#### name: `str`
|
923 |
|
@@ -936,10 +935,10 @@ Defaults:
|
|
936 |
[{"role": "assistant", "content": "Hi! I am your assistant,can I help you?"}]
|
937 |
```
|
938 |
|
939 |
-
####
|
940 |
|
941 |
-
The id of associated
|
942 |
-
- `
|
943 |
|
944 |
### Examples
|
945 |
|
@@ -947,81 +946,21 @@ The id of associated assistant. Defaults to `""`.
|
|
947 |
from ragflow import RAGFlow
|
948 |
|
949 |
rag = RAGFlow(api_key="xxxxxx", base_url="http://xxx.xx.xx.xxx:9380")
|
950 |
-
assi = rag.
|
|
|
951 |
sess = assi.create_session()
|
952 |
```
|
953 |
|
954 |
-
## Retrieve session
|
955 |
|
956 |
-
|
957 |
-
Assistant.get_session(id: str) -> Session
|
958 |
-
```
|
959 |
-
|
960 |
-
### Parameters
|
961 |
-
|
962 |
-
#### id: `str`, *Required*
|
963 |
-
|
964 |
-
???????????????????????????????
|
965 |
-
|
966 |
-
### Returns
|
967 |
-
|
968 |
-
### Returns
|
969 |
-
|
970 |
-
A `session` object.
|
971 |
-
|
972 |
-
#### id: `str`
|
973 |
-
|
974 |
-
The id of the created session is used to identify different sessions.
|
975 |
-
- `id` cannot be provided in creating
|
976 |
-
- `id` is required in updating
|
977 |
-
|
978 |
-
#### name: `str`
|
979 |
-
|
980 |
-
The name of the created session. Defaults to `"New session"`.
|
981 |
-
|
982 |
-
#### messages: `List[Message]`
|
983 |
-
|
984 |
-
The messages of the created session.
|
985 |
-
- messages cannot be provided.
|
986 |
-
|
987 |
-
Defaults:
|
988 |
-
|
989 |
-
??????????????????????????????????????????????????????????????????????????????????????????????
|
990 |
-
|
991 |
-
```
|
992 |
-
[{"role": "assistant", "content": "Hi! I am your assistant,can I help you?"}]
|
993 |
-
```
|
994 |
-
|
995 |
-
#### assistant_id: `str`
|
996 |
-
|
997 |
-
|
998 |
-
???????????????????????????????????????How to get
|
999 |
-
|
1000 |
-
The id of associated assistant. Defaults to `""`.
|
1001 |
-
- `assistant_id` is required in creating if you use HTTP API.
|
1002 |
-
|
1003 |
-
### Examples
|
1004 |
-
|
1005 |
-
```python
|
1006 |
-
from ragflow import RAGFlow
|
1007 |
-
|
1008 |
-
rag = RAGFlow(api_key="xxxxxx", base_url="http://xxx.xx.xx.xxx:9380")
|
1009 |
-
assi = rag.get_assistant(name="Miss R")
|
1010 |
-
sess = assi.get_session(id="d5c55d2270dd11ef9bd90242ac120007")
|
1011 |
-
```
|
1012 |
-
|
1013 |
-
---
|
1014 |
-
|
1015 |
-
## Save session settings
|
1016 |
|
1017 |
```python
|
1018 |
-
Session.
|
1019 |
```
|
1020 |
|
1021 |
### Returns
|
1022 |
|
1023 |
-
|
1024 |
-
description:the case of updating a session, True or False.
|
1025 |
|
1026 |
### Examples
|
1027 |
|
@@ -1029,10 +968,10 @@ description:the case of updating a session, True or False.
|
|
1029 |
from ragflow import RAGFlow
|
1030 |
|
1031 |
rag = RAGFlow(api_key="xxxxxx", base_url="http://xxx.xx.xx.xxx:9380")
|
1032 |
-
assi = rag.
|
1033 |
-
|
1034 |
-
sess
|
1035 |
-
sess.
|
1036 |
```
|
1037 |
|
1038 |
---
|
@@ -1040,7 +979,7 @@ sess.save()
|
|
1040 |
## Chat
|
1041 |
|
1042 |
```python
|
1043 |
-
Session.
|
1044 |
```
|
1045 |
|
1046 |
### Parameters
|
@@ -1053,7 +992,6 @@ The question to start an AI chat. Defaults to `None`. ???????????????????
|
|
1053 |
|
1054 |
The approach of streaming text generation. When stream is True, it outputs results in a streaming fashion; otherwise, it outputs the complete result after the model has finished generating.
|
1055 |
|
1056 |
-
#### session_id: `str` ??????????????????
|
1057 |
|
1058 |
### Returns
|
1059 |
|
@@ -1098,7 +1036,8 @@ The auto-generated reference of the message. Each `chunk` object includes the fo
|
|
1098 |
from ragflow import RAGFlow
|
1099 |
|
1100 |
rag = RAGFlow(api_key="xxxxxx", base_url="http://xxx.xx.xx.xxx:9380")
|
1101 |
-
assi = rag.
|
|
|
1102 |
sess = assi.create_session()
|
1103 |
|
1104 |
print("\n==================== Miss R =====================\n")
|
@@ -1109,9 +1048,10 @@ while True:
|
|
1109 |
print("\n==================== Miss R =====================\n")
|
1110 |
|
1111 |
cont = ""
|
1112 |
-
for ans in sess.
|
1113 |
print(ans.content[len(cont):], end='', flush=True)
|
1114 |
cont = ans.content
|
|
|
1115 |
```
|
1116 |
|
1117 |
---
|
@@ -1119,7 +1059,14 @@ while True:
|
|
1119 |
## List sessions
|
1120 |
|
1121 |
```python
|
1122 |
-
|
|
|
1123 |
```
|
1124 |
|
1125 |
### Returns
|
@@ -1133,24 +1080,54 @@ description: the List contains information about multiple assistant object, with
|
|
1133 |
from ragflow import RAGFlow
|
1134 |
|
1135 |
rag = RAGFlow(api_key="xxxxxx", base_url="http://xxx.xx.xx.xxx:9380")
|
1136 |
-
assi = rag.
|
1137 |
-
|
1138 |
-
for sess in assi.
|
1139 |
print(sess)
|
1140 |
```
|
1141 |
|
|
|
1142 |
---
|
1143 |
|
1144 |
## Delete session
|
1145 |
|
1146 |
```python
|
1147 |
-
|
1148 |
```
|
1149 |
|
1150 |
### Returns
|
1151 |
|
1152 |
-
|
1153 |
-
description:the case of deleting a session, True or False.
|
1154 |
|
1155 |
### Examples
|
1156 |
|
@@ -1158,7 +1135,12 @@ description:the case of deleting a session, True or False.
|
|
1158 |
from ragflow import RAGFlow
|
1159 |
|
1160 |
rag = RAGFlow(api_key="xxxxxx", base_url="http://xxx.xx.xx.xxx:9380")
|
1161 |
-
assi = rag.
|
1162 |
-
|
1163 |
-
|
1164 |
-
```
|
|
|
906 |
## Create session
|
907 |
|
908 |
```python
|
909 |
+
Chat.create_session(name: str = "New session") -> Session
|
910 |
```
|
911 |
|
912 |
### Returns
|
|
|
916 |
#### id: `str`
|
917 |
|
918 |
The id of the created session is used to identify different sessions.
|
919 |
+
- `id` cannot be provided when creating a session.
|
|
|
920 |
|
921 |
#### name: `str`
|
922 |
|
|
|
935 |
[{"role": "assistant", "content": "Hi! I am your assistant,can I help you?"}]
|
936 |
```
|
937 |
|
938 |
+
#### chat_id: `str`
|
939 |
|
940 |
+
The ID of the associated chat.
|
941 |
+
- `chat_id` cannot be changed.
|
942 |
|
943 |
### Examples
|
944 |
|
|
|
946 |
from ragflow import RAGFlow
|
947 |
|
948 |
rag = RAGFlow(api_key="xxxxxx", base_url="http://xxx.xx.xx.xxx:9380")
|
949 |
+
assi = rag.list_chats(name="Miss R")
|
950 |
+
assi = assi[0]
|
951 |
sess = assi.create_session()
|
952 |
```
|
953 |
|
|
|
954 |
|
955 |
+
## Update session
|
|
|
956 |
|
957 |
```python
|
958 |
+
Session.update(update_message:dict)
|
959 |
```
|
960 |
|
961 |
### Returns
|
962 |
|
963 |
+
No value is returned. An `Exception` is raised if the update fails.
|
|
|
964 |
|
965 |
### Examples
|
966 |
|
|
|
968 |
from ragflow import RAGFlow
|
969 |
|
970 |
rag = RAGFlow(api_key="xxxxxx", base_url="http://xxx.xx.xx.xxx:9380")
|
971 |
+
assi = rag.list_chats(name="Miss R")
|
972 |
+
assi = assi[0]
|
973 |
+
sess = assi.create_session("new_session")
|
974 |
+
sess.update({"name": "Updated session"...})
|
975 |
```
|
976 |
|
977 |
---
|
|
|
979 |
## Chat
|
980 |
|
981 |
```python
|
982 |
+
Session.ask(question: str, stream: bool = False) -> Union[Message, Iterator[Message]]
|
983 |
```
|
984 |
|
985 |
### Parameters
|
|
|
992 |
|
993 |
The approach of streaming text generation. When stream is True, it outputs results in a streaming fashion; otherwise, it outputs the complete result after the model has finished generating.
|
994 |
|
|
|
995 |
|
996 |
### Returns
|
997 |
|
|
|
1036 |
from ragflow import RAGFlow
|
1037 |
|
1038 |
rag = RAGFlow(api_key="xxxxxx", base_url="http://xxx.xx.xx.xxx:9380")
|
1039 |
+
assi = rag.list_chats(name="Miss R")
|
1040 |
+
assi = assi[0]
|
1041 |
sess = assi.create_session()
|
1042 |
|
1043 |
print("\n==================== Miss R =====================\n")
|
|
|
1048 |
print("\n==================== Miss R =====================\n")
|
1049 |
|
1050 |
cont = ""
|
1051 |
+
for ans in sess.ask(question, stream=True):
|
1052 |
print(ans.content[len(cont):], end='', flush=True)
|
1053 |
cont = ans.content
|
1054 |
+
|
1055 |
```
|
1056 |
|
1057 |
---
|
|
|
1059 |
## List sessions
|
1060 |
|
1061 |
```python
|
1062 |
+
Chat.list_sessions(
|
1063 |
+
page: int = 1,
|
1064 |
+
page_size: int = 1024,
|
1065 |
+
orderby: str = "create_time",
|
1066 |
+
desc: bool = True,
|
1067 |
+
id: str = None,
|
1068 |
+
name: str = None
|
1069 |
+
) -> List[Session]
|
1070 |
```
|
1071 |
|
1072 |
### Returns
|
|
|
1080 |
from ragflow import RAGFlow
|
1081 |
|
1082 |
rag = RAGFlow(api_key="xxxxxx", base_url="http://xxx.xx.xx.xxx:9380")
|
1083 |
+
assi = rag.list_chats(name="Miss R")
|
1084 |
+
assi = assi[0]
|
1085 |
+
for sess in assi.list_sessions():
|
1086 |
print(sess)
|
1087 |
```
|
1088 |
|
1089 |
+
### Parameters
|
1090 |
+
|
1091 |
+
#### page: `int`
|
1092 |
+
|
1093 |
+
The current page number to retrieve from the paginated data. This parameter determines which set of records will be fetched.
|
1094 |
+
- `1`
|
1095 |
+
|
1096 |
+
#### page_size: `int`
|
1097 |
+
|
1098 |
+
The number of records to retrieve per page. This controls how many records will be included in each page.
|
1099 |
+
- `1024`
|
1100 |
+
|
1101 |
+
#### orderby: `string`
|
1102 |
+
|
1103 |
+
The field by which the records should be sorted. This specifies the attribute or column used to order the results.
|
1104 |
+
- `"create_time"`
|
1105 |
+
|
1106 |
+
#### desc: `bool`
|
1107 |
+
|
1108 |
+
A boolean flag indicating whether the sorting should be in descending order.
|
1109 |
+
- `True`
|
1110 |
+
|
1111 |
+
#### id: `string`
|
1112 |
+
|
1113 |
+
The ID of the session to be retrieved.
|
1114 |
+
- `None`
|
1115 |
+
|
1116 |
+
#### name: `string`
|
1117 |
+
|
1118 |
+
The name of the session to be retrieved.
|
1119 |
+
- `None`
|
1120 |
---
|
1121 |
|
1122 |
## Delete session
|
1123 |
|
1124 |
```python
|
1125 |
+
Chat.delete_sessions(ids:List[str] = None)
|
1126 |
```
|
1127 |
|
1128 |
### Returns
|
1129 |
|
1130 |
+
No value is returned. An `Exception` is raised if the deletion fails.
|
|
|
1131 |
|
1132 |
### Examples
|
1133 |
|
|
|
1135 |
from ragflow import RAGFlow
|
1136 |
|
1137 |
rag = RAGFlow(api_key="xxxxxx", base_url="http://xxx.xx.xx.xxx:9380")
|
1138 |
+
assi = rag.list_chats(name="Miss R")
|
1139 |
+
assi = assi[0]
|
1140 |
+
assi.delete_sessions(ids=["id_1","id_2"])
|
1141 |
+
```
|
1142 |
+
### Parameters
|
1143 |
+
#### ids: `List[string]`
|
1144 |
+
IDs of the sessions to be deleted.
|
1145 |
+
- `None`
|
1146 |
+
|
sdk/python/ragflow/modules/chat.py
CHANGED
@@ -51,28 +51,28 @@ class Chat(Base):
|
|
51 |
|
52 |
|
53 |
def create_session(self, name: str = "New session") -> Session:
|
54 |
-
res = self.post("/session
|
55 |
res = res.json()
|
56 |
-
if res.get("
|
57 |
return Session(self.rag, res['data'])
|
58 |
-
raise Exception(res["
|
59 |
|
60 |
-
def
|
61 |
-
|
|
|
62 |
res = res.json()
|
63 |
-
if res.get("
|
64 |
result_list = []
|
65 |
for data in res["data"]:
|
66 |
result_list.append(Session(self.rag, data))
|
67 |
return result_list
|
68 |
-
raise Exception(res["
|
69 |
|
70 |
-
def
|
71 |
-
res = self.
|
72 |
res = res.json()
|
73 |
-
if res.get("
|
74 |
-
|
75 |
-
raise Exception(res["retmsg"])
|
76 |
|
77 |
def get_prologue(self):
|
78 |
return self.prompt.opener
|
|
|
51 |
|
52 |
|
53 |
def create_session(self, name: str = "New session") -> Session:
|
54 |
+
res = self.post(f"/chat/{self.id}/session", {"name": name})
|
55 |
res = res.json()
|
56 |
+
if res.get("code") == 0:
|
57 |
return Session(self.rag, res['data'])
|
58 |
+
raise Exception(res["message"])
|
59 |
|
60 |
+
def list_sessions(self,page: int = 1, page_size: int = 1024, orderby: str = "create_time", desc: bool = True,
|
61 |
+
id: str = None, name: str = None) -> List[Session]:
|
62 |
+
res = self.get(f'/chat/{self.id}/session',{"page": page, "page_size": page_size, "orderby": orderby, "desc": desc, "id": id, "name": name} )
|
63 |
res = res.json()
|
64 |
+
if res.get("code") == 0:
|
65 |
result_list = []
|
66 |
for data in res["data"]:
|
67 |
result_list.append(Session(self.rag, data))
|
68 |
return result_list
|
69 |
+
raise Exception(res["message"])
|
70 |
|
71 |
+
def delete_sessions(self,ids):
|
72 |
+
res = self.rm(f"/chat/{self.id}/session", {"ids": ids})
|
73 |
res = res.json()
|
74 |
+
if res.get("code") != 0:
|
75 |
+
raise Exception(res.get("message"))
|
|
|
76 |
|
77 |
def get_prologue(self):
|
78 |
return self.prompt.opener
|
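Taken together, the refactored `Chat` methods cover the whole session lifecycle. A short sketch of the expected usage; the API key, base URL, and chat name are placeholders taken from the reference examples:

```python
# Minimal sketch of the refactored session lifecycle via the Python SDK (placeholder values).
from ragflow import RAGFlow

rag = RAGFlow(api_key="xxxxxx", base_url="http://xxx.xx.xx.xxx:9380")
chat = rag.list_chats(name="Miss R")[0]      # assumes a chat named "Miss R" exists

sess = chat.create_session(name="demo session")
sess.update({"name": "renamed session"})

for s in chat.list_sessions(page=1, page_size=10):
    print(s.id, s.name)

chat.delete_sessions(ids=[sess.id])
```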
sdk/python/ragflow/modules/session.py
CHANGED
@@ -8,20 +8,20 @@ class Session(Base):
|
|
8 |
self.id = None
|
9 |
self.name = "New session"
|
10 |
self.messages = [{"role": "assistant", "content": "Hi! I am your assistant,can I help you?"}]
|
11 |
-
self.
|
12 |
super().__init__(rag, res_dict)
|
13 |
|
14 |
-
def
|
15 |
for message in self.messages:
|
16 |
if "reference" in message:
|
17 |
message.pop("reference")
|
18 |
-
res = self.post("/session/completion",
|
19 |
-
{"
|
20 |
for line in res.iter_lines():
|
21 |
line = line.decode("utf-8")
|
22 |
if line.startswith("{"):
|
23 |
json_data = json.loads(line)
|
24 |
-
raise Exception(json_data["
|
25 |
if line.startswith("data:"):
|
26 |
json_data = json.loads(line[5:])
|
27 |
if json_data["data"] != True:
|
@@ -52,19 +52,12 @@ class Session(Base):
|
|
52 |
message = Message(self.rag, temp_dict)
|
53 |
yield message
|
54 |
|
55 |
-
def
|
56 |
-
res = self.
|
57 |
-
|
58 |
res = res.json()
|
59 |
-
if res.get("
|
60 |
-
|
61 |
-
|
62 |
-
def delete(self):
|
63 |
-
res = self.rm("/session/delete", {"id": self.id})
|
64 |
-
res = res.json()
|
65 |
-
if res.get("retmsg") == "success": return True
|
66 |
-
raise Exception(res.get("retmsg"))
|
67 |
-
|
68 |
|
69 |
class Message(Base):
|
70 |
def __init__(self, rag, res_dict):
|
|
|
8 |
self.id = None
|
9 |
self.name = "New session"
|
10 |
self.messages = [{"role": "assistant", "content": "Hi! I am your assistant,can I help you?"}]
|
11 |
+
self.chat_id = None
|
12 |
super().__init__(rag, res_dict)
|
13 |
|
14 |
+
def ask(self, question: str, stream: bool = False):
|
15 |
for message in self.messages:
|
16 |
if "reference" in message:
|
17 |
message.pop("reference")
|
18 |
+
res = self.post(f"/chat/{self.chat_id}/session/{self.id}/completion",
|
19 |
+
{"question": question, "stream": True}, stream=stream)
|
20 |
for line in res.iter_lines():
|
21 |
line = line.decode("utf-8")
|
22 |
if line.startswith("{"):
|
23 |
json_data = json.loads(line)
|
24 |
+
raise Exception(json_data["message"])
|
25 |
if line.startswith("data:"):
|
26 |
json_data = json.loads(line[5:])
|
27 |
if json_data["data"] != True:
|
|
|
52 |
message = Message(self.rag, temp_dict)
|
53 |
yield message
|
54 |
|
55 |
+
def update(self,update_message):
|
56 |
+
res = self.put(f"/chat/{self.chat_id}/session/{self.id}",
|
57 |
+
update_message)
|
58 |
res = res.json()
|
59 |
+
if res.get("code") != 0:
|
60 |
+
raise Exception(res.get("message"))
|
|
|
61 |
|
62 |
class Message(Base):
|
63 |
def __init__(self, rag, res_dict):
|
sdk/python/test/t_session.py
CHANGED
@@ -7,52 +7,44 @@ class TestSession:
|
|
7 |
def test_create_session(self):
|
8 |
rag = RAGFlow(API_KEY, HOST_ADDRESS)
|
9 |
kb = rag.create_dataset(name="test_create_session")
|
10 |
-
assistant = rag.
|
11 |
session = assistant.create_session()
|
12 |
assert isinstance(session,Session), "Failed to create a session."
|
13 |
|
14 |
def test_create_chat_with_success(self):
|
15 |
rag = RAGFlow(API_KEY, HOST_ADDRESS)
|
16 |
kb = rag.create_dataset(name="test_create_chat")
|
17 |
-
assistant = rag.
|
18 |
session = assistant.create_session()
|
19 |
question = "What is AI"
|
20 |
-
for ans in session.
|
21 |
pass
|
22 |
assert not ans.content.startswith("**ERROR**"), "Please check this error."
|
23 |
|
24 |
-
def
|
25 |
rag = RAGFlow(API_KEY, HOST_ADDRESS)
|
26 |
kb = rag.create_dataset(name="test_delete_session")
|
27 |
-
assistant = rag.
|
28 |
session=assistant.create_session()
|
29 |
-
res=session.
|
30 |
-
assert res, "Failed to delete the dataset."
|
31 |
|
32 |
def test_update_session_with_success(self):
|
33 |
rag=RAGFlow(API_KEY,HOST_ADDRESS)
|
34 |
kb=rag.create_dataset(name="test_update_session")
|
35 |
-
assistant = rag.
|
36 |
session=assistant.create_session(name="old session")
|
37 |
-
session.name
|
38 |
-
res
|
39 |
-
assert res,"Failed to update the session"
|
40 |
|
41 |
-
def test_get_session_with_success(self):
|
42 |
-
rag=RAGFlow(API_KEY,HOST_ADDRESS)
|
43 |
-
kb=rag.create_dataset(name="test_get_session")
|
44 |
-
assistant = rag.create_assistant(name="test_get_session",knowledgebases=[kb])
|
45 |
-
session = assistant.create_session()
|
46 |
-
session_2= assistant.get_session(id=session.id)
|
47 |
-
assert session.to_json()==session_2.to_json(),"Failed to get the session"
|
48 |
|
49 |
-
def
|
50 |
rag=RAGFlow(API_KEY,HOST_ADDRESS)
|
51 |
kb=rag.create_dataset(name="test_list_session")
|
52 |
-
assistant=rag.
|
53 |
assistant.create_session("test_1")
|
54 |
assistant.create_session("test_2")
|
55 |
-
sessions=assistant.
|
56 |
if isinstance(sessions,list):
|
57 |
for session in sessions:
|
58 |
assert isinstance(session,Session),"Non-Session elements exist in the list"
|
|
|
7 |
def test_create_session(self):
|
8 |
rag = RAGFlow(API_KEY, HOST_ADDRESS)
|
9 |
kb = rag.create_dataset(name="test_create_session")
|
10 |
+
assistant = rag.create_chat(name="test_create_session", knowledgebases=[kb])
|
11 |
session = assistant.create_session()
|
12 |
assert isinstance(session,Session), "Failed to create a session."
|
13 |
|
14 |
def test_create_chat_with_success(self):
|
15 |
rag = RAGFlow(API_KEY, HOST_ADDRESS)
|
16 |
kb = rag.create_dataset(name="test_create_chat")
|
17 |
+
assistant = rag.create_chat(name="test_create_chat", knowledgebases=[kb])
|
18 |
session = assistant.create_session()
|
19 |
question = "What is AI"
|
20 |
+
for ans in session.ask(question, stream=True):
|
21 |
pass
|
22 |
assert not ans.content.startswith("**ERROR**"), "Please check this error."
|
23 |
|
24 |
+
def test_delete_sessions_with_success(self):
|
25 |
rag = RAGFlow(API_KEY, HOST_ADDRESS)
|
26 |
kb = rag.create_dataset(name="test_delete_session")
|
27 |
+
assistant = rag.create_chat(name="test_delete_session",knowledgebases=[kb])
|
28 |
session=assistant.create_session()
|
29 |
+
res=assistant.delete_sessions(ids=[session.id])
|
30 |
+
assert res is None, "Failed to delete the dataset."
|
31 |
|
32 |
def test_update_session_with_success(self):
|
33 |
rag=RAGFlow(API_KEY,HOST_ADDRESS)
|
34 |
kb=rag.create_dataset(name="test_update_session")
|
35 |
+
assistant = rag.create_chat(name="test_update_session",knowledgebases=[kb])
|
36 |
session=assistant.create_session(name="old session")
|
37 |
+
res=session.update({"name":"new session"})
|
38 |
+
assert res is None,"Failed to update the session"
|
|
|
39 |
|
|
|
|
40 |
|
41 |
+
def test_list_sessions_with_success(self):
|
42 |
rag=RAGFlow(API_KEY,HOST_ADDRESS)
|
43 |
kb=rag.create_dataset(name="test_list_session")
|
44 |
+
assistant=rag.create_chat(name="test_list_session",knowledgebases=[kb])
|
45 |
assistant.create_session("test_1")
|
46 |
assistant.create_session("test_2")
|
47 |
+
sessions=assistant.list_sessions()
|
48 |
if isinstance(sessions,list):
|
49 |
for session in sessions:
|
50 |
assert isinstance(session,Session),"Non-Session elements exist in the list"
|