GYH
committed on
Commit
·
f4cd7c3
1
Parent(s):
0711582
0517 list chunks (#821)
Browse files
### What problem does this PR solve?
#717
### Type of change
- [x] New Feature (non-breaking change which adds functionality)
- api/apps/api_app.py +43 -0
- api/db/services/document_service.py +13 -0
- docs/conversation_api.md +35 -0
api/apps/api_app.py
CHANGED
|
@@ -39,6 +39,9 @@ from itsdangerous import URLSafeTimedSerializer
|
|
| 39 |
from api.utils.file_utils import filename_type, thumbnail
|
| 40 |
from rag.utils.minio_conn import MINIO
|
| 41 |
|
|
|
|
|
|
|
|
|
|
| 42 |
|
| 43 |
def generate_confirmation_token(tenent_id):
|
| 44 |
serializer = URLSafeTimedSerializer(tenent_id)
|
|
@@ -347,3 +350,43 @@ def upload():
|
|
| 347 |
return server_error_response(e)
|
| 348 |
|
| 349 |
return get_json_result(data=doc_result.to_json())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
from api.utils.file_utils import filename_type, thumbnail
|
| 40 |
from rag.utils.minio_conn import MINIO
|
| 41 |
|
| 42 |
+
from rag.utils.es_conn import ELASTICSEARCH
|
| 43 |
+
from rag.nlp import search
|
| 44 |
+
from elasticsearch_dsl import Q
|
| 45 |
|
| 46 |
def generate_confirmation_token(tenent_id):
|
| 47 |
serializer = URLSafeTimedSerializer(tenent_id)
|
|
|
|
| 350 |
return server_error_response(e)
|
| 351 |
|
| 352 |
return get_json_result(data=doc_result.to_json())
|
| 353 |
+
|
| 354 |
+
|
| 355 |
+
@manager.route('/list_chunks', methods=['POST'])
# @login_required
def list_chunks():
    """List the indexed chunks of one document, selected by name or by ID.

    Authentication:
        The ``Authorization`` header must contain at least two
        whitespace-separated fields; the second one is looked up with
        ``APIToken.query``.

    Form fields (one is required; ``doc_name`` takes precedence when both
    are sent):
        doc_name: document name, matched against the ``docnm_kwd`` field.
        doc_id: document ID, matched against the ``doc_id`` field.

    Returns:
        A JSON result whose ``data`` is a list of dicts, one per search hit,
        with ``doc_name``, ``content`` and - only when the hit carries one -
        ``img_id``. ``data`` is ``False`` with an explanatory ``retmsg`` when
        the token is rejected, neither form field is present, or no tenant
        can be resolved for the document. Any exception raised while
        resolving or searching is converted by ``server_error_response``.
    """
    # A missing header (None) or a header with a single field used to raise
    # AttributeError / IndexError here, outside the try block. Treat both as
    # an invalid token instead.
    auth_fields = (request.headers.get('Authorization') or '').split()
    token = auth_fields[1] if len(auth_fields) > 1 else None
    objs = APIToken.query(token=token) if token else None
    if not objs:
        return get_json_result(
            data=False, retmsg='Token is not valid!', retcode=RetCode.AUTHENTICATION_ERROR)

    # NOTE(review): the tenant that owns the token is never compared with the
    # tenant that owns the document, so any valid token appears able to read
    # any tenant's chunks - confirm and enforce ownership if that is unintended.
    form_data = request.form

    try:
        if "doc_name" in form_data:
            tenant_id = DocumentService.get_tenant_id_by_name(form_data['doc_name'])
            q = Q("match", docnm_kwd=form_data['doc_name'])
        elif "doc_id" in form_data:
            tenant_id = DocumentService.get_tenant_id(form_data['doc_id'])
            q = Q("match", doc_id=form_data['doc_id'])
        else:
            return get_json_result(
                data=False, retmsg="Can't find doc_name or doc_id"
            )

        # The lookup yields None for an unknown document; without this guard
        # the search below would be sent to an index derived from None.
        # The retcode is left at its default to mirror the branch above.
        if not tenant_id:
            return get_json_result(
                data=False, retmsg="Can't find this document!"
            )

        # NOTE(review): no result size is passed, so the number of hits is
        # presumably capped by the search backend's default - verify that all
        # chunks of a large document are actually returned.
        res_es_search = ELASTICSEARCH.search(
            q, idxnm=search.index_name(tenant_id), timeout="600s")

        res = []
        for hit in res_es_search['hits']['hits']:
            source = hit['_source']
            chunk = {
                'doc_name': source['docnm_kwd'],
                'content': source['content_with_weight'],
            }
            # img_id is optional on a chunk; omit the key rather than emit null.
            if 'img_id' in source:
                chunk['img_id'] = source['img_id']
            res.append(chunk)

    except Exception as e:
        return server_error_response(e)

    return get_json_result(data=res)
|
api/db/services/document_service.py
CHANGED
|
@@ -166,6 +166,19 @@ class DocumentService(CommonService):
|
|
| 166 |
return
|
| 167 |
return docs[0]["tenant_id"]
|
| 168 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 169 |
@classmethod
|
| 170 |
@DB.connection_context()
|
| 171 |
def get_thumbnails(cls, docids):
|
|
|
|
| 166 |
return
|
| 167 |
return docs[0]["tenant_id"]
|
| 168 |
|
| 169 |
+
@classmethod
@DB.connection_context()
def get_tenant_id_by_name(cls, name):
    """Resolve the tenant that owns a document, looking the document up by name.

    The document model is joined to ``Knowledgebase`` on ``kb_id`` and only
    knowledge bases whose status equals ``StatusEnum.VALID.value`` are
    considered.

    Args:
        name: exact document name to match against ``cls.model.name``.

    Returns:
        The ``tenant_id`` of the first matching row, or ``None`` when no row
        matches. When several documents share the name, whichever row the
        query yields first is used.
    """
    join_condition = (Knowledgebase.id == cls.model.kb_id)
    query = cls.model.select(Knowledgebase.tenant_id)
    query = query.join(Knowledgebase, on=join_condition)
    query = query.where(
        cls.model.name == name,
        Knowledgebase.status == StatusEnum.VALID.value)
    rows = query.dicts()
    if rows:
        return rows[0]["tenant_id"]
    return None
|
| 181 |
+
|
| 182 |
@classmethod
|
| 183 |
@DB.connection_context()
|
| 184 |
def get_thumbnails(cls, docids):
|
docs/conversation_api.md
CHANGED
|
@@ -364,3 +364,38 @@ This is usually used when upload a file to.
|
|
| 364 |
}
|
| 365 |
|
| 366 |
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 364 |
}
|
| 365 |
|
| 366 |
```
|
| 367 |
+
|
| 368 |
+
## Get document chunks
|
| 369 |
+
|
| 370 |
+
Get the chunks of the document based on doc_name or doc_id.
|
| 371 |
+
### Path: /api/list_chunks
|
| 372 |
+
### Method: POST
|
| 373 |
+
|
| 374 |
+
### Parameter:
|
| 375 |
+
|
| 376 |
+
| Name | Type | Optional | Description |
|
| 377 |
+
|----------|--------|----------|---------------------------------|
|
| 378 |
+
| `doc_name` | string | Yes | The name of the document in the knowledge base. It must not be empty if `doc_id` is not set.|
|
| 379 |
+
| `doc_id` | string | Yes | The ID of the document in the knowledge base. It must not be empty if `doc_name` is not set.|
|
| 380 |
+
|
| 381 |
+
|
| 382 |
+
### Response
|
| 383 |
+
```json
|
| 384 |
+
{
|
| 385 |
+
"data": [
|
| 386 |
+
{
|
| 387 |
+
"content": "Figure 14: Per-request neural-net processingof RL-Cache.\n103\n(sn)\nCPU\n 102\nGPU\n8101\n100\n8\n16 64 256 1K\n4K",
|
| 388 |
+
"doc_name": "RL-Cache.pdf",
|
| 389 |
+
"img_id": "0335167613f011ef91240242ac120006-b46c3524952f82dbe061ce9b123f2211"
|
| 390 |
+
},
|
| 391 |
+
{
|
| 392 |
+
"content": "4.3 ProcessingOverheadof RL-CacheACKNOWLEDGMENTSThis section evaluates how eectively our RL-Cache implemen-tation leverages modern multi-core CPUs and GPUs to keep the per-request neural-net processing overhead low. Figure 14 depictsThis researchwas supported inpart by the Regional Government of Madrid (grant P2018/TCS-4499, EdgeData-CM)andU.S. National Science Foundation (grants CNS-1763617 andCNS-1717179).REFERENCES",
|
| 393 |
+
"doc_name": "RL-Cache.pdf",
|
| 394 |
+
"img_id": "0335167613f011ef91240242ac120006-d4c12c43938eb55d2d8278eea0d7e6d7"
|
| 395 |
+
}
|
| 396 |
+
],
|
| 397 |
+
"retcode": 0,
|
| 398 |
+
"retmsg": "success"
|
| 399 |
+
}
|
| 400 |
+
|
| 401 |
+
```
|