liuhua
liuhua
committed on
Commit
·
ee8a916
1
Parent(s):
c4fcec1
Fix some issues in API (#2902)
Browse files
### What problem does this PR solve?
Fix some issues in API
### Type of change
- [x] Bug Fix (non-breaking change which fixes an issue)
Co-authored-by: liuhua <[email protected]>
- api/apps/sdk/chat.py +25 -26
- api/apps/sdk/dataset.py +36 -17
- api/apps/sdk/doc.py +23 -17
- api/apps/sdk/session.py +5 -5
- api/db/db_models.py +2 -2
- api/utils/api_utils.py +14 -1
- sdk/python/ragflow/modules/base.py +1 -1
- sdk/python/ragflow/modules/chat.py +0 -3
- sdk/python/ragflow/modules/dataset.py +78 -78
- sdk/python/ragflow/modules/document.py +19 -5
- sdk/python/ragflow/ragflow.py +5 -5
- sdk/python/test/t_document.py +2 -2
api/apps/sdk/chat.py
CHANGED
@@ -30,18 +30,17 @@ from api.utils.api_utils import get_result
|
|
30 |
@token_required
|
31 |
def create(tenant_id):
|
32 |
req=request.json
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
for
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
req["kb_ids"] = kb_list
|
45 |
# llm
|
46 |
llm = req.get("llm")
|
47 |
if llm:
|
@@ -81,24 +80,24 @@ def create(tenant_id):
|
|
81 |
else:
|
82 |
req["llm_id"] = tenant.llm_id
|
83 |
if not req.get("name"):
|
84 |
-
return get_error_data_result(retmsg="name is required.")
|
85 |
if DialogService.query(name=req["name"], tenant_id=tenant_id, status=StatusEnum.VALID.value):
|
86 |
-
return get_error_data_result(retmsg="Duplicated chat name in creating
|
87 |
# tenant_id
|
88 |
if req.get("tenant_id"):
|
89 |
-
return get_error_data_result(retmsg="tenant_id must not be provided.")
|
90 |
req["tenant_id"] = tenant_id
|
91 |
# prompt more parameter
|
92 |
default_prompt = {
|
93 |
-
"system": """
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
"prologue": "
|
98 |
"parameters": [
|
99 |
{"key": "knowledge", "optional": False}
|
100 |
],
|
101 |
-
"empty_response": "Sorry!
|
102 |
}
|
103 |
key_list_2 = ["system", "prologue", "parameters", "empty_response"]
|
104 |
if "prompt_config" not in req:
|
@@ -149,7 +148,7 @@ def update(tenant_id,chat_id):
|
|
149 |
req =request.json
|
150 |
if "knowledgebases" in req:
|
151 |
if not req.get("knowledgebases"):
|
152 |
-
return get_error_data_result(retmsg="knowledgebases can't be empty value")
|
153 |
kb_list = []
|
154 |
for kb in req.get("knowledgebases"):
|
155 |
if not kb["id"]:
|
@@ -189,10 +188,10 @@ def update(tenant_id,chat_id):
|
|
189 |
res = res.to_json()
|
190 |
if "llm_id" in req:
|
191 |
if not TenantLLMService.query(llm_name=req["llm_id"]):
|
192 |
-
return get_error_data_result(retmsg="
|
193 |
if "name" in req:
|
194 |
if not req.get("name"):
|
195 |
-
return get_error_data_result(retmsg="name is not empty.")
|
196 |
if req["name"].lower() != res["name"].lower() \
|
197 |
and len(
|
198 |
DialogService.query(name=req["name"], tenant_id=tenant_id, status=StatusEnum.VALID.value)) > 0:
|
@@ -224,7 +223,7 @@ def delete(tenant_id):
|
|
224 |
req = request.json
|
225 |
ids = req.get("ids")
|
226 |
if not ids:
|
227 |
-
return get_error_data_result(retmsg="ids are required")
|
228 |
for id in ids:
|
229 |
if not DialogService.query(tenant_id=tenant_id, id=id, status=StatusEnum.VALID.value):
|
230 |
return get_error_data_result(retmsg=f"You don't own the chat {id}")
|
@@ -234,7 +233,7 @@ def delete(tenant_id):
|
|
234 |
|
235 |
@manager.route('/chat', methods=['GET'])
|
236 |
@token_required
|
237 |
-
def
|
238 |
id = request.args.get("id")
|
239 |
name = request.args.get("name")
|
240 |
chat = DialogService.query(id=id,name=name,status=StatusEnum.VALID.value)
|
|
|
30 |
@token_required
|
31 |
def create(tenant_id):
|
32 |
req=request.json
|
33 |
+
ids= req.get("knowledgebases")
|
34 |
+
if not ids:
|
35 |
+
return get_error_data_result(retmsg="`knowledgebases` is required")
|
36 |
+
for kb_id in ids:
|
37 |
+
kbs = KnowledgebaseService.query(id=kb_id,tenant_id=tenant_id)
|
38 |
+
if not kbs:
|
39 |
+
return get_error_data_result(f"You don't own the dataset {kb_id}")
|
40 |
+
kb=kbs[0]
|
41 |
+
if kb.chunk_num == 0:
|
42 |
+
return get_error_data_result(f"The dataset {kb_id} doesn't own parsed file")
|
43 |
+
req["kb_ids"] = ids
|
|
|
44 |
# llm
|
45 |
llm = req.get("llm")
|
46 |
if llm:
|
|
|
80 |
else:
|
81 |
req["llm_id"] = tenant.llm_id
|
82 |
if not req.get("name"):
|
83 |
+
return get_error_data_result(retmsg="`name` is required.")
|
84 |
if DialogService.query(name=req["name"], tenant_id=tenant_id, status=StatusEnum.VALID.value):
|
85 |
+
return get_error_data_result(retmsg="Duplicated chat name in creating chat.")
|
86 |
# tenant_id
|
87 |
if req.get("tenant_id"):
|
88 |
+
return get_error_data_result(retmsg="`tenant_id` must not be provided.")
|
89 |
req["tenant_id"] = tenant_id
|
90 |
# prompt more parameter
|
91 |
default_prompt = {
|
92 |
+
"system": """You are an intelligent assistant. Please summarize the content of the knowledge base to answer the question. Please list the data in the knowledge base and answer in detail. When all knowledge base content is irrelevant to the question, your answer must include the sentence "The answer you are looking for is not found in the knowledge base!" Answers need to consider chat history.
|
93 |
+
Here is the knowledge base:
|
94 |
+
{knowledge}
|
95 |
+
The above is the knowledge base.""",
|
96 |
+
"prologue": "Hi! I'm your assistant, what can I do for you?",
|
97 |
"parameters": [
|
98 |
{"key": "knowledge", "optional": False}
|
99 |
],
|
100 |
+
"empty_response": "Sorry! No relevant content was found in the knowledge base!"
|
101 |
}
|
102 |
key_list_2 = ["system", "prologue", "parameters", "empty_response"]
|
103 |
if "prompt_config" not in req:
|
|
|
148 |
req =request.json
|
149 |
if "knowledgebases" in req:
|
150 |
if not req.get("knowledgebases"):
|
151 |
+
return get_error_data_result(retmsg="`knowledgebases` can't be empty value")
|
152 |
kb_list = []
|
153 |
for kb in req.get("knowledgebases"):
|
154 |
if not kb["id"]:
|
|
|
188 |
res = res.to_json()
|
189 |
if "llm_id" in req:
|
190 |
if not TenantLLMService.query(llm_name=req["llm_id"]):
|
191 |
+
return get_error_data_result(retmsg="The `model_name` does not exist.")
|
192 |
if "name" in req:
|
193 |
if not req.get("name"):
|
194 |
+
return get_error_data_result(retmsg="`name` is not empty.")
|
195 |
if req["name"].lower() != res["name"].lower() \
|
196 |
and len(
|
197 |
DialogService.query(name=req["name"], tenant_id=tenant_id, status=StatusEnum.VALID.value)) > 0:
|
|
|
223 |
req = request.json
|
224 |
ids = req.get("ids")
|
225 |
if not ids:
|
226 |
+
return get_error_data_result(retmsg="`ids` are required")
|
227 |
for id in ids:
|
228 |
if not DialogService.query(tenant_id=tenant_id, id=id, status=StatusEnum.VALID.value):
|
229 |
return get_error_data_result(retmsg=f"You don't own the chat {id}")
|
|
|
233 |
|
234 |
@manager.route('/chat', methods=['GET'])
|
235 |
@token_required
|
236 |
+
def list_chat(tenant_id):
|
237 |
id = request.args.get("id")
|
238 |
name = request.args.get("name")
|
239 |
chat = DialogService.query(id=id,name=name,status=StatusEnum.VALID.value)
|
api/apps/sdk/dataset.py
CHANGED
@@ -25,28 +25,38 @@ from api.db.services.knowledgebase_service import KnowledgebaseService
|
|
25 |
from api.db.services.user_service import TenantService
|
26 |
from api.settings import RetCode
|
27 |
from api.utils import get_uuid
|
28 |
-
from api.utils.api_utils import get_result, token_required,get_error_data_result
|
|
|
29 |
|
30 |
@manager.route('/dataset', methods=['POST'])
|
31 |
@token_required
|
32 |
def create(tenant_id):
|
33 |
req = request.json
|
34 |
e, t = TenantService.get_by_id(tenant_id)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
if "tenant_id" in req or "embedding_model" in req:
|
36 |
return get_error_data_result(
|
37 |
-
retmsg="
|
38 |
chunk_count=req.get("chunk_count")
|
39 |
document_count=req.get("document_count")
|
40 |
if chunk_count or document_count:
|
41 |
-
return get_error_data_result(retmsg="chunk_count or document_count must be 0 or not be provided")
|
42 |
if "name" not in req:
|
43 |
return get_error_data_result(
|
44 |
-
retmsg="
|
45 |
req['id'] = get_uuid()
|
46 |
req["name"] = req["name"].strip()
|
47 |
if req["name"] == "":
|
48 |
return get_error_data_result(
|
49 |
-
retmsg="
|
50 |
if KnowledgebaseService.query(name=req["name"], tenant_id=tenant_id, status=StatusEnum.VALID.value):
|
51 |
return get_error_data_result(
|
52 |
retmsg="Duplicated knowledgebase name in creating dataset.")
|
@@ -55,7 +65,7 @@ def create(tenant_id):
|
|
55 |
key_mapping = {
|
56 |
"chunk_num": "chunk_count",
|
57 |
"doc_num": "document_count",
|
58 |
-
"parser_id": "
|
59 |
"embd_id": "embedding_model"
|
60 |
}
|
61 |
mapped_keys = {new_key: req[old_key] for new_key, old_key in key_mapping.items() if old_key in req}
|
@@ -90,7 +100,7 @@ def delete(tenant_id):
|
|
90 |
File2DocumentService.delete_by_document_id(doc.id)
|
91 |
if not KnowledgebaseService.delete_by_id(id):
|
92 |
return get_error_data_result(
|
93 |
-
retmsg="Delete dataset error.(Database
|
94 |
return get_result(retcode=RetCode.SUCCESS)
|
95 |
|
96 |
@manager.route('/dataset/<dataset_id>', methods=['PUT'])
|
@@ -103,30 +113,39 @@ def update(tenant_id,dataset_id):
|
|
103 |
invalid_keys = {"id", "embd_id", "chunk_num", "doc_num", "parser_id"}
|
104 |
if any(key in req for key in invalid_keys):
|
105 |
return get_error_data_result(retmsg="The input parameters are invalid.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
106 |
if "tenant_id" in req:
|
107 |
if req["tenant_id"] != tenant_id:
|
108 |
return get_error_data_result(
|
109 |
-
retmsg="Can't change tenant_id
|
110 |
e, kb = KnowledgebaseService.get_by_id(dataset_id)
|
111 |
if "chunk_count" in req:
|
112 |
if req["chunk_count"] != kb.chunk_num:
|
113 |
return get_error_data_result(
|
114 |
-
retmsg="Can't change chunk_count
|
115 |
req.pop("chunk_count")
|
116 |
if "document_count" in req:
|
117 |
if req['document_count'] != kb.doc_num:
|
118 |
return get_error_data_result(
|
119 |
-
retmsg="Can't change document_count
|
120 |
req.pop("document_count")
|
121 |
-
if "
|
122 |
-
if kb.chunk_num != 0 and req['
|
123 |
return get_error_data_result(
|
124 |
-
retmsg="If
|
125 |
-
req['parser_id'] = req.pop('
|
126 |
if "embedding_model" in req:
|
127 |
-
if kb.chunk_num != 0 and req['
|
128 |
return get_error_data_result(
|
129 |
-
retmsg="If
|
130 |
req['embd_id'] = req.pop('embedding_model')
|
131 |
if "name" in req:
|
132 |
req["name"] = req["name"].strip()
|
@@ -162,7 +181,7 @@ def list(tenant_id):
|
|
162 |
key_mapping = {
|
163 |
"chunk_num": "chunk_count",
|
164 |
"doc_num": "document_count",
|
165 |
-
"parser_id": "
|
166 |
"embd_id": "embedding_model"
|
167 |
}
|
168 |
renamed_data = {}
|
|
|
25 |
from api.db.services.user_service import TenantService
|
26 |
from api.settings import RetCode
|
27 |
from api.utils import get_uuid
|
28 |
+
from api.utils.api_utils import get_result, token_required, get_error_data_result, valid
|
29 |
+
|
30 |
|
31 |
@manager.route('/dataset', methods=['POST'])
|
32 |
@token_required
|
33 |
def create(tenant_id):
|
34 |
req = request.json
|
35 |
e, t = TenantService.get_by_id(tenant_id)
|
36 |
+
permission = req.get("permission")
|
37 |
+
language = req.get("language")
|
38 |
+
chunk_method = req.get("chunk_method")
|
39 |
+
valid_permission = ("me", "team")
|
40 |
+
valid_language =("Chinese", "English")
|
41 |
+
valid_chunk_method = ("naive","manual","qa","table","paper","book","laws","presentation","picture","one","knowledge_graph","email")
|
42 |
+
check_validation=valid(permission,valid_permission,language,valid_language,chunk_method,valid_chunk_method)
|
43 |
+
if check_validation:
|
44 |
+
return check_validation
|
45 |
if "tenant_id" in req or "embedding_model" in req:
|
46 |
return get_error_data_result(
|
47 |
+
retmsg="`tenant_id` or `embedding_model` must not be provided")
|
48 |
chunk_count=req.get("chunk_count")
|
49 |
document_count=req.get("document_count")
|
50 |
if chunk_count or document_count:
|
51 |
+
return get_error_data_result(retmsg="`chunk_count` or `document_count` must be 0 or not be provided")
|
52 |
if "name" not in req:
|
53 |
return get_error_data_result(
|
54 |
+
retmsg="`name` is not empty!")
|
55 |
req['id'] = get_uuid()
|
56 |
req["name"] = req["name"].strip()
|
57 |
if req["name"] == "":
|
58 |
return get_error_data_result(
|
59 |
+
retmsg="`name` is not empty string!")
|
60 |
if KnowledgebaseService.query(name=req["name"], tenant_id=tenant_id, status=StatusEnum.VALID.value):
|
61 |
return get_error_data_result(
|
62 |
retmsg="Duplicated knowledgebase name in creating dataset.")
|
|
|
65 |
key_mapping = {
|
66 |
"chunk_num": "chunk_count",
|
67 |
"doc_num": "document_count",
|
68 |
+
"parser_id": "chunk_method",
|
69 |
"embd_id": "embedding_model"
|
70 |
}
|
71 |
mapped_keys = {new_key: req[old_key] for new_key, old_key in key_mapping.items() if old_key in req}
|
|
|
100 |
File2DocumentService.delete_by_document_id(doc.id)
|
101 |
if not KnowledgebaseService.delete_by_id(id):
|
102 |
return get_error_data_result(
|
103 |
+
retmsg="Delete dataset error.(Database error)")
|
104 |
return get_result(retcode=RetCode.SUCCESS)
|
105 |
|
106 |
@manager.route('/dataset/<dataset_id>', methods=['PUT'])
|
|
|
113 |
invalid_keys = {"id", "embd_id", "chunk_num", "doc_num", "parser_id"}
|
114 |
if any(key in req for key in invalid_keys):
|
115 |
return get_error_data_result(retmsg="The input parameters are invalid.")
|
116 |
+
permission = req.get("permission")
|
117 |
+
language = req.get("language")
|
118 |
+
chunk_method = req.get("chunk_method")
|
119 |
+
valid_permission = ("me", "team")
|
120 |
+
valid_language =("Chinese", "English")
|
121 |
+
valid_chunk_method = ("naive","manual","qa","table","paper","book","laws","presentation","picture","one","knowledge_graph","email")
|
122 |
+
check_validation=valid(permission,valid_permission,language,valid_language,chunk_method,valid_chunk_method)
|
123 |
+
if check_validation:
|
124 |
+
return check_validation
|
125 |
if "tenant_id" in req:
|
126 |
if req["tenant_id"] != tenant_id:
|
127 |
return get_error_data_result(
|
128 |
+
retmsg="Can't change `tenant_id`.")
|
129 |
e, kb = KnowledgebaseService.get_by_id(dataset_id)
|
130 |
if "chunk_count" in req:
|
131 |
if req["chunk_count"] != kb.chunk_num:
|
132 |
return get_error_data_result(
|
133 |
+
retmsg="Can't change `chunk_count`.")
|
134 |
req.pop("chunk_count")
|
135 |
if "document_count" in req:
|
136 |
if req['document_count'] != kb.doc_num:
|
137 |
return get_error_data_result(
|
138 |
+
retmsg="Can't change `document_count`.")
|
139 |
req.pop("document_count")
|
140 |
+
if "chunk_method" in req:
|
141 |
+
if kb.chunk_num != 0 and req['chunk_method'] != kb.parser_id:
|
142 |
return get_error_data_result(
|
143 |
+
retmsg="If `chunk_count` is not 0, `chunk_method` is not changeable.")
|
144 |
+
req['parser_id'] = req.pop('chunk_method')
|
145 |
if "embedding_model" in req:
|
146 |
+
if kb.chunk_num != 0 and req['embedding_model'] != kb.embd_id:
|
147 |
return get_error_data_result(
|
148 |
+
retmsg="If `chunk_count` is not 0, `embedding_method` is not changeable.")
|
149 |
req['embd_id'] = req.pop('embedding_model')
|
150 |
if "name" in req:
|
151 |
req["name"] = req["name"].strip()
|
|
|
181 |
key_mapping = {
|
182 |
"chunk_num": "chunk_count",
|
183 |
"doc_num": "document_count",
|
184 |
+
"parser_id": "chunk_method",
|
185 |
"embd_id": "embedding_model"
|
186 |
}
|
187 |
renamed_data = {}
|
api/apps/sdk/doc.py
CHANGED
@@ -88,20 +88,20 @@ def upload(dataset_id, tenant_id):
|
|
88 |
def update_doc(tenant_id, dataset_id, document_id):
|
89 |
req = request.json
|
90 |
if not KnowledgebaseService.query(id=dataset_id, tenant_id=tenant_id):
|
91 |
-
return get_error_data_result(retmsg=
|
92 |
doc = DocumentService.query(kb_id=dataset_id, id=document_id)
|
93 |
if not doc:
|
94 |
-
return get_error_data_result(retmsg=
|
95 |
doc = doc[0]
|
96 |
if "chunk_count" in req:
|
97 |
if req["chunk_count"] != doc.chunk_num:
|
98 |
-
return get_error_data_result(retmsg="Can't change chunk_count
|
99 |
if "token_count" in req:
|
100 |
if req["token_count"] != doc.token_num:
|
101 |
-
return get_error_data_result(retmsg="Can't change token_count
|
102 |
if "progress" in req:
|
103 |
if req['progress'] != doc.progress:
|
104 |
-
return get_error_data_result(retmsg="Can't change progress
|
105 |
|
106 |
if "name" in req and req["name"] != doc.name:
|
107 |
if pathlib.Path(req["name"].lower()).suffix != pathlib.Path(doc.name.lower()).suffix:
|
@@ -121,8 +121,8 @@ def update_doc(tenant_id, dataset_id, document_id):
|
|
121 |
FileService.update_by_id(file.id, {"name": req["name"]})
|
122 |
if "parser_config" in req:
|
123 |
DocumentService.update_parser_config(doc.id, req["parser_config"])
|
124 |
-
if "
|
125 |
-
if doc.parser_id.lower() == req["
|
126 |
return get_result()
|
127 |
|
128 |
if doc.type == FileType.VISUAL or re.search(
|
@@ -130,7 +130,7 @@ def update_doc(tenant_id, dataset_id, document_id):
|
|
130 |
return get_error_data_result(retmsg="Not supported yet!")
|
131 |
|
132 |
e = DocumentService.update_by_id(doc.id,
|
133 |
-
{"parser_id": req["
|
134 |
"run": TaskStatus.UNSTART.value})
|
135 |
if not e:
|
136 |
return get_error_data_result(retmsg="Document not found!")
|
@@ -196,7 +196,7 @@ def list_docs(dataset_id, tenant_id):
|
|
196 |
"chunk_num": "chunk_count",
|
197 |
"kb_id": "knowledgebase_id",
|
198 |
"token_num": "token_count",
|
199 |
-
"parser_id": "
|
200 |
}
|
201 |
renamed_doc = {}
|
202 |
for key, value in doc.items():
|
@@ -213,7 +213,7 @@ def delete(tenant_id,dataset_id):
|
|
213 |
return get_error_data_result(retmsg=f"You don't own the dataset {dataset_id}. ")
|
214 |
req = request.json
|
215 |
if not req.get("ids"):
|
216 |
-
return get_error_data_result(retmsg="ids is required")
|
217 |
doc_ids = req["ids"]
|
218 |
root_folder = FileService.get_root_folder(tenant_id)
|
219 |
pf_id = root_folder["id"]
|
@@ -457,7 +457,7 @@ def rm_chunk(tenant_id,dataset_id,document_id):
|
|
457 |
|
458 |
@manager.route('/dataset/<dataset_id>/document/<document_id>/chunk/<chunk_id>', methods=['PUT'])
|
459 |
@token_required
|
460 |
-
def
|
461 |
try:
|
462 |
res = ELASTICSEARCH.get(
|
463 |
chunk_id, search.index_name(
|
@@ -519,9 +519,15 @@ def retrieval_test(tenant_id):
|
|
519 |
req = request.json
|
520 |
if not req.get("datasets"):
|
521 |
return get_error_data_result("`datasets` is required.")
|
522 |
-
|
523 |
-
|
524 |
-
for
|
|
|
|
|
|
|
|
|
|
|
|
|
525 |
if not KnowledgebaseService.query(id=id,tenant_id=tenant_id):
|
526 |
return get_error_data_result(f"You don't own the dataset {id}.")
|
527 |
if "question" not in req:
|
@@ -538,7 +544,7 @@ def retrieval_test(tenant_id):
|
|
538 |
else:
|
539 |
highlight = True
|
540 |
try:
|
541 |
-
e, kb = KnowledgebaseService.get_by_id(
|
542 |
if not e:
|
543 |
return get_error_data_result(retmsg="Knowledgebase not found!")
|
544 |
embd_mdl = TenantLLMService.model_instance(
|
@@ -554,7 +560,7 @@ def retrieval_test(tenant_id):
|
|
554 |
question += keyword_extraction(chat_mdl, question)
|
555 |
|
556 |
retr = retrievaler if kb.parser_id != ParserType.KG else kg_retrievaler
|
557 |
-
ranks = retr.retrieval(question, embd_mdl, kb.tenant_id,
|
558 |
similarity_threshold, vector_similarity_weight, top,
|
559 |
doc_ids, rerank_mdl=rerank_mdl, highlight=highlight)
|
560 |
for c in ranks["chunks"]:
|
@@ -580,6 +586,6 @@ def retrieval_test(tenant_id):
|
|
580 |
return get_result(data=ranks)
|
581 |
except Exception as e:
|
582 |
if str(e).find("not_found") > 0:
|
583 |
-
return get_result(retmsg=f'No chunk found! Check the chunk
|
584 |
retcode=RetCode.DATA_ERROR)
|
585 |
return server_error_response(e)
|
|
|
88 |
def update_doc(tenant_id, dataset_id, document_id):
|
89 |
req = request.json
|
90 |
if not KnowledgebaseService.query(id=dataset_id, tenant_id=tenant_id):
|
91 |
+
return get_error_data_result(retmsg="You don't own the dataset.")
|
92 |
doc = DocumentService.query(kb_id=dataset_id, id=document_id)
|
93 |
if not doc:
|
94 |
+
return get_error_data_result(retmsg="The dataset doesn't own the document.")
|
95 |
doc = doc[0]
|
96 |
if "chunk_count" in req:
|
97 |
if req["chunk_count"] != doc.chunk_num:
|
98 |
+
return get_error_data_result(retmsg="Can't change `chunk_count`.")
|
99 |
if "token_count" in req:
|
100 |
if req["token_count"] != doc.token_num:
|
101 |
+
return get_error_data_result(retmsg="Can't change `token_count`.")
|
102 |
if "progress" in req:
|
103 |
if req['progress'] != doc.progress:
|
104 |
+
return get_error_data_result(retmsg="Can't change `progress`.")
|
105 |
|
106 |
if "name" in req and req["name"] != doc.name:
|
107 |
if pathlib.Path(req["name"].lower()).suffix != pathlib.Path(doc.name.lower()).suffix:
|
|
|
121 |
FileService.update_by_id(file.id, {"name": req["name"]})
|
122 |
if "parser_config" in req:
|
123 |
DocumentService.update_parser_config(doc.id, req["parser_config"])
|
124 |
+
if "chunk_method" in req:
|
125 |
+
if doc.parser_id.lower() == req["chunk_method"].lower():
|
126 |
return get_result()
|
127 |
|
128 |
if doc.type == FileType.VISUAL or re.search(
|
|
|
130 |
return get_error_data_result(retmsg="Not supported yet!")
|
131 |
|
132 |
e = DocumentService.update_by_id(doc.id,
|
133 |
+
{"parser_id": req["chunk_method"], "progress": 0, "progress_msg": "",
|
134 |
"run": TaskStatus.UNSTART.value})
|
135 |
if not e:
|
136 |
return get_error_data_result(retmsg="Document not found!")
|
|
|
196 |
"chunk_num": "chunk_count",
|
197 |
"kb_id": "knowledgebase_id",
|
198 |
"token_num": "token_count",
|
199 |
+
"parser_id": "chunk_method"
|
200 |
}
|
201 |
renamed_doc = {}
|
202 |
for key, value in doc.items():
|
|
|
213 |
return get_error_data_result(retmsg=f"You don't own the dataset {dataset_id}. ")
|
214 |
req = request.json
|
215 |
if not req.get("ids"):
|
216 |
+
return get_error_data_result(retmsg="`ids` is required")
|
217 |
doc_ids = req["ids"]
|
218 |
root_folder = FileService.get_root_folder(tenant_id)
|
219 |
pf_id = root_folder["id"]
|
|
|
457 |
|
458 |
@manager.route('/dataset/<dataset_id>/document/<document_id>/chunk/<chunk_id>', methods=['PUT'])
|
459 |
@token_required
|
460 |
+
def update_chunk(tenant_id,dataset_id,document_id,chunk_id):
|
461 |
try:
|
462 |
res = ELASTICSEARCH.get(
|
463 |
chunk_id, search.index_name(
|
|
|
519 |
req = request.json
|
520 |
if not req.get("datasets"):
|
521 |
return get_error_data_result("`datasets` is required.")
|
522 |
+
kb_ids = req["datasets"]
|
523 |
+
kbs = KnowledgebaseService.get_by_ids(kb_ids)
|
524 |
+
embd_nms = list(set([kb.embd_id for kb in kbs]))
|
525 |
+
if len(embd_nms) != 1:
|
526 |
+
return get_result(
|
527 |
+
retmsg='Knowledge bases use different embedding models or does not exist."',
|
528 |
+
retcode=RetCode.AUTHENTICATION_ERROR)
|
529 |
+
if isinstance(kb_ids, str): kb_ids = [kb_ids]
|
530 |
+
for id in kb_ids:
|
531 |
if not KnowledgebaseService.query(id=id,tenant_id=tenant_id):
|
532 |
return get_error_data_result(f"You don't own the dataset {id}.")
|
533 |
if "question" not in req:
|
|
|
544 |
else:
|
545 |
highlight = True
|
546 |
try:
|
547 |
+
e, kb = KnowledgebaseService.get_by_id(kb_ids[0])
|
548 |
if not e:
|
549 |
return get_error_data_result(retmsg="Knowledgebase not found!")
|
550 |
embd_mdl = TenantLLMService.model_instance(
|
|
|
560 |
question += keyword_extraction(chat_mdl, question)
|
561 |
|
562 |
retr = retrievaler if kb.parser_id != ParserType.KG else kg_retrievaler
|
563 |
+
ranks = retr.retrieval(question, embd_mdl, kb.tenant_id, kb_ids, page, size,
|
564 |
similarity_threshold, vector_similarity_weight, top,
|
565 |
doc_ids, rerank_mdl=rerank_mdl, highlight=highlight)
|
566 |
for c in ranks["chunks"]:
|
|
|
586 |
return get_result(data=ranks)
|
587 |
except Exception as e:
|
588 |
if str(e).find("not_found") > 0:
|
589 |
+
return get_result(retmsg=f'No chunk found! Check the chunk status please!',
|
590 |
retcode=RetCode.DATA_ERROR)
|
591 |
return server_error_response(e)
|
api/apps/sdk/session.py
CHANGED
@@ -39,7 +39,7 @@ def create(tenant_id,chat_id):
|
|
39 |
"message": [{"role": "assistant", "content": "Hi! I am your assistant,can I help you?"}]
|
40 |
}
|
41 |
if not conv.get("name"):
|
42 |
-
return get_error_data_result(retmsg="
|
43 |
ConversationService.save(**conv)
|
44 |
e, conv = ConversationService.get_by_id(conv["id"])
|
45 |
if not e:
|
@@ -62,11 +62,11 @@ def update(tenant_id,chat_id,session_id):
|
|
62 |
if not DialogService.query(id=chat_id, tenant_id=tenant_id, status=StatusEnum.VALID.value):
|
63 |
return get_error_data_result(retmsg="You do not own the session")
|
64 |
if "message" in req or "messages" in req:
|
65 |
-
return get_error_data_result(retmsg="
|
66 |
if "reference" in req:
|
67 |
-
return get_error_data_result(retmsg="
|
68 |
if "name" in req and not req.get("name"):
|
69 |
-
return get_error_data_result(retmsg="
|
70 |
if not ConversationService.update_by_id(conv_id, req):
|
71 |
return get_error_data_result(retmsg="Session updates error")
|
72 |
return get_result()
|
@@ -87,7 +87,7 @@ def completion(tenant_id,chat_id):
|
|
87 |
"message": [{"role": "assistant", "content": "Hi! I am your assistant,can I help you?"}]
|
88 |
}
|
89 |
if not conv.get("name"):
|
90 |
-
return get_error_data_result(retmsg="
|
91 |
ConversationService.save(**conv)
|
92 |
e, conv = ConversationService.get_by_id(conv["id"])
|
93 |
session_id=conv.id
|
|
|
39 |
"message": [{"role": "assistant", "content": "Hi! I am your assistant,can I help you?"}]
|
40 |
}
|
41 |
if not conv.get("name"):
|
42 |
+
return get_error_data_result(retmsg="`name` can not be empty.")
|
43 |
ConversationService.save(**conv)
|
44 |
e, conv = ConversationService.get_by_id(conv["id"])
|
45 |
if not e:
|
|
|
62 |
if not DialogService.query(id=chat_id, tenant_id=tenant_id, status=StatusEnum.VALID.value):
|
63 |
return get_error_data_result(retmsg="You do not own the session")
|
64 |
if "message" in req or "messages" in req:
|
65 |
+
return get_error_data_result(retmsg="`message` can not be change")
|
66 |
if "reference" in req:
|
67 |
+
return get_error_data_result(retmsg="`reference` can not be change")
|
68 |
if "name" in req and not req.get("name"):
|
69 |
+
return get_error_data_result(retmsg="`name` can not be empty.")
|
70 |
if not ConversationService.update_by_id(conv_id, req):
|
71 |
return get_error_data_result(retmsg="Session updates error")
|
72 |
return get_result()
|
|
|
87 |
"message": [{"role": "assistant", "content": "Hi! I am your assistant,can I help you?"}]
|
88 |
}
|
89 |
if not conv.get("name"):
|
90 |
+
return get_error_data_result(retmsg="`name` can not be empty.")
|
91 |
ConversationService.save(**conv)
|
92 |
e, conv = ConversationService.get_by_id(conv["id"])
|
93 |
session_id=conv.id
|
api/db/db_models.py
CHANGED
@@ -879,8 +879,8 @@ class Dialog(DataBaseModel):
|
|
879 |
default="simple",
|
880 |
help_text="simple|advanced",
|
881 |
index=True)
|
882 |
-
prompt_config = JSONField(null=False, default={"system": "", "prologue": "
|
883 |
-
"parameters": [], "empty_response": "Sorry!
|
884 |
|
885 |
similarity_threshold = FloatField(default=0.2)
|
886 |
vector_similarity_weight = FloatField(default=0.3)
|
|
|
879 |
default="simple",
|
880 |
help_text="simple|advanced",
|
881 |
index=True)
|
882 |
+
prompt_config = JSONField(null=False, default={"system": "", "prologue": "Hi! I'm your assistant, what can I do for you?",
|
883 |
+
"parameters": [], "empty_response": "Sorry! No relevant content was found in the knowledge base!"})
|
884 |
|
885 |
similarity_threshold = FloatField(default=0.2)
|
886 |
vector_similarity_weight = FloatField(default=0.3)
|
api/utils/api_utils.py
CHANGED
@@ -324,4 +324,17 @@ def get_error_data_result(retmsg='Sorry! Data missing!', retcode=RetCode.DATA_ER
|
|
324 |
|
325 |
def generate_confirmation_token(tenent_id):
|
326 |
serializer = URLSafeTimedSerializer(tenent_id)
|
327 |
-
return "ragflow-" + serializer.dumps(get_uuid(), salt=tenent_id)[2:34]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
324 |
|
325 |
def generate_confirmation_token(tenent_id):
|
326 |
serializer = URLSafeTimedSerializer(tenent_id)
|
327 |
+
return "ragflow-" + serializer.dumps(get_uuid(), salt=tenent_id)[2:34]
|
328 |
+
|
329 |
+
|
330 |
+
def valid(permission, valid_permission, language, valid_language, chunk_method, valid_chunk_method):
    """Validate the ``permission``, ``language`` and ``chunk_method`` request fields.

    The fields are checked in that order and the first failure wins.

    Args:
        permission: Requested permission value, or a falsy value if not supplied.
        valid_permission: Collection of accepted permission values.
        language: Requested language value, or a falsy value if not supplied.
        valid_language: Collection of accepted language values.
        chunk_method: Requested chunk method, or a falsy value if not supplied.
        valid_chunk_method: Collection of accepted chunk methods.

    Returns:
        The error result produced by ``valid_parameter`` for the first field
        that is rejected, or ``None`` when every field passes.
    """
    checks = (
        (permission, valid_permission),
        (language, valid_language),
        (chunk_method, valid_chunk_method),
    )
    for value, allowed in checks:
        # Evaluate each check exactly once; the previous version called
        # valid_parameter a second time just to obtain the value to return.
        error = valid_parameter(value, allowed)
        if error:
            return error
    return None


def valid_parameter(parameter, valid_values):
    """Check a single optional parameter against its accepted values.

    Falsy parameters (``None``, ``""``, ...) are treated as "not provided"
    and always pass.

    Args:
        parameter: The value taken from the request.
        valid_values: Collection of accepted values; only membership is tested.

    Returns:
        The result of ``get_error_data_result`` describing the rejected value,
        or ``None`` when the parameter is absent or accepted.
    """
    if parameter and parameter not in valid_values:
        return get_error_data_result(f"{parameter} not in {valid_values}")
    return None
|
sdk/python/ragflow/modules/base.py
CHANGED
@@ -22,7 +22,7 @@ class Base(object):
|
|
22 |
res = self.rag.post(path, json, stream=stream,files=files)
|
23 |
return res
|
24 |
|
25 |
-
def get(self, path, params):
|
26 |
res = self.rag.get(path, params)
|
27 |
return res
|
28 |
|
|
|
22 |
res = self.rag.post(path, json, stream=stream,files=files)
|
23 |
return res
|
24 |
|
25 |
+
def get(self, path, params=None):
|
26 |
res = self.rag.get(path, params)
|
27 |
return res
|
28 |
|
sdk/python/ragflow/modules/chat.py
CHANGED
@@ -73,6 +73,3 @@ class Chat(Base):
|
|
73 |
res = res.json()
|
74 |
if res.get("code") != 0:
|
75 |
raise Exception(res.get("message"))
|
76 |
-
|
77 |
-
def get_prologue(self):
|
78 |
-
return self.prompt.opener
|
|
|
73 |
res = res.json()
|
74 |
if res.get("code") != 0:
|
75 |
raise Exception(res.get("message"))
|
|
|
|
|
|
sdk/python/ragflow/modules/dataset.py
CHANGED
@@ -1,78 +1,78 @@
|
|
1 |
-
from typing import Optional, List
|
2 |
-
|
3 |
-
from transformers.models.bloom.modeling_bloom import bloom_gelu_back
|
4 |
-
|
5 |
-
from .document import Document
|
6 |
-
|
7 |
-
from .base import Base
|
8 |
-
|
9 |
-
|
10 |
-
class DataSet(Base):
|
11 |
-
class ParserConfig(Base):
|
12 |
-
def __init__(self, rag, res_dict):
|
13 |
-
self.chunk_token_count = 128
|
14 |
-
self.layout_recognize = True
|
15 |
-
self.delimiter = '\n!?。;!?'
|
16 |
-
self.task_page_size = 12
|
17 |
-
super().__init__(rag, res_dict)
|
18 |
-
|
19 |
-
def __init__(self, rag, res_dict):
|
20 |
-
self.id = ""
|
21 |
-
self.name = ""
|
22 |
-
self.avatar = ""
|
23 |
-
self.tenant_id = None
|
24 |
-
self.description = ""
|
25 |
-
self.language = "English"
|
26 |
-
self.embedding_model = ""
|
27 |
-
self.permission = "me"
|
28 |
-
self.document_count = 0
|
29 |
-
self.chunk_count = 0
|
30 |
-
self.
|
31 |
-
self.parser_config = None
|
32 |
-
for k in list(res_dict.keys()):
|
33 |
-
if k not in self.__dict__:
|
34 |
-
res_dict.pop(k)
|
35 |
-
super().__init__(rag, res_dict)
|
36 |
-
|
37 |
-
def update(self, update_message: dict):
|
38 |
-
res = self.put(f'/dataset/{self.id}',
|
39 |
-
update_message)
|
40 |
-
res = res.json()
|
41 |
-
if res.get("code") != 0:
|
42 |
-
raise Exception(res["message"])
|
43 |
-
|
44 |
-
def upload_documents(self,document_list: List[dict]):
|
45 |
-
url = f"/dataset/{self.id}/document"
|
46 |
-
files = [("file",(ele["name"],ele["blob"])) for ele in document_list]
|
47 |
-
res = self.post(path=url,json=None,files=files)
|
48 |
-
res = res.json()
|
49 |
-
if res.get("code") != 0:
|
50 |
-
raise Exception(res.get("message"))
|
51 |
-
|
52 |
-
def list_documents(self, id: str = None, keywords: str = None, offset: int =1, limit: int = 1024, orderby: str = "create_time", desc: bool = True):
|
53 |
-
res = self.get(f"/dataset/{self.id}/info",params={"id": id,"keywords": keywords,"offset": offset,"limit": limit,"orderby": orderby,"desc": desc})
|
54 |
-
res = res.json()
|
55 |
-
documents = []
|
56 |
-
if res.get("code") == 0:
|
57 |
-
for document in res["data"].get("docs"):
|
58 |
-
documents.append(Document(self.rag,document))
|
59 |
-
return documents
|
60 |
-
raise Exception(res["message"])
|
61 |
-
|
62 |
-
def delete_documents(self,ids: List[str] = None):
|
63 |
-
res = self.rm(f"/dataset/{self.id}/document",{"ids":ids})
|
64 |
-
res = res.json()
|
65 |
-
if res.get("code") != 0:
|
66 |
-
raise Exception(res["message"])
|
67 |
-
|
68 |
-
def async_parse_documents(self,document_ids):
|
69 |
-
res = self.post(f"/dataset/{self.id}/chunk",{"document_ids":document_ids})
|
70 |
-
res = res.json()
|
71 |
-
if res.get("code") != 0:
|
72 |
-
raise Exception(res.get("message"))
|
73 |
-
|
74 |
-
def async_cancel_parse_documents(self,document_ids):
|
75 |
-
res = self.rm(f"/dataset/{self.id}/chunk",{"document_ids":document_ids})
|
76 |
-
res = res.json()
|
77 |
-
if res.get("code") != 0:
|
78 |
-
raise Exception(res.get("message"))
|
|
|
1 |
+
from typing import Optional, List
|
2 |
+
|
3 |
+
from transformers.models.bloom.modeling_bloom import bloom_gelu_back
|
4 |
+
|
5 |
+
from .document import Document
|
6 |
+
|
7 |
+
from .base import Base
|
8 |
+
|
9 |
+
|
10 |
+
class DataSet(Base):
    """Client-side handle for one dataset and the documents stored in it.

    Every method sends a request through the HTTP helpers inherited from
    ``Base`` (``get``/``post``/``put``/``rm``), parses the JSON reply and
    raises ``Exception`` carrying the reply's ``message`` whenever the
    reply's ``code`` is not ``0``.
    """

    class ParserConfig(Base):
        """Chunking/parsing options attached to a dataset."""

        def __init__(self, rag, res_dict):
            # Defaults; presumably overridden by matching keys in res_dict
            # inside Base.__init__ -- TODO confirm against Base.
            self.chunk_token_count = 128
            self.layout_recognize = True
            self.delimiter = '\n!?。;!?'
            self.task_page_size = 12
            super().__init__(rag, res_dict)

    def __init__(self, rag, res_dict):
        """Build a dataset from an API payload.

        Args:
            rag: Client object handed on to ``Base``.
            res_dict: Mapping of field names to values. Keys that are not
                among the attributes declared below are ignored. The
                mapping itself is left unmodified.
        """
        self.id = ""
        self.name = ""
        self.avatar = ""
        self.tenant_id = None
        self.description = ""
        self.language = "English"
        self.embedding_model = ""
        self.permission = "me"
        self.document_count = 0
        self.chunk_count = 0
        self.chunk_method = "naive"
        self.parser_config = None
        # Filter into a fresh dict instead of popping from the caller's
        # mapping, so the argument is not mutated as a side effect.
        known_fields = {key: value for key, value in res_dict.items() if key in self.__dict__}
        super().__init__(rag, known_fields)

    def update(self, update_message: dict):
        """Send ``update_message`` to ``PUT /dataset/{id}``.

        Raises:
            Exception: If the reply's ``code`` is not ``0``.
        """
        res = self.put(f'/dataset/{self.id}', update_message)
        res = res.json()
        if res.get("code") != 0:
            raise Exception(res.get("message"))

    def upload_documents(self, document_list: List[dict]):
        """Upload files to ``POST /dataset/{id}/document``.

        Args:
            document_list: Dicts that each provide a ``"name"`` and a
                ``"blob"`` entry; every dict becomes one ``file`` part.

        Raises:
            Exception: If the reply's ``code`` is not ``0``.
        """
        url = f"/dataset/{self.id}/document"
        files = [("file", (ele["name"], ele["blob"])) for ele in document_list]
        res = self.post(path=url, json=None, files=files)
        res = res.json()
        if res.get("code") != 0:
            raise Exception(res.get("message"))

    def list_documents(self, id: str = None, keywords: str = None, offset: int = 1, limit: int = 1024,
                       orderby: str = "create_time", desc: bool = True):
        """Query ``GET /dataset/{id}/info`` and wrap the results.

        All arguments are forwarded unchanged as query parameters.

        Returns:
            A list of ``Document`` objects, one per entry under
            ``data["docs"]`` in the reply; empty when that entry is
            missing or null.

        Raises:
            Exception: If the reply's ``code`` is not ``0``.
        """
        res = self.get(f"/dataset/{self.id}/info",
                       params={"id": id, "keywords": keywords, "offset": offset, "limit": limit,
                               "orderby": orderby, "desc": desc})
        res = res.json()
        if res.get("code") != 0:
            raise Exception(res.get("message"))
        # `or []` keeps a missing/null "docs" entry from raising TypeError.
        return [Document(self.rag, document) for document in res["data"].get("docs") or []]

    def delete_documents(self, ids: List[str] = None):
        """Send ``{"ids": ids}`` to the delete helper for ``/dataset/{id}/document``.

        Raises:
            Exception: If the reply's ``code`` is not ``0``.
        """
        res = self.rm(f"/dataset/{self.id}/document", {"ids": ids})
        res = res.json()
        if res.get("code") != 0:
            raise Exception(res.get("message"))

    def async_parse_documents(self, document_ids):
        """Post ``{"document_ids": ...}`` to ``/dataset/{id}/chunk``.

        Raises:
            Exception: If the reply's ``code`` is not ``0``.
        """
        res = self.post(f"/dataset/{self.id}/chunk", {"document_ids": document_ids})
        res = res.json()
        if res.get("code") != 0:
            raise Exception(res.get("message"))

    def async_cancel_parse_documents(self, document_ids):
        """Send ``{"document_ids": ...}`` to the delete helper for ``/dataset/{id}/chunk``.

        Raises:
            Exception: If the reply's ``code`` is not ``0``.
        """
        res = self.rm(f"/dataset/{self.id}/chunk", {"document_ids": document_ids})
        res = res.json()
        if res.get("code") != 0:
            raise Exception(res.get("message"))
|
sdk/python/ragflow/modules/document.py
CHANGED
@@ -1,7 +1,4 @@
|
|
1 |
-
import
|
2 |
-
|
3 |
-
from PIL.ImageFile import raise_oserror
|
4 |
-
|
5 |
from .base import Base
|
6 |
from .chunk import Chunk
|
7 |
from typing import List
|
@@ -13,7 +10,7 @@ class Document(Base):
|
|
13 |
self.name = ""
|
14 |
self.thumbnail = None
|
15 |
self.knowledgebase_id = None
|
16 |
-
self.
|
17 |
self.parser_config = {"pages": [[1, 1000000]]}
|
18 |
self.source_type = "local"
|
19 |
self.type = ""
|
@@ -32,6 +29,23 @@ class Document(Base):
|
|
32 |
res_dict.pop(k)
|
33 |
super().__init__(rag, res_dict)
|
34 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
def list_chunks(self,offset=0, limit=30, keywords="", id:str=None):
|
36 |
data={"document_id": self.id,"keywords": keywords,"offset":offset,"limit":limit,"id":id}
|
37 |
res = self.get(f'/dataset/{self.knowledgebase_id}/document/{self.id}/chunk', data)
|
|
|
1 |
+
import json
|
|
|
|
|
|
|
2 |
from .base import Base
|
3 |
from .chunk import Chunk
|
4 |
from typing import List
|
|
|
10 |
self.name = ""
|
11 |
self.thumbnail = None
|
12 |
self.knowledgebase_id = None
|
13 |
+
self.chunk_method = ""
|
14 |
self.parser_config = {"pages": [[1, 1000000]]}
|
15 |
self.source_type = "local"
|
16 |
self.type = ""
|
|
|
29 |
res_dict.pop(k)
|
30 |
super().__init__(rag, res_dict)
|
31 |
|
32 |
+
|
33 |
+
def update(self, update_message: dict):
    """Send ``update_message`` to ``PUT /dataset/{knowledgebase_id}/info/{id}``.

    Returns ``None`` when the reply's ``code`` is ``0``; otherwise raises
    ``Exception`` carrying the reply's ``message``.
    """
    endpoint = f'/dataset/{self.knowledgebase_id}/info/{self.id}'
    reply = self.put(endpoint, update_message).json()
    if reply.get("code") == 0:
        return
    raise Exception(reply["message"])
|
39 |
+
|
40 |
+
def download(self):
    """Fetch this document's raw content.

    Requests ``GET /dataset/{knowledgebase_id}/document/{id}``.

    Returns:
        The response's ``content`` when the body cannot be parsed as JSON.

    Raises:
        Exception: When the body parses as JSON; the exception carries
            the payload's ``message``.
    """
    res = self.get(f"/dataset/{self.knowledgebase_id}/document/{self.id}")
    try:
        payload = res.json()
    except ValueError:
        # json.JSONDecodeError is a ValueError subclass; catching the base
        # also covers decode errors raised by other JSON backends the HTTP
        # client may use.
        return res.content
    # NOTE(review): any JSON-parsable body is treated as an error reply, so
    # a stored document that is itself valid JSON cannot be downloaded
    # through this path -- confirm this is intended.
    raise Exception(payload.get("message"))
|
47 |
+
|
48 |
+
|
49 |
def list_chunks(self,offset=0, limit=30, keywords="", id:str=None):
|
50 |
data={"document_id": self.id,"keywords": keywords,"offset":offset,"limit":limit,"id":id}
|
51 |
res = self.get(f'/dataset/{self.knowledgebase_id}/document/{self.id}/chunk', data)
|
sdk/python/ragflow/ragflow.py
CHANGED
@@ -24,11 +24,11 @@ from .modules.document import Document
|
|
24 |
|
25 |
|
26 |
class RAGFlow:
|
27 |
-
def __init__(self,
|
28 |
"""
|
29 |
api_url: http://<host_address>/api/v1
|
30 |
"""
|
31 |
-
self.user_key =
|
32 |
self.api_url = f"{base_url}/api/{version}"
|
33 |
self.authorization_header = {"Authorization": "{} {}".format("Bearer", self.user_key)}
|
34 |
|
@@ -50,7 +50,7 @@ class RAGFlow:
|
|
50 |
|
51 |
def create_dataset(self, name: str, avatar: str = "", description: str = "", language: str = "English",
|
52 |
permission: str = "me",
|
53 |
-
document_count: int = 0, chunk_count: int = 0,
|
54 |
parser_config: DataSet.ParserConfig = None) -> DataSet:
|
55 |
if parser_config is None:
|
56 |
parser_config = DataSet.ParserConfig(self, {"chunk_token_count": 128, "layout_recognize": True,
|
@@ -59,7 +59,7 @@ class RAGFlow:
|
|
59 |
res = self.post("/dataset",
|
60 |
{"name": name, "avatar": avatar, "description": description, "language": language,
|
61 |
"permission": permission,
|
62 |
-
"document_count": document_count, "chunk_count": chunk_count, "
|
63 |
"parser_config": parser_config
|
64 |
}
|
65 |
)
|
@@ -93,7 +93,7 @@ class RAGFlow:
|
|
93 |
return result_list
|
94 |
raise Exception(res["message"])
|
95 |
|
96 |
-
def create_chat(self, name: str
|
97 |
llm: Chat.LLM = None, prompt: Chat.Prompt = None) -> Chat:
|
98 |
datasets = []
|
99 |
for dataset in knowledgebases:
|
|
|
24 |
|
25 |
|
26 |
class RAGFlow:
|
27 |
+
def __init__(self, api_key, base_url, version='v1'):
|
28 |
"""
|
29 |
api_url: http://<host_address>/api/v1
|
30 |
"""
|
31 |
+
self.user_key = api_key
|
32 |
self.api_url = f"{base_url}/api/{version}"
|
33 |
self.authorization_header = {"Authorization": "{} {}".format("Bearer", self.user_key)}
|
34 |
|
|
|
50 |
|
51 |
def create_dataset(self, name: str, avatar: str = "", description: str = "", language: str = "English",
|
52 |
permission: str = "me",
|
53 |
+
document_count: int = 0, chunk_count: int = 0, chunk_method: str = "naive",
|
54 |
parser_config: DataSet.ParserConfig = None) -> DataSet:
|
55 |
if parser_config is None:
|
56 |
parser_config = DataSet.ParserConfig(self, {"chunk_token_count": 128, "layout_recognize": True,
|
|
|
59 |
res = self.post("/dataset",
|
60 |
{"name": name, "avatar": avatar, "description": description, "language": language,
|
61 |
"permission": permission,
|
62 |
+
"document_count": document_count, "chunk_count": chunk_count, "chunk_method": chunk_method,
|
63 |
"parser_config": parser_config
|
64 |
}
|
65 |
)
|
|
|
93 |
return result_list
|
94 |
raise Exception(res["message"])
|
95 |
|
96 |
+
def create_chat(self, name: str, avatar: str = "", knowledgebases: List[DataSet] = [],
|
97 |
llm: Chat.LLM = None, prompt: Chat.Prompt = None) -> Chat:
|
98 |
datasets = []
|
99 |
for dataset in knowledgebases:
|
sdk/python/test/t_document.py
CHANGED
@@ -35,7 +35,7 @@ class TestDocument(TestSdk):
|
|
35 |
def test_update_document_with_success(self):
|
36 |
"""
|
37 |
Test updating a document with success.
|
38 |
-
Update name or
|
39 |
"""
|
40 |
rag = RAGFlow(API_KEY, HOST_ADDRESS)
|
41 |
ds = rag.list_datasets(name="God")
|
@@ -43,7 +43,7 @@ class TestDocument(TestSdk):
|
|
43 |
doc = ds.list_documents()
|
44 |
doc = doc[0]
|
45 |
if isinstance(doc, Document):
|
46 |
-
res = doc.update({"
|
47 |
assert res is None, f"Failed to update document, error: {res}"
|
48 |
else:
|
49 |
assert False, f"Failed to get document, error: {doc}"
|
|
|
35 |
def test_update_document_with_success(self):
|
36 |
"""
|
37 |
Test updating a document with success.
|
38 |
+
Update name or chunk_method are supported
|
39 |
"""
|
40 |
rag = RAGFlow(API_KEY, HOST_ADDRESS)
|
41 |
ds = rag.list_datasets(name="God")
|
|
|
43 |
doc = ds.list_documents()
|
44 |
doc = doc[0]
|
45 |
if isinstance(doc, Document):
|
46 |
+
res = doc.update({"chunk_method":"manual","name":"manual.txt"})
|
47 |
assert res is None, f"Failed to update document, error: {res}"
|
48 |
else:
|
49 |
assert False, f"Failed to get document, error: {doc}"
|