Add test for API (#3134)
Browse files### What problem does this PR solve?
Add test for API
### Type of change
- [x] New Feature (non-breaking change which adds functionality)
---------
Co-authored-by: liuhua <[email protected]>
Co-authored-by: Zhichang Yu <[email protected]>
- .github/workflows/tests.yml +1 -1
- api/apps/sdk/doc.py +11 -7
- api/db/services/document_service.py +42 -37
- sdk/python/ragflow_sdk/ragflow.py +1 -1
- sdk/python/test/common.py +2 -0
- sdk/python/test/t_chat.py +4 -6
- sdk/python/test/t_chunk.py +191 -0
- sdk/python/test/t_dataset.py +6 -7
- sdk/python/test/t_document.py +33 -293
- sdk/python/test/t_session.py +1 -3
.github/workflows/tests.yml
CHANGED
@@ -78,7 +78,7 @@ jobs:
|
|
78 |
echo "Waiting for service to be available..."
|
79 |
sleep 5
|
80 |
done
|
81 |
-
cd sdk/python && poetry install && source .venv/bin/activate && cd test && pytest t_dataset.py t_chat.py t_session.py
|
82 |
|
83 |
- name: Stop ragflow:dev
|
84 |
if: always() # always run this step even if previous steps failed
|
|
|
78 |
echo "Waiting for service to be available..."
|
79 |
sleep 5
|
80 |
done
|
81 |
+
cd sdk/python && poetry install && source .venv/bin/activate && cd test && pytest t_dataset.py t_chat.py t_session.py t_document.py t_chunk.py
|
82 |
|
83 |
- name: Stop ragflow:dev
|
84 |
if: always() # always run this step even if previous steps failed
|
api/apps/sdk/doc.py
CHANGED
@@ -194,8 +194,11 @@ def list_docs(dataset_id, tenant_id):
|
|
194 |
if not KnowledgebaseService.query(id=dataset_id, tenant_id=tenant_id):
|
195 |
return get_error_data_result(retmsg=f"You don't own the dataset {dataset_id}. ")
|
196 |
id = request.args.get("id")
|
|
|
197 |
if not DocumentService.query(id=id,kb_id=dataset_id):
|
198 |
return get_error_data_result(retmsg=f"You don't own the document {id}.")
|
|
|
|
|
199 |
offset = int(request.args.get("offset", 1))
|
200 |
keywords = request.args.get("keywords","")
|
201 |
limit = int(request.args.get("limit", 1024))
|
@@ -204,7 +207,7 @@ def list_docs(dataset_id, tenant_id):
|
|
204 |
desc = False
|
205 |
else:
|
206 |
desc = True
|
207 |
-
docs, tol = DocumentService.get_list(dataset_id, offset, limit, orderby, desc, keywords, id)
|
208 |
|
209 |
# rename key's name
|
210 |
renamed_doc_list = []
|
@@ -321,8 +324,8 @@ def stop_parsing(tenant_id,dataset_id):
|
|
321 |
doc = DocumentService.query(id=id, kb_id=dataset_id)
|
322 |
if not doc:
|
323 |
return get_error_data_result(retmsg=f"You don't own the document {id}.")
|
324 |
-
if doc[0].progress ==
|
325 |
-
return get_error_data_result("Can't stop parsing document with progress at 0 or
|
326 |
info = {"run": "2", "progress": 0,"chunk_num":0}
|
327 |
DocumentService.update_by_id(id, info)
|
328 |
ELASTICSEARCH.deleteByQuery(
|
@@ -414,9 +417,9 @@ def list_chunks(tenant_id,dataset_id,document_id):
|
|
414 |
for key, value in chunk.items():
|
415 |
new_key = key_mapping.get(key, key)
|
416 |
renamed_chunk[new_key] = value
|
417 |
-
if renamed_chunk["available"] ==
|
418 |
renamed_chunk["available"] = False
|
419 |
-
if renamed_chunk["available"] ==
|
420 |
renamed_chunk["available"] = True
|
421 |
res["chunks"].append(renamed_chunk)
|
422 |
return get_result(data=res)
|
@@ -464,6 +467,7 @@ def add_chunk(tenant_id,dataset_id,document_id):
|
|
464 |
DocumentService.increment_chunk_num(
|
465 |
doc.id, doc.kb_id, c, 1, 0)
|
466 |
d["chunk_id"] = chunk_id
|
|
|
467 |
# rename keys
|
468 |
key_mapping = {
|
469 |
"chunk_id": "id",
|
@@ -581,10 +585,10 @@ def update_chunk(tenant_id,dataset_id,document_id,chunk_id):
|
|
581 |
def retrieval_test(tenant_id):
|
582 |
req = request.json
|
583 |
if not req.get("dataset_ids"):
|
584 |
-
return get_error_data_result("`
|
585 |
kb_ids = req["dataset_ids"]
|
586 |
if not isinstance(kb_ids,list):
|
587 |
-
return get_error_data_result("`
|
588 |
kbs = KnowledgebaseService.get_by_ids(kb_ids)
|
589 |
for id in kb_ids:
|
590 |
if not KnowledgebaseService.query(id=id,tenant_id=tenant_id):
|
|
|
194 |
if not KnowledgebaseService.query(id=dataset_id, tenant_id=tenant_id):
|
195 |
return get_error_data_result(retmsg=f"You don't own the dataset {dataset_id}. ")
|
196 |
id = request.args.get("id")
|
197 |
+
name = request.args.get("name")
|
198 |
if not DocumentService.query(id=id,kb_id=dataset_id):
|
199 |
return get_error_data_result(retmsg=f"You don't own the document {id}.")
|
200 |
+
if not DocumentService.query(name=name,kb_id=dataset_id):
|
201 |
+
return get_error_data_result(retmsg=f"You don't own the document {name}.")
|
202 |
offset = int(request.args.get("offset", 1))
|
203 |
keywords = request.args.get("keywords","")
|
204 |
limit = int(request.args.get("limit", 1024))
|
|
|
207 |
desc = False
|
208 |
else:
|
209 |
desc = True
|
210 |
+
docs, tol = DocumentService.get_list(dataset_id, offset, limit, orderby, desc, keywords, id,name)
|
211 |
|
212 |
# rename key's name
|
213 |
renamed_doc_list = []
|
|
|
324 |
doc = DocumentService.query(id=id, kb_id=dataset_id)
|
325 |
if not doc:
|
326 |
return get_error_data_result(retmsg=f"You don't own the document {id}.")
|
327 |
+
if int(doc[0].progress) == 1 or int(doc[0].progress) == 0:
|
328 |
+
return get_error_data_result("Can't stop parsing document with progress at 0 or 1")
|
329 |
info = {"run": "2", "progress": 0,"chunk_num":0}
|
330 |
DocumentService.update_by_id(id, info)
|
331 |
ELASTICSEARCH.deleteByQuery(
|
|
|
417 |
for key, value in chunk.items():
|
418 |
new_key = key_mapping.get(key, key)
|
419 |
renamed_chunk[new_key] = value
|
420 |
+
if renamed_chunk["available"] == 0:
|
421 |
renamed_chunk["available"] = False
|
422 |
+
if renamed_chunk["available"] == 1:
|
423 |
renamed_chunk["available"] = True
|
424 |
res["chunks"].append(renamed_chunk)
|
425 |
return get_result(data=res)
|
|
|
467 |
DocumentService.increment_chunk_num(
|
468 |
doc.id, doc.kb_id, c, 1, 0)
|
469 |
d["chunk_id"] = chunk_id
|
470 |
+
d["kb_id"]=doc.kb_id
|
471 |
# rename keys
|
472 |
key_mapping = {
|
473 |
"chunk_id": "id",
|
|
|
585 |
def retrieval_test(tenant_id):
|
586 |
req = request.json
|
587 |
if not req.get("dataset_ids"):
|
588 |
+
return get_error_data_result("`dataset_ids` is required.")
|
589 |
kb_ids = req["dataset_ids"]
|
590 |
if not isinstance(kb_ids,list):
|
591 |
+
return get_error_data_result("`dataset_ids` should be a list")
|
592 |
kbs = KnowledgebaseService.get_by_ids(kb_ids)
|
593 |
for id in kb_ids:
|
594 |
if not KnowledgebaseService.query(id=id,tenant_id=tenant_id):
|
api/db/services/document_service.py
CHANGED
@@ -52,11 +52,15 @@ class DocumentService(CommonService):
|
|
52 |
@classmethod
|
53 |
@DB.connection_context()
|
54 |
def get_list(cls, kb_id, page_number, items_per_page,
|
55 |
-
|
56 |
-
docs =cls.model.select().where(cls.model.kb_id==kb_id)
|
57 |
if id:
|
58 |
docs = docs.where(
|
59 |
-
cls.model.id== id
|
|
|
|
|
|
|
|
|
60 |
if keywords:
|
61 |
docs = docs.where(
|
62 |
fn.LOWER(cls.model.name).contains(keywords.lower())
|
@@ -70,7 +74,6 @@ class DocumentService(CommonService):
|
|
70 |
count = docs.count()
|
71 |
return list(docs.dicts()), count
|
72 |
|
73 |
-
|
74 |
@classmethod
|
75 |
@DB.connection_context()
|
76 |
def get_by_kb_id(cls, kb_id, page_number, items_per_page,
|
@@ -162,26 +165,27 @@ class DocumentService(CommonService):
|
|
162 |
cls.model.update_time]
|
163 |
docs = cls.model.select(*fields) \
|
164 |
.join(Knowledgebase, on=(cls.model.kb_id == Knowledgebase.id)) \
|
165 |
-
.join(Tenant, on=(Knowledgebase.tenant_id == Tenant.id))\
|
166 |
.where(
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
.order_by(cls.model.update_time.asc())
|
173 |
return list(docs.dicts())
|
174 |
|
175 |
@classmethod
|
176 |
@DB.connection_context()
|
177 |
def get_unfinished_docs(cls):
|
178 |
-
fields = [cls.model.id, cls.model.process_begin_at, cls.model.parser_config, cls.model.progress_msg,
|
|
|
179 |
docs = cls.model.select(*fields) \
|
180 |
.where(
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
|
185 |
return list(docs.dicts())
|
186 |
|
187 |
@classmethod
|
@@ -196,12 +200,12 @@ class DocumentService(CommonService):
|
|
196 |
"Document not found which is supposed to be there")
|
197 |
num = Knowledgebase.update(
|
198 |
token_num=Knowledgebase.token_num +
|
199 |
-
|
200 |
chunk_num=Knowledgebase.chunk_num +
|
201 |
-
|
202 |
Knowledgebase.id == kb_id).execute()
|
203 |
return num
|
204 |
-
|
205 |
@classmethod
|
206 |
@DB.connection_context()
|
207 |
def decrement_chunk_num(cls, doc_id, kb_id, token_num, chunk_num, duation):
|
@@ -214,13 +218,13 @@ class DocumentService(CommonService):
|
|
214 |
"Document not found which is supposed to be there")
|
215 |
num = Knowledgebase.update(
|
216 |
token_num=Knowledgebase.token_num -
|
217 |
-
|
218 |
chunk_num=Knowledgebase.chunk_num -
|
219 |
-
|
220 |
).where(
|
221 |
Knowledgebase.id == kb_id).execute()
|
222 |
return num
|
223 |
-
|
224 |
@classmethod
|
225 |
@DB.connection_context()
|
226 |
def clear_chunk_num(cls, doc_id):
|
@@ -229,10 +233,10 @@ class DocumentService(CommonService):
|
|
229 |
|
230 |
num = Knowledgebase.update(
|
231 |
token_num=Knowledgebase.token_num -
|
232 |
-
|
233 |
chunk_num=Knowledgebase.chunk_num -
|
234 |
-
|
235 |
-
doc_num=Knowledgebase.doc_num-1
|
236 |
).where(
|
237 |
Knowledgebase.id == doc.kb_id).execute()
|
238 |
return num
|
@@ -243,8 +247,8 @@ class DocumentService(CommonService):
|
|
243 |
docs = cls.model.select(
|
244 |
Knowledgebase.tenant_id).join(
|
245 |
Knowledgebase, on=(
|
246 |
-
|
247 |
-
|
248 |
docs = docs.dicts()
|
249 |
if not docs:
|
250 |
return
|
@@ -270,8 +274,8 @@ class DocumentService(CommonService):
|
|
270 |
cls.model.id).join(
|
271 |
Knowledgebase, on=(
|
272 |
Knowledgebase.id == cls.model.kb_id)
|
273 |
-
|
274 |
-
|
275 |
docs = docs.dicts()
|
276 |
if not docs:
|
277 |
return False
|
@@ -284,7 +288,7 @@ class DocumentService(CommonService):
|
|
284 |
cls.model.id).join(
|
285 |
Knowledgebase, on=(
|
286 |
Knowledgebase.id == cls.model.kb_id)
|
287 |
-
|
288 |
docs = docs.dicts()
|
289 |
if not docs:
|
290 |
return False
|
@@ -296,13 +300,13 @@ class DocumentService(CommonService):
|
|
296 |
docs = cls.model.select(
|
297 |
Knowledgebase.embd_id).join(
|
298 |
Knowledgebase, on=(
|
299 |
-
|
300 |
-
|
301 |
docs = docs.dicts()
|
302 |
if not docs:
|
303 |
return
|
304 |
return docs[0]["embd_id"]
|
305 |
-
|
306 |
@classmethod
|
307 |
@DB.connection_context()
|
308 |
def get_doc_id_by_doc_name(cls, doc_name):
|
@@ -338,6 +342,7 @@ class DocumentService(CommonService):
|
|
338 |
dfs_update(old[k], v)
|
339 |
else:
|
340 |
old[k] = v
|
|
|
341 |
dfs_update(d.parser_config, config)
|
342 |
cls.update_by_id(id, {"parser_config": d.parser_config})
|
343 |
|
@@ -372,7 +377,7 @@ class DocumentService(CommonService):
|
|
372 |
finished = True
|
373 |
bad = 0
|
374 |
e, doc = DocumentService.get_by_id(d["id"])
|
375 |
-
status = doc.run#TaskStatus.RUNNING.value
|
376 |
for t in tsks:
|
377 |
if 0 <= t.progress < 1:
|
378 |
finished = False
|
@@ -386,9 +391,10 @@ class DocumentService(CommonService):
|
|
386 |
prg = -1
|
387 |
status = TaskStatus.FAIL.value
|
388 |
elif finished:
|
389 |
-
if d["parser_config"].get("raptor", {}).get("use_raptor") and d["progress_msg"].lower().find(
|
|
|
390 |
queue_raptor_tasks(d)
|
391 |
-
prg = 0.98 * len(tsks)/(len(tsks)+1)
|
392 |
msg.append("------ RAPTOR -------")
|
393 |
else:
|
394 |
status = TaskStatus.DONE.value
|
@@ -414,7 +420,6 @@ class DocumentService(CommonService):
|
|
414 |
return len(cls.model.select(cls.model.id).where(
|
415 |
cls.model.kb_id == kb_id).dicts())
|
416 |
|
417 |
-
|
418 |
@classmethod
|
419 |
@DB.connection_context()
|
420 |
def do_cancel(cls, doc_id):
|
@@ -579,4 +584,4 @@ def doc_upload_and_parse(conversation_id, file_objs, user_id):
|
|
579 |
DocumentService.increment_chunk_num(
|
580 |
doc_id, kb.id, token_counts[doc_id], chunk_counts[doc_id], 0)
|
581 |
|
582 |
-
return [d["id"] for d,_ in files]
|
|
|
52 |
@classmethod
|
53 |
@DB.connection_context()
|
54 |
def get_list(cls, kb_id, page_number, items_per_page,
|
55 |
+
orderby, desc, keywords, id, name):
|
56 |
+
docs = cls.model.select().where(cls.model.kb_id == kb_id)
|
57 |
if id:
|
58 |
docs = docs.where(
|
59 |
+
cls.model.id == id)
|
60 |
+
if name:
|
61 |
+
docs = docs.where(
|
62 |
+
cls.model.name == name
|
63 |
+
)
|
64 |
if keywords:
|
65 |
docs = docs.where(
|
66 |
fn.LOWER(cls.model.name).contains(keywords.lower())
|
|
|
74 |
count = docs.count()
|
75 |
return list(docs.dicts()), count
|
76 |
|
|
|
77 |
@classmethod
|
78 |
@DB.connection_context()
|
79 |
def get_by_kb_id(cls, kb_id, page_number, items_per_page,
|
|
|
165 |
cls.model.update_time]
|
166 |
docs = cls.model.select(*fields) \
|
167 |
.join(Knowledgebase, on=(cls.model.kb_id == Knowledgebase.id)) \
|
168 |
+
.join(Tenant, on=(Knowledgebase.tenant_id == Tenant.id)) \
|
169 |
.where(
|
170 |
+
cls.model.status == StatusEnum.VALID.value,
|
171 |
+
~(cls.model.type == FileType.VIRTUAL.value),
|
172 |
+
cls.model.progress == 0,
|
173 |
+
cls.model.update_time >= current_timestamp() - 1000 * 600,
|
174 |
+
cls.model.run == TaskStatus.RUNNING.value) \
|
175 |
.order_by(cls.model.update_time.asc())
|
176 |
return list(docs.dicts())
|
177 |
|
178 |
@classmethod
|
179 |
@DB.connection_context()
|
180 |
def get_unfinished_docs(cls):
|
181 |
+
fields = [cls.model.id, cls.model.process_begin_at, cls.model.parser_config, cls.model.progress_msg,
|
182 |
+
cls.model.run]
|
183 |
docs = cls.model.select(*fields) \
|
184 |
.where(
|
185 |
+
cls.model.status == StatusEnum.VALID.value,
|
186 |
+
~(cls.model.type == FileType.VIRTUAL.value),
|
187 |
+
cls.model.progress < 1,
|
188 |
+
cls.model.progress > 0)
|
189 |
return list(docs.dicts())
|
190 |
|
191 |
@classmethod
|
|
|
200 |
"Document not found which is supposed to be there")
|
201 |
num = Knowledgebase.update(
|
202 |
token_num=Knowledgebase.token_num +
|
203 |
+
token_num,
|
204 |
chunk_num=Knowledgebase.chunk_num +
|
205 |
+
chunk_num).where(
|
206 |
Knowledgebase.id == kb_id).execute()
|
207 |
return num
|
208 |
+
|
209 |
@classmethod
|
210 |
@DB.connection_context()
|
211 |
def decrement_chunk_num(cls, doc_id, kb_id, token_num, chunk_num, duation):
|
|
|
218 |
"Document not found which is supposed to be there")
|
219 |
num = Knowledgebase.update(
|
220 |
token_num=Knowledgebase.token_num -
|
221 |
+
token_num,
|
222 |
chunk_num=Knowledgebase.chunk_num -
|
223 |
+
chunk_num
|
224 |
).where(
|
225 |
Knowledgebase.id == kb_id).execute()
|
226 |
return num
|
227 |
+
|
228 |
@classmethod
|
229 |
@DB.connection_context()
|
230 |
def clear_chunk_num(cls, doc_id):
|
|
|
233 |
|
234 |
num = Knowledgebase.update(
|
235 |
token_num=Knowledgebase.token_num -
|
236 |
+
doc.token_num,
|
237 |
chunk_num=Knowledgebase.chunk_num -
|
238 |
+
doc.chunk_num,
|
239 |
+
doc_num=Knowledgebase.doc_num - 1
|
240 |
).where(
|
241 |
Knowledgebase.id == doc.kb_id).execute()
|
242 |
return num
|
|
|
247 |
docs = cls.model.select(
|
248 |
Knowledgebase.tenant_id).join(
|
249 |
Knowledgebase, on=(
|
250 |
+
Knowledgebase.id == cls.model.kb_id)).where(
|
251 |
+
cls.model.id == doc_id, Knowledgebase.status == StatusEnum.VALID.value)
|
252 |
docs = docs.dicts()
|
253 |
if not docs:
|
254 |
return
|
|
|
274 |
cls.model.id).join(
|
275 |
Knowledgebase, on=(
|
276 |
Knowledgebase.id == cls.model.kb_id)
|
277 |
+
).join(UserTenant, on=(UserTenant.tenant_id == Knowledgebase.tenant_id)
|
278 |
+
).where(cls.model.id == doc_id, UserTenant.user_id == user_id).paginate(0, 1)
|
279 |
docs = docs.dicts()
|
280 |
if not docs:
|
281 |
return False
|
|
|
288 |
cls.model.id).join(
|
289 |
Knowledgebase, on=(
|
290 |
Knowledgebase.id == cls.model.kb_id)
|
291 |
+
).where(cls.model.id == doc_id, Knowledgebase.created_by == user_id).paginate(0, 1)
|
292 |
docs = docs.dicts()
|
293 |
if not docs:
|
294 |
return False
|
|
|
300 |
docs = cls.model.select(
|
301 |
Knowledgebase.embd_id).join(
|
302 |
Knowledgebase, on=(
|
303 |
+
Knowledgebase.id == cls.model.kb_id)).where(
|
304 |
+
cls.model.id == doc_id, Knowledgebase.status == StatusEnum.VALID.value)
|
305 |
docs = docs.dicts()
|
306 |
if not docs:
|
307 |
return
|
308 |
return docs[0]["embd_id"]
|
309 |
+
|
310 |
@classmethod
|
311 |
@DB.connection_context()
|
312 |
def get_doc_id_by_doc_name(cls, doc_name):
|
|
|
342 |
dfs_update(old[k], v)
|
343 |
else:
|
344 |
old[k] = v
|
345 |
+
|
346 |
dfs_update(d.parser_config, config)
|
347 |
cls.update_by_id(id, {"parser_config": d.parser_config})
|
348 |
|
|
|
377 |
finished = True
|
378 |
bad = 0
|
379 |
e, doc = DocumentService.get_by_id(d["id"])
|
380 |
+
status = doc.run # TaskStatus.RUNNING.value
|
381 |
for t in tsks:
|
382 |
if 0 <= t.progress < 1:
|
383 |
finished = False
|
|
|
391 |
prg = -1
|
392 |
status = TaskStatus.FAIL.value
|
393 |
elif finished:
|
394 |
+
if d["parser_config"].get("raptor", {}).get("use_raptor") and d["progress_msg"].lower().find(
|
395 |
+
" raptor") < 0:
|
396 |
queue_raptor_tasks(d)
|
397 |
+
prg = 0.98 * len(tsks) / (len(tsks) + 1)
|
398 |
msg.append("------ RAPTOR -------")
|
399 |
else:
|
400 |
status = TaskStatus.DONE.value
|
|
|
420 |
return len(cls.model.select(cls.model.id).where(
|
421 |
cls.model.kb_id == kb_id).dicts())
|
422 |
|
|
|
423 |
@classmethod
|
424 |
@DB.connection_context()
|
425 |
def do_cancel(cls, doc_id):
|
|
|
584 |
DocumentService.increment_chunk_num(
|
585 |
doc_id, kb.id, token_counts[doc_id], chunk_counts[doc_id], 0)
|
586 |
|
587 |
+
return [d["id"] for d, _ in files]
|
sdk/python/ragflow_sdk/ragflow.py
CHANGED
@@ -166,7 +166,7 @@ class RAGFlow:
|
|
166 |
"rerank_id": rerank_id,
|
167 |
"keyword": keyword,
|
168 |
"question": question,
|
169 |
-
"
|
170 |
"documents": document_ids
|
171 |
}
|
172 |
# Send a POST request to the backend service (using requests library as an example, actual implementation may vary)
|
|
|
166 |
"rerank_id": rerank_id,
|
167 |
"keyword": keyword,
|
168 |
"question": question,
|
169 |
+
"dataset_ids": dataset_ids,
|
170 |
"documents": document_ids
|
171 |
}
|
172 |
# Send a POST request to the backend service (using requests library as an example, actual implementation may vary)
|
sdk/python/test/common.py
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
HOST_ADDRESS=os.getenv('HOST_ADDRESS', 'http://127.0.0.1:9380')
|
sdk/python/test/t_chat.py
CHANGED
@@ -1,7 +1,5 @@
|
|
1 |
-
import os
|
2 |
from ragflow_sdk import RAGFlow
|
3 |
-
|
4 |
-
HOST_ADDRESS = os.getenv('HOST_ADDRESS', 'http://127.0.0.1:9380')
|
5 |
|
6 |
def test_create_chat_with_name(get_api_key_fixture):
|
7 |
API_KEY = get_api_key_fixture
|
@@ -16,7 +14,7 @@ def test_create_chat_with_name(get_api_key_fixture):
|
|
16 |
docs= kb.upload_documents(documents)
|
17 |
for doc in docs:
|
18 |
doc.add_chunk("This is a test to add chunk")
|
19 |
-
rag.create_chat("
|
20 |
|
21 |
|
22 |
def test_update_chat_with_name(get_api_key_fixture):
|
@@ -32,7 +30,7 @@ def test_update_chat_with_name(get_api_key_fixture):
|
|
32 |
docs = kb.upload_documents(documents)
|
33 |
for doc in docs:
|
34 |
doc.add_chunk("This is a test to add chunk")
|
35 |
-
chat = rag.create_chat("
|
36 |
chat.update({"name": "new_chat"})
|
37 |
|
38 |
|
@@ -49,7 +47,7 @@ def test_delete_chats_with_success(get_api_key_fixture):
|
|
49 |
docs = kb.upload_documents(documents)
|
50 |
for doc in docs:
|
51 |
doc.add_chunk("This is a test to add chunk")
|
52 |
-
chat = rag.create_chat("
|
53 |
rag.delete_chats(ids=[chat.id])
|
54 |
|
55 |
def test_list_chats_with_success(get_api_key_fixture):
|
|
|
|
|
1 |
from ragflow_sdk import RAGFlow
|
2 |
+
from common import HOST_ADDRESS
|
|
|
3 |
|
4 |
def test_create_chat_with_name(get_api_key_fixture):
|
5 |
API_KEY = get_api_key_fixture
|
|
|
14 |
docs= kb.upload_documents(documents)
|
15 |
for doc in docs:
|
16 |
doc.add_chunk("This is a test to add chunk")
|
17 |
+
rag.create_chat("test_create_chat", dataset_ids=[kb.id])
|
18 |
|
19 |
|
20 |
def test_update_chat_with_name(get_api_key_fixture):
|
|
|
30 |
docs = kb.upload_documents(documents)
|
31 |
for doc in docs:
|
32 |
doc.add_chunk("This is a test to add chunk")
|
33 |
+
chat = rag.create_chat("test_update_chat", dataset_ids=[kb.id])
|
34 |
chat.update({"name": "new_chat"})
|
35 |
|
36 |
|
|
|
47 |
docs = kb.upload_documents(documents)
|
48 |
for doc in docs:
|
49 |
doc.add_chunk("This is a test to add chunk")
|
50 |
+
chat = rag.create_chat("test_delete_chat", dataset_ids=[kb.id])
|
51 |
rag.delete_chats(ids=[chat.id])
|
52 |
|
53 |
def test_list_chats_with_success(get_api_key_fixture):
|
sdk/python/test/t_chunk.py
ADDED
@@ -0,0 +1,191 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from ragflow_sdk import RAGFlow
|
2 |
+
from common import HOST_ADDRESS
|
3 |
+
from time import sleep
|
4 |
+
import pytest
|
5 |
+
|
6 |
+
def test_parse_document_with_txt(get_api_key_fixture):
|
7 |
+
API_KEY = get_api_key_fixture
|
8 |
+
rag = RAGFlow(API_KEY, HOST_ADDRESS)
|
9 |
+
ds = rag.create_dataset(name="test_parse_document")
|
10 |
+
name = 'ragflow_test.txt'
|
11 |
+
with open("test_data/ragflow_test.txt","rb") as file :
|
12 |
+
blob = file.read()
|
13 |
+
docs = ds.upload_documents([{"displayed_name": name, "blob": blob}])
|
14 |
+
doc = docs[0]
|
15 |
+
ds.async_parse_documents(document_ids=[doc.id])
|
16 |
+
'''
|
17 |
+
for n in range(100):
|
18 |
+
if doc.progress == 1:
|
19 |
+
break
|
20 |
+
sleep(1)
|
21 |
+
else:
|
22 |
+
raise Exception("Run time ERROR: Document parsing did not complete in time.")
|
23 |
+
'''
|
24 |
+
|
25 |
+
def test_parse_and_cancel_document(get_api_key_fixture):
|
26 |
+
API_KEY = get_api_key_fixture
|
27 |
+
rag = RAGFlow(API_KEY, HOST_ADDRESS)
|
28 |
+
ds = rag.create_dataset(name="test_parse_and_cancel_document")
|
29 |
+
name = 'ragflow_test.txt'
|
30 |
+
with open("test_data/ragflow_test.txt","rb") as file :
|
31 |
+
blob = file.read()
|
32 |
+
docs=ds.upload_documents([{"displayed_name": name, "blob": blob}])
|
33 |
+
doc = docs[0]
|
34 |
+
ds.async_parse_documents(document_ids=[doc.id])
|
35 |
+
sleep(1)
|
36 |
+
if 0 < doc.progress < 1:
|
37 |
+
ds.async_cancel_parse_documents(document_ids=[doc.id])
|
38 |
+
|
39 |
+
|
40 |
+
def test_bulk_parse_documents(get_api_key_fixture):
|
41 |
+
API_KEY = get_api_key_fixture
|
42 |
+
rag = RAGFlow(API_KEY, HOST_ADDRESS)
|
43 |
+
ds = rag.create_dataset(name="test_bulk_parse_and_cancel_documents")
|
44 |
+
with open("ragflow.txt","rb") as file:
|
45 |
+
blob = file.read()
|
46 |
+
documents = [
|
47 |
+
{'displayed_name': 'test1.txt', 'blob': blob},
|
48 |
+
{'displayed_name': 'test2.txt', 'blob': blob},
|
49 |
+
{'displayed_name': 'test3.txt', 'blob': blob}
|
50 |
+
]
|
51 |
+
docs = ds.upload_documents(documents)
|
52 |
+
ids = [doc.id for doc in docs]
|
53 |
+
ds.async_parse_documents(ids)
|
54 |
+
'''
|
55 |
+
for n in range(100):
|
56 |
+
all_completed = all(doc.progress == 1 for doc in docs)
|
57 |
+
if all_completed:
|
58 |
+
break
|
59 |
+
sleep(1)
|
60 |
+
else:
|
61 |
+
raise Exception("Run time ERROR: Bulk document parsing did not complete in time.")
|
62 |
+
'''
|
63 |
+
|
64 |
+
@pytest.mark.skip(reason="DocumentService.get_list() expects page and page_size")
|
65 |
+
def test_list_chunks_with_success(get_api_key_fixture):
|
66 |
+
API_KEY = get_api_key_fixture
|
67 |
+
rag = RAGFlow(API_KEY, HOST_ADDRESS)
|
68 |
+
ds = rag.create_dataset(name="test_list_chunks_with_success")
|
69 |
+
with open("test_data/ragflow_test.txt", "rb") as file:
|
70 |
+
blob = file.read()
|
71 |
+
'''
|
72 |
+
# chunk_size = 1024 * 1024
|
73 |
+
# chunks = [blob[i:i + chunk_size] for i in range(0, len(blob), chunk_size)]
|
74 |
+
documents = [
|
75 |
+
{'displayed_name': f'chunk_{i}.txt', 'blob': chunk} for i, chunk in enumerate(chunks)
|
76 |
+
]
|
77 |
+
'''
|
78 |
+
documents =[{"displayed_name":"test_list_chunks_with_success.txt","blob":blob}]
|
79 |
+
docs = ds.upload_documents(documents)
|
80 |
+
ids = [doc.id for doc in docs]
|
81 |
+
ds.async_parse_documents(ids)
|
82 |
+
'''
|
83 |
+
for n in range(100):
|
84 |
+
all_completed = all(doc.progress == 1 for doc in docs)
|
85 |
+
if all_completed:
|
86 |
+
break
|
87 |
+
sleep(1)
|
88 |
+
else:
|
89 |
+
raise Exception("Run time ERROR: Chunk document parsing did not complete in time.")
|
90 |
+
'''
|
91 |
+
doc = docs[0]
|
92 |
+
doc.list_chunks()
|
93 |
+
|
94 |
+
|
95 |
+
def test_add_chunk_with_success(get_api_key_fixture):
|
96 |
+
API_KEY = get_api_key_fixture
|
97 |
+
rag = RAGFlow(API_KEY, HOST_ADDRESS)
|
98 |
+
ds = rag.create_dataset(name="test_add_chunk_with_success")
|
99 |
+
with open("test_data/ragflow_test.txt", "rb") as file:
|
100 |
+
blob = file.read()
|
101 |
+
'''
|
102 |
+
# chunk_size = 1024 * 1024
|
103 |
+
# chunks = [blob[i:i + chunk_size] for i in range(0, len(blob), chunk_size)]
|
104 |
+
documents = [
|
105 |
+
{'displayed_name': f'chunk_{i}.txt', 'blob': chunk} for i, chunk in enumerate(chunks)
|
106 |
+
]
|
107 |
+
'''
|
108 |
+
documents =[{"displayed_name":"test_list_chunks_with_success.txt","blob":blob}]
|
109 |
+
docs = ds.upload_documents(documents)
|
110 |
+
doc = docs[0]
|
111 |
+
doc.add_chunk(content="This is a chunk addition test")
|
112 |
+
|
113 |
+
|
114 |
+
@pytest.mark.skip(reason="docs[0] is None")
|
115 |
+
def test_delete_chunk_with_success(get_api_key_fixture):
|
116 |
+
API_KEY = get_api_key_fixture
|
117 |
+
rag = RAGFlow(API_KEY, HOST_ADDRESS)
|
118 |
+
ds = rag.create_dataset(name="test_delete_chunk_with_success")
|
119 |
+
with open("test_data/ragflow_test.txt", "rb") as file:
|
120 |
+
blob = file.read()
|
121 |
+
'''
|
122 |
+
# chunk_size = 1024 * 1024
|
123 |
+
# chunks = [blob[i:i + chunk_size] for i in range(0, len(blob), chunk_size)]
|
124 |
+
documents = [
|
125 |
+
{'displayed_name': f'chunk_{i}.txt', 'blob': chunk} for i, chunk in enumerate(chunks)
|
126 |
+
]
|
127 |
+
'''
|
128 |
+
documents =[{"displayed_name":"test_list_chunks_with_success.txt","blob":blob}]
|
129 |
+
docs = ds.upload_documents(documents)
|
130 |
+
doc = docs[0]
|
131 |
+
chunk = doc.add_chunk(content="This is a chunk addition test")
|
132 |
+
doc.delete_chunks([chunk.id])
|
133 |
+
|
134 |
+
|
135 |
+
def test_update_chunk_content(get_api_key_fixture):
|
136 |
+
API_KEY = get_api_key_fixture
|
137 |
+
rag = RAGFlow(API_KEY, HOST_ADDRESS)
|
138 |
+
ds = rag.create_dataset(name="test_update_chunk_content_with_success")
|
139 |
+
with open("test_data/ragflow_test.txt", "rb") as file:
|
140 |
+
blob = file.read()
|
141 |
+
'''
|
142 |
+
# chunk_size = 1024 * 1024
|
143 |
+
# chunks = [blob[i:i + chunk_size] for i in range(0, len(blob), chunk_size)]
|
144 |
+
documents = [
|
145 |
+
{'displayed_name': f'chunk_{i}.txt', 'blob': chunk} for i, chunk in enumerate(chunks)
|
146 |
+
]
|
147 |
+
'''
|
148 |
+
documents =[{"displayed_name":"test_update_chunk_content_with_success.txt","blob":blob}]
|
149 |
+
docs = ds.upload_documents(documents)
|
150 |
+
doc = docs[0]
|
151 |
+
chunk = doc.add_chunk(content="This is a chunk addition test")
|
152 |
+
chunk.update({"content":"This is a updated content"})
|
153 |
+
|
154 |
+
def test_update_chunk_available(get_api_key_fixture):
|
155 |
+
API_KEY = get_api_key_fixture
|
156 |
+
rag = RAGFlow(API_KEY, HOST_ADDRESS)
|
157 |
+
ds = rag.create_dataset(name="test_update_chunk_available_with_success")
|
158 |
+
with open("test_data/ragflow_test.txt", "rb") as file:
|
159 |
+
blob = file.read()
|
160 |
+
'''
|
161 |
+
# chunk_size = 1024 * 1024
|
162 |
+
# chunks = [blob[i:i + chunk_size] for i in range(0, len(blob), chunk_size)]
|
163 |
+
documents = [
|
164 |
+
{'displayed_name': f'chunk_{i}.txt', 'blob': chunk} for i, chunk in enumerate(chunks)
|
165 |
+
]
|
166 |
+
'''
|
167 |
+
documents =[{"displayed_name":"test_update_chunk_available_with_success.txt","blob":blob}]
|
168 |
+
docs = ds.upload_documents(documents)
|
169 |
+
doc = docs[0]
|
170 |
+
chunk = doc.add_chunk(content="This is a chunk addition test")
|
171 |
+
chunk.update({"available":False})
|
172 |
+
|
173 |
+
|
174 |
+
def test_retrieve_chunks(get_api_key_fixture):
|
175 |
+
API_KEY = get_api_key_fixture
|
176 |
+
rag = RAGFlow(API_KEY, HOST_ADDRESS)
|
177 |
+
ds = rag.create_dataset(name="retrieval")
|
178 |
+
with open("test_data/ragflow_test.txt", "rb") as file:
|
179 |
+
blob = file.read()
|
180 |
+
'''
|
181 |
+
# chunk_size = 1024 * 1024
|
182 |
+
# chunks = [blob[i:i + chunk_size] for i in range(0, len(blob), chunk_size)]
|
183 |
+
documents = [
|
184 |
+
{'displayed_name': f'chunk_{i}.txt', 'blob': chunk} for i, chunk in enumerate(chunks)
|
185 |
+
]
|
186 |
+
'''
|
187 |
+
documents =[{"displayed_name":"test_retrieve_chunks.txt","blob":blob}]
|
188 |
+
docs = ds.upload_documents(documents)
|
189 |
+
doc = docs[0]
|
190 |
+
doc.add_chunk(content="This is a chunk addition test")
|
191 |
+
rag.retrieve(dataset_ids=[ds.id],document_ids=[doc.id])
|
sdk/python/test/t_dataset.py
CHANGED
@@ -1,9 +1,7 @@
|
|
1 |
-
import
|
2 |
import random
|
3 |
import pytest
|
4 |
-
from
|
5 |
-
|
6 |
-
HOST_ADDRESS = os.getenv('HOST_ADDRESS', 'http://127.0.0.1:9380')
|
7 |
|
8 |
def test_create_dataset_with_name(get_api_key_fixture):
|
9 |
API_KEY = get_api_key_fixture
|
@@ -13,8 +11,9 @@ def test_create_dataset_with_name(get_api_key_fixture):
|
|
13 |
def test_create_dataset_with_duplicated_name(get_api_key_fixture):
|
14 |
API_KEY = get_api_key_fixture
|
15 |
rag = RAGFlow(API_KEY, HOST_ADDRESS)
|
|
|
16 |
with pytest.raises(Exception) as exc_info:
|
17 |
-
rag.create_dataset("
|
18 |
assert str(exc_info.value) == "Duplicated dataset name in creating dataset."
|
19 |
|
20 |
def test_create_dataset_with_random_chunk_method(get_api_key_fixture):
|
@@ -31,7 +30,7 @@ def test_create_dataset_with_invalid_parameter(get_api_key_fixture):
|
|
31 |
"knowledge_graph", "email"]
|
32 |
chunk_method = "invalid_chunk_method"
|
33 |
with pytest.raises(Exception) as exc_info:
|
34 |
-
rag.create_dataset("
|
35 |
assert str(exc_info.value) == f"'{chunk_method}' is not in {valid_chunk_methods}"
|
36 |
|
37 |
|
@@ -45,7 +44,7 @@ def test_update_dataset_with_name(get_api_key_fixture):
|
|
45 |
def test_delete_datasets_with_success(get_api_key_fixture):
|
46 |
API_KEY = get_api_key_fixture
|
47 |
rag = RAGFlow(API_KEY, HOST_ADDRESS)
|
48 |
-
ds = rag.create_dataset("
|
49 |
rag.delete_datasets(ids=[ds.id])
|
50 |
|
51 |
|
|
|
1 |
+
from ragflow_sdk import RAGFlow
|
2 |
import random
|
3 |
import pytest
|
4 |
+
from common import HOST_ADDRESS
|
|
|
|
|
5 |
|
6 |
def test_create_dataset_with_name(get_api_key_fixture):
|
7 |
API_KEY = get_api_key_fixture
|
|
|
11 |
def test_create_dataset_with_duplicated_name(get_api_key_fixture):
|
12 |
API_KEY = get_api_key_fixture
|
13 |
rag = RAGFlow(API_KEY, HOST_ADDRESS)
|
14 |
+
rag.create_dataset("test_create_dataset_with_duplicated_name")
|
15 |
with pytest.raises(Exception) as exc_info:
|
16 |
+
rag.create_dataset("test_create_dataset_with_duplicated_name")
|
17 |
assert str(exc_info.value) == "Duplicated dataset name in creating dataset."
|
18 |
|
19 |
def test_create_dataset_with_random_chunk_method(get_api_key_fixture):
|
|
|
30 |
"knowledge_graph", "email"]
|
31 |
chunk_method = "invalid_chunk_method"
|
32 |
with pytest.raises(Exception) as exc_info:
|
33 |
+
rag.create_dataset("test_create_dataset_with_invalid_chunk_method",chunk_method=chunk_method)
|
34 |
assert str(exc_info.value) == f"'{chunk_method}' is not in {valid_chunk_methods}"
|
35 |
|
36 |
|
|
|
44 |
def test_delete_datasets_with_success(get_api_key_fixture):
|
45 |
API_KEY = get_api_key_fixture
|
46 |
rag = RAGFlow(API_KEY, HOST_ADDRESS)
|
47 |
+
ds = rag.create_dataset("test_delete_dataset")
|
48 |
rag.delete_datasets(ids=[ds.id])
|
49 |
|
50 |
|
sdk/python/test/t_document.py
CHANGED
@@ -1,322 +1,62 @@
|
|
1 |
-
import os
|
2 |
from ragflow_sdk import RAGFlow, DataSet, Document, Chunk
|
3 |
-
|
4 |
-
HOST_ADDRESS = os.getenv('HOST_ADDRESS', 'http://127.0.0.1:9380')
|
5 |
|
6 |
|
7 |
def test_upload_document_with_success(get_api_key_fixture):
|
8 |
-
"""
|
9 |
-
Test ingesting a document into a dataset with success.
|
10 |
-
"""
|
11 |
-
# Initialize RAGFlow instance
|
12 |
API_KEY = get_api_key_fixture
|
13 |
rag = RAGFlow(API_KEY, HOST_ADDRESS)
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
# Step 2: Create a new document
|
23 |
-
# The blob is the actual file content or a placeholder in this case
|
24 |
-
blob = b"Sample document content for ingestion test."
|
25 |
-
blob_2 = b"test_2."
|
26 |
-
list_1 = []
|
27 |
-
list_1.append({"name": "Test_1.txt",
|
28 |
-
"blob": blob})
|
29 |
-
list_1.append({"name": "Test_2.txt",
|
30 |
-
"blob": blob_2})
|
31 |
-
res = ds.upload_documents(list_1)
|
32 |
-
# Ensure document ingestion was successful
|
33 |
-
assert res is None, f"Failed to create document, error: {res}"
|
34 |
|
35 |
|
36 |
def test_update_document_with_success(get_api_key_fixture):
|
37 |
-
"""
|
38 |
-
Test updating a document with success.
|
39 |
-
Update name or chunk_method are supported
|
40 |
-
"""
|
41 |
API_KEY = get_api_key_fixture
|
42 |
rag = RAGFlow(API_KEY, HOST_ADDRESS)
|
43 |
-
ds = rag.
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
assert res is None, f"Failed to update document, error: {res}"
|
50 |
-
else:
|
51 |
-
assert False, f"Failed to get document, error: {doc}"
|
52 |
|
53 |
|
54 |
def test_download_document_with_success(get_api_key_fixture):
|
55 |
-
"""
|
56 |
-
Test downloading a document with success.
|
57 |
-
"""
|
58 |
API_KEY = get_api_key_fixture
|
59 |
-
# Initialize RAGFlow instance
|
60 |
rag = RAGFlow(API_KEY, HOST_ADDRESS)
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
doc =
|
66 |
-
|
67 |
-
|
68 |
-
if isinstance(doc, Document):
|
69 |
-
# Download the document content and save it to a file
|
70 |
-
with open("ragflow.txt", "wb+") as file:
|
71 |
-
file.write(doc.download())
|
72 |
-
# Print the document object for debugging
|
73 |
-
print(doc)
|
74 |
-
|
75 |
-
# Assert that the download was successful
|
76 |
-
assert True, f"Failed to download document, error: {doc}"
|
77 |
-
else:
|
78 |
-
# If the document retrieval fails, assert failure
|
79 |
-
assert False, f"Failed to get document, error: {doc}"
|
80 |
|
81 |
|
82 |
def test_list_documents_in_dataset_with_success(get_api_key_fixture):
|
83 |
-
"""
|
84 |
-
Test list all documents into a dataset with success.
|
85 |
-
"""
|
86 |
API_KEY = get_api_key_fixture
|
87 |
-
# Initialize RAGFlow instance
|
88 |
rag = RAGFlow(API_KEY, HOST_ADDRESS)
|
|
|
|
|
|
|
|
|
|
|
89 |
|
90 |
-
# Step 1: Create a new dataset
|
91 |
-
ds = rag.create_dataset(name="God2")
|
92 |
-
|
93 |
-
# Ensure dataset creation was successful
|
94 |
-
assert isinstance(ds, DataSet), f"Failed to create dataset, error: {ds}"
|
95 |
-
assert ds.name == "God2", "Dataset name does not match."
|
96 |
-
|
97 |
-
# Step 2: Create a new document
|
98 |
-
# The blob is the actual file content or a placeholder in this case
|
99 |
-
name1 = "Test Document111.txt"
|
100 |
-
blob1 = b"Sample document content for ingestion test111."
|
101 |
-
name2 = "Test Document222.txt"
|
102 |
-
blob2 = b"Sample document content for ingestion test222."
|
103 |
-
list_1 = [{"name": name1, "blob": blob1}, {"name": name2, "blob": blob2}]
|
104 |
-
ds.upload_documents(list_1)
|
105 |
-
for d in ds.list_documents(keywords="test", offset=0, limit=12):
|
106 |
-
assert isinstance(d, Document), "Failed to upload documents"
|
107 |
|
108 |
|
109 |
def test_delete_documents_in_dataset_with_success(get_api_key_fixture):
|
110 |
-
"""
|
111 |
-
Test list all documents into a dataset with success.
|
112 |
-
"""
|
113 |
-
API_KEY = get_api_key_fixture
|
114 |
-
# Initialize RAGFlow instance
|
115 |
-
rag = RAGFlow(API_KEY, HOST_ADDRESS)
|
116 |
-
|
117 |
-
# Step 1: Create a new dataset
|
118 |
-
ds = rag.create_dataset(name="God3")
|
119 |
-
|
120 |
-
# Ensure dataset creation was successful
|
121 |
-
assert isinstance(ds, DataSet), f"Failed to create dataset, error: {ds}"
|
122 |
-
assert ds.name == "God3", "Dataset name does not match."
|
123 |
-
|
124 |
-
# Step 2: Create a new document
|
125 |
-
# The blob is the actual file content or a placeholder in this case
|
126 |
-
name1 = "Test Document333.txt"
|
127 |
-
blob1 = b"Sample document content for ingestion test333."
|
128 |
-
name2 = "Test Document444.txt"
|
129 |
-
blob2 = b"Sample document content for ingestion test444."
|
130 |
-
ds.upload_documents([{"name": name1, "blob": blob1}, {"name": name2, "blob": blob2}])
|
131 |
-
for d in ds.list_documents(keywords="document", offset=0, limit=12):
|
132 |
-
assert isinstance(d, Document)
|
133 |
-
ds.delete_documents([d.id])
|
134 |
-
remaining_docs = ds.list_documents(keywords="rag", offset=0, limit=12)
|
135 |
-
assert len(remaining_docs) == 0, "Documents were not properly deleted."
|
136 |
-
|
137 |
-
|
138 |
-
def test_parse_and_cancel_document(get_api_key_fixture):
|
139 |
API_KEY = get_api_key_fixture
|
140 |
-
# Initialize RAGFlow with API key and host address
|
141 |
rag = RAGFlow(API_KEY, HOST_ADDRESS)
|
|
|
|
|
|
|
|
|
|
|
|
|
142 |
|
143 |
-
# Create a dataset with a specific name
|
144 |
-
ds = rag.create_dataset(name="God4")
|
145 |
-
|
146 |
-
# Define the document name and path
|
147 |
-
name3 = 'westworld.pdf'
|
148 |
-
path = 'test_data/westworld.pdf'
|
149 |
-
|
150 |
-
# Create a document in the dataset using the file path
|
151 |
-
ds.upload_documents({"name": name3, "blob": open(path, "rb").read()})
|
152 |
-
|
153 |
-
# Retrieve the document by name
|
154 |
-
doc = rag.list_documents(name="westworld.pdf")
|
155 |
-
doc = doc[0]
|
156 |
-
ds.async_parse_documents(document_ids=[])
|
157 |
-
|
158 |
-
# Print message to confirm asynchronous parsing has been initiated
|
159 |
-
print("Async parsing initiated")
|
160 |
-
|
161 |
-
# Use join to wait for parsing to complete and get progress updates
|
162 |
-
for progress, msg in doc.join(interval=5, timeout=10):
|
163 |
-
print(progress, msg)
|
164 |
-
# Assert that the progress is within the valid range (0 to 100)
|
165 |
-
assert 0 <= progress <= 100, f"Invalid progress: {progress}"
|
166 |
-
# Assert that the message is not empty
|
167 |
-
assert msg, "Message should not be empty"
|
168 |
-
# Test cancelling the parsing operation
|
169 |
-
doc.cancel()
|
170 |
-
# Print message to confirm parsing has been cancelled successfully
|
171 |
-
print("Parsing cancelled successfully")
|
172 |
-
|
173 |
-
|
174 |
-
def test_bulk_parse_and_cancel_documents(get_api_key_fixture):
|
175 |
-
API_KEY = get_api_key_fixture
|
176 |
-
# Initialize RAGFlow with API key and host address
|
177 |
-
rag = RAGFlow(API_KEY, HOST_ADDRESS)
|
178 |
-
|
179 |
-
# Create a dataset
|
180 |
-
ds = rag.create_dataset(name="God5")
|
181 |
-
assert ds is not None, "Dataset creation failed"
|
182 |
-
assert ds.name == "God5", "Dataset name does not match"
|
183 |
-
|
184 |
-
# Prepare a list of file names and paths
|
185 |
-
documents = [
|
186 |
-
{'name': 'test1.txt', 'path': 'test_data/test1.txt'},
|
187 |
-
{'name': 'test2.txt', 'path': 'test_data/test2.txt'},
|
188 |
-
{'name': 'test3.txt', 'path': 'test_data/test3.txt'}
|
189 |
-
]
|
190 |
-
|
191 |
-
# Create documents in bulk
|
192 |
-
for doc_info in documents:
|
193 |
-
with open(doc_info['path'], "rb") as file:
|
194 |
-
created_doc = rag.create_document(ds, name=doc_info['name'], blob=file.read())
|
195 |
-
assert created_doc is not None, f"Failed to create document {doc_info['name']}"
|
196 |
-
|
197 |
-
# Retrieve document objects in bulk
|
198 |
-
docs = [rag.get_document(name=doc_info['name']) for doc_info in documents]
|
199 |
-
ids = [doc.id for doc in docs]
|
200 |
-
assert len(docs) == len(documents), "Mismatch between created documents and fetched documents"
|
201 |
-
|
202 |
-
# Initiate asynchronous parsing for all documents
|
203 |
-
rag.async_parse_documents(ids)
|
204 |
-
print("Async bulk parsing initiated")
|
205 |
-
|
206 |
-
# Wait for all documents to finish parsing and check progress
|
207 |
-
for doc in docs:
|
208 |
-
for progress, msg in doc.join(interval=5, timeout=10):
|
209 |
-
print(f"{doc.name}: Progress: {progress}, Message: {msg}")
|
210 |
|
211 |
-
# Assert that progress is within the valid range
|
212 |
-
assert 0 <= progress <= 100, f"Invalid progress: {progress} for document {doc.name}"
|
213 |
-
|
214 |
-
# Assert that the message is not empty
|
215 |
-
assert msg, f"Message should not be empty for document {doc.name}"
|
216 |
-
|
217 |
-
# If progress reaches 100%, assert that parsing is completed successfully
|
218 |
-
if progress == 100:
|
219 |
-
assert "completed" in msg.lower(), f"Document {doc.name} did not complete successfully"
|
220 |
-
|
221 |
-
# Cancel parsing for all documents in bulk
|
222 |
-
cancel_result = rag.async_cancel_parse_documents(ids)
|
223 |
-
assert cancel_result is None or isinstance(cancel_result, type(None)), "Failed to cancel document parsing"
|
224 |
-
print("Async bulk parsing cancelled")
|
225 |
-
|
226 |
-
|
227 |
-
def test_parse_document_and_chunk_list(get_api_key_fixture):
|
228 |
-
API_KEY = get_api_key_fixture
|
229 |
-
rag = RAGFlow(API_KEY, HOST_ADDRESS)
|
230 |
-
ds = rag.create_dataset(name="God7")
|
231 |
-
name = 'story.txt'
|
232 |
-
path = 'test_data/story.txt'
|
233 |
-
# name = "Test Document rag.txt"
|
234 |
-
# blob = " Sample document content for rag test66. rag wonderful apple os documents apps. Sample document content for rag test66. rag wonderful apple os documents apps.Sample document content for rag test66. rag wonderful apple os documents apps.Sample document content for rag test66. rag wonderful apple os documents apps. Sample document content for rag test66. rag wonderful apple os documents apps. Sample document content for rag test66. rag wonderful apple os documents apps. Sample document content for rag test66. rag wonderful apple os documents apps. Sample document content for rag test66. rag wonderful apple os documents apps. Sample document content for rag test66. rag wonderful apple os documents apps. Sample document content for rag test66. rag wonderful apple os documents apps. Sample document content for rag test66. rag wonderful apple os documents apps. Sample document content for rag test66. rag wonderful apple os documents apps."
|
235 |
-
rag.create_document(ds, name=name, blob=open(path, "rb").read())
|
236 |
-
doc = rag.get_document(name=name)
|
237 |
-
doc.async_parse()
|
238 |
-
|
239 |
-
# Wait for parsing to complete and get progress updates using join
|
240 |
-
for progress, msg in doc.join(interval=5, timeout=30):
|
241 |
-
print(progress, msg)
|
242 |
-
# Assert that progress is within 0 to 100
|
243 |
-
assert 0 <= progress <= 100, f"Invalid progress: {progress}"
|
244 |
-
# Assert that the message is not empty
|
245 |
-
assert msg, "Message should not be empty"
|
246 |
-
|
247 |
-
for c in doc.list_chunks(keywords="rag", offset=0, limit=12):
|
248 |
-
print(c)
|
249 |
-
assert c is not None, "Chunk is None"
|
250 |
-
assert "rag" in c['content_with_weight'].lower(), f"Keyword 'rag' not found in chunk content: {c.content}"
|
251 |
-
|
252 |
-
|
253 |
-
def test_add_chunk_to_chunk_list(get_api_key_fixture):
|
254 |
-
API_KEY = get_api_key_fixture
|
255 |
-
rag = RAGFlow(API_KEY, HOST_ADDRESS)
|
256 |
-
doc = rag.get_document(name='story.txt')
|
257 |
-
chunk = doc.add_chunk(content="assssdd")
|
258 |
-
assert chunk is not None, "Chunk is None"
|
259 |
-
assert isinstance(chunk, Chunk), "Chunk was not added to chunk list"
|
260 |
-
|
261 |
-
|
262 |
-
def test_delete_chunk_of_chunk_list(get_api_key_fixture):
|
263 |
-
API_KEY = get_api_key_fixture
|
264 |
-
rag = RAGFlow(API_KEY, HOST_ADDRESS)
|
265 |
-
doc = rag.get_document(name='story.txt')
|
266 |
-
chunk = doc.add_chunk(content="assssdd")
|
267 |
-
assert chunk is not None, "Chunk is None"
|
268 |
-
assert isinstance(chunk, Chunk), "Chunk was not added to chunk list"
|
269 |
-
doc = rag.get_document(name='story.txt')
|
270 |
-
chunk_count_before = doc.chunk_count
|
271 |
-
chunk.delete()
|
272 |
-
doc = rag.get_document(name='story.txt')
|
273 |
-
assert doc.chunk_count == chunk_count_before - 1, "Chunk was not deleted"
|
274 |
-
|
275 |
-
|
276 |
-
def test_update_chunk_content(get_api_key_fixture):
|
277 |
-
API_KEY = get_api_key_fixture
|
278 |
-
rag = RAGFlow(API_KEY, HOST_ADDRESS)
|
279 |
-
doc = rag.get_document(name='story.txt')
|
280 |
-
chunk = doc.add_chunk(content="assssddd")
|
281 |
-
assert chunk is not None, "Chunk is None"
|
282 |
-
assert isinstance(chunk, Chunk), "Chunk was not added to chunk list"
|
283 |
-
chunk.content = "ragflow123"
|
284 |
-
res = chunk.save()
|
285 |
-
assert res is True, f"Failed to update chunk content, error: {res}"
|
286 |
-
|
287 |
-
|
288 |
-
def test_update_chunk_available(get_api_key_fixture):
|
289 |
-
API_KEY = get_api_key_fixture
|
290 |
-
rag = RAGFlow(API_KEY, HOST_ADDRESS)
|
291 |
-
doc = rag.get_document(name='story.txt')
|
292 |
-
chunk = doc.add_chunk(content="ragflow")
|
293 |
-
assert chunk is not None, "Chunk is None"
|
294 |
-
assert isinstance(chunk, Chunk), "Chunk was not added to chunk list"
|
295 |
-
chunk.available = 0
|
296 |
-
res = chunk.save()
|
297 |
-
assert res is True, f"Failed to update chunk status, error: {res}"
|
298 |
-
|
299 |
-
|
300 |
-
def test_retrieval_chunks(get_api_key_fixture):
|
301 |
-
API_KEY = get_api_key_fixture
|
302 |
-
rag = RAGFlow(API_KEY, HOST_ADDRESS)
|
303 |
-
ds = rag.create_dataset(name="God8")
|
304 |
-
name = 'ragflow_test.txt'
|
305 |
-
path = 'test_data/ragflow_test.txt'
|
306 |
-
rag.create_document(ds, name=name, blob=open(path, "rb").read())
|
307 |
-
doc = rag.get_document(name=name)
|
308 |
-
doc.async_parse()
|
309 |
-
# Wait for parsing to complete and get progress updates using join
|
310 |
-
for progress, msg in doc.join(interval=5, timeout=30):
|
311 |
-
print(progress, msg)
|
312 |
-
assert 0 <= progress <= 100, f"Invalid progress: {progress}"
|
313 |
-
assert msg, "Message should not be empty"
|
314 |
-
for c in rag.retrieval(question="What's ragflow?",
|
315 |
-
datasets=[ds.id], documents=[doc],
|
316 |
-
offset=0, limit=6, similarity_threshold=0.1,
|
317 |
-
vector_similarity_weight=0.3,
|
318 |
-
top_k=1024
|
319 |
-
):
|
320 |
-
print(c)
|
321 |
-
assert c is not None, "Chunk is None"
|
322 |
-
assert "ragflow" in c.content.lower(), f"Keyword 'rag' not found in chunk content: {c.content}"
|
|
|
|
|
1 |
from ragflow_sdk import RAGFlow, DataSet, Document, Chunk
|
2 |
+
from common import HOST_ADDRESS
|
|
|
3 |
|
4 |
|
5 |
def test_upload_document_with_success(get_api_key_fixture):
|
|
|
|
|
|
|
|
|
6 |
API_KEY = get_api_key_fixture
|
7 |
rag = RAGFlow(API_KEY, HOST_ADDRESS)
|
8 |
+
ds = rag.create_dataset(name="test_upload_document")
|
9 |
+
blob = b"Sample document content for test."
|
10 |
+
with open("ragflow.txt","rb") as file:
|
11 |
+
blob_2=file.read()
|
12 |
+
document_infos = []
|
13 |
+
document_infos.append({"displayed_name": "test_1.txt","blob": blob})
|
14 |
+
document_infos.append({"displayed_name": "test_2.txt","blob": blob_2})
|
15 |
+
ds.upload_documents(document_infos)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
|
17 |
|
18 |
def test_update_document_with_success(get_api_key_fixture):
|
|
|
|
|
|
|
|
|
19 |
API_KEY = get_api_key_fixture
|
20 |
rag = RAGFlow(API_KEY, HOST_ADDRESS)
|
21 |
+
ds = rag.create_dataset(name="test_update_document")
|
22 |
+
blob = b"Sample document content for test."
|
23 |
+
document_infos=[{"displayed_name":"test.txt","blob":blob}]
|
24 |
+
docs=ds.upload_documents(document_infos)
|
25 |
+
doc = docs[0]
|
26 |
+
doc.update({"chunk_method": "manual", "name": "manual.txt"})
|
|
|
|
|
|
|
27 |
|
28 |
|
29 |
def test_download_document_with_success(get_api_key_fixture):
|
|
|
|
|
|
|
30 |
API_KEY = get_api_key_fixture
|
|
|
31 |
rag = RAGFlow(API_KEY, HOST_ADDRESS)
|
32 |
+
ds = rag.create_dataset(name="test_download_document")
|
33 |
+
blob = b"Sample document content for test."
|
34 |
+
document_infos=[{"displayed_name": "test_1.txt","blob": blob}]
|
35 |
+
docs=ds.upload_documents(document_infos)
|
36 |
+
doc = docs[0]
|
37 |
+
with open("test_download.txt","wb+") as file:
|
38 |
+
file.write(doc.download())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
39 |
|
40 |
|
41 |
def test_list_documents_in_dataset_with_success(get_api_key_fixture):
|
|
|
|
|
|
|
42 |
API_KEY = get_api_key_fixture
|
|
|
43 |
rag = RAGFlow(API_KEY, HOST_ADDRESS)
|
44 |
+
ds = rag.create_dataset(name="test_list_documents")
|
45 |
+
blob = b"Sample document content for test."
|
46 |
+
document_infos = [{"displayed_name": "test.txt","blob":blob}]
|
47 |
+
ds.upload_documents(document_infos)
|
48 |
+
ds.list_documents(keywords="test", offset=0, limit=12)
|
49 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
50 |
|
51 |
|
52 |
def test_delete_documents_in_dataset_with_success(get_api_key_fixture):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
53 |
API_KEY = get_api_key_fixture
|
|
|
54 |
rag = RAGFlow(API_KEY, HOST_ADDRESS)
|
55 |
+
ds = rag.create_dataset(name="test_delete_documents")
|
56 |
+
name = "test_delete_documents.txt"
|
57 |
+
blob = b"Sample document content for test."
|
58 |
+
document_infos=[{"displayed_name": name, "blob": blob}]
|
59 |
+
docs = ds.upload_documents(document_infos)
|
60 |
+
ds.delete_documents([docs[0].id])
|
61 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
62 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
sdk/python/test/t_session.py
CHANGED
@@ -1,7 +1,5 @@
|
|
1 |
-
import os
|
2 |
from ragflow_sdk import RAGFlow
|
3 |
-
|
4 |
-
HOST_ADDRESS = os.getenv('HOST_ADDRESS', 'http://127.0.0.1:9380')
|
5 |
|
6 |
|
7 |
def test_create_session_with_success(get_api_key_fixture):
|
|
|
|
|
1 |
from ragflow_sdk import RAGFlow
|
2 |
+
from common import HOST_ADDRESS
|
|
|
3 |
|
4 |
|
5 |
def test_create_session_with_success(get_api_key_fixture):
|