yqkcn
committed on
Commit
·
0622917
1
Parent(s):
2493f1d
Refactoring large integers to improve readability (#2636)
Browse files

### What problem does this PR solve?
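Refactoring large integer literals into power-of-ten expressions (e.g. `1000000000` → `10 ** 9`, `100000` → `10 ** 5`) to improve readability.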
### Type of change
- [x] Refactoring
api/db/services/task_service.py
CHANGED
@@ -133,9 +133,8 @@ class TaskService(CommonService):
|
|
133 |
cls.model.id == id).execute()
|
134 |
|
135 |
|
136 |
-
def queue_tasks(doc, bucket, name):
|
137 |
def new_task():
|
138 |
-
nonlocal doc
|
139 |
return {
|
140 |
"id": get_uuid(),
|
141 |
"doc_id": doc["id"]
|
@@ -149,15 +148,9 @@ def queue_tasks(doc, bucket, name):
|
|
149 |
page_size = doc["parser_config"].get("task_page_size", 12)
|
150 |
if doc["parser_id"] == "paper":
|
151 |
page_size = doc["parser_config"].get("task_page_size", 22)
|
152 |
-
if doc["parser_id"]
|
153 |
-
page_size =
|
154 |
-
|
155 |
-
page_size = 1000000000
|
156 |
-
if not do_layout:
|
157 |
-
page_size = 1000000000
|
158 |
-
page_ranges = doc["parser_config"].get("pages")
|
159 |
-
if not page_ranges:
|
160 |
-
page_ranges = [(1, 100000)]
|
161 |
for s, e in page_ranges:
|
162 |
s -= 1
|
163 |
s = max(0, s)
|
@@ -170,8 +163,7 @@ def queue_tasks(doc, bucket, name):
|
|
170 |
|
171 |
elif doc["parser_id"] == "table":
|
172 |
file_bin = STORAGE_IMPL.get(bucket, name)
|
173 |
-
rn = RAGFlowExcelParser.row_number(
|
174 |
-
doc["name"], file_bin)
|
175 |
for i in range(0, rn, 3000):
|
176 |
task = new_task()
|
177 |
task["from_page"] = i
|
|
|
133 |
cls.model.id == id).execute()
|
134 |
|
135 |
|
136 |
+
def queue_tasks(doc: dict, bucket: str, name: str):
|
137 |
def new_task():
|
|
|
138 |
return {
|
139 |
"id": get_uuid(),
|
140 |
"doc_id": doc["id"]
|
|
|
148 |
page_size = doc["parser_config"].get("task_page_size", 12)
|
149 |
if doc["parser_id"] == "paper":
|
150 |
page_size = doc["parser_config"].get("task_page_size", 22)
|
151 |
+
if doc["parser_id"] in ["one", "knowledge_graph"] or not do_layout:
|
152 |
+
page_size = 10 ** 9
|
153 |
+
page_ranges = doc["parser_config"].get("pages") or [(1, 10 ** 5)]
|
|
|
|
|
|
|
|
|
|
|
|
|
154 |
for s, e in page_ranges:
|
155 |
s -= 1
|
156 |
s = max(0, s)
|
|
|
163 |
|
164 |
elif doc["parser_id"] == "table":
|
165 |
file_bin = STORAGE_IMPL.get(bucket, name)
|
166 |
+
rn = RAGFlowExcelParser.row_number(doc["name"], file_bin)
|
|
|
167 |
for i in range(0, rn, 3000):
|
168 |
task = new_task()
|
169 |
task["from_page"] = i
|