yqkcn committed on
Commit
0622917
·
1 Parent(s): 2493f1d

Refactoring large integers to improve readability (#2636)

Browse files

### What problem does this PR solve?

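Refactoring large integer literals in `queue_tasks` to improve readability: values such as `1000000000` and `100000` are written as `10 ** 9` and `10 ** 5`, and the repeated `page_size` assignments are merged into a single condition.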

### Type of change

- [x] Refactoring

Files changed (1) hide show
  1. api/db/services/task_service.py +5 -13
api/db/services/task_service.py CHANGED
@@ -133,9 +133,8 @@ class TaskService(CommonService):
133
  cls.model.id == id).execute()
134
 
135
 
136
- def queue_tasks(doc, bucket, name):
137
  def new_task():
138
- nonlocal doc
139
  return {
140
  "id": get_uuid(),
141
  "doc_id": doc["id"]
@@ -149,15 +148,9 @@ def queue_tasks(doc, bucket, name):
149
  page_size = doc["parser_config"].get("task_page_size", 12)
150
  if doc["parser_id"] == "paper":
151
  page_size = doc["parser_config"].get("task_page_size", 22)
152
- if doc["parser_id"] == "one":
153
- page_size = 1000000000
154
- if doc["parser_id"] == "knowledge_graph":
155
- page_size = 1000000000
156
- if not do_layout:
157
- page_size = 1000000000
158
- page_ranges = doc["parser_config"].get("pages")
159
- if not page_ranges:
160
- page_ranges = [(1, 100000)]
161
  for s, e in page_ranges:
162
  s -= 1
163
  s = max(0, s)
@@ -170,8 +163,7 @@ def queue_tasks(doc, bucket, name):
170
 
171
  elif doc["parser_id"] == "table":
172
  file_bin = STORAGE_IMPL.get(bucket, name)
173
- rn = RAGFlowExcelParser.row_number(
174
- doc["name"], file_bin)
175
  for i in range(0, rn, 3000):
176
  task = new_task()
177
  task["from_page"] = i
 
133
  cls.model.id == id).execute()
134
 
135
 
136
+ def queue_tasks(doc: dict, bucket: str, name: str):
137
  def new_task():
 
138
  return {
139
  "id": get_uuid(),
140
  "doc_id": doc["id"]
 
148
  page_size = doc["parser_config"].get("task_page_size", 12)
149
  if doc["parser_id"] == "paper":
150
  page_size = doc["parser_config"].get("task_page_size", 22)
151
+ if doc["parser_id"] in ["one", "knowledge_graph"] or not do_layout:
152
+ page_size = 10 ** 9
153
+ page_ranges = doc["parser_config"].get("pages") or [(1, 10 ** 5)]
 
 
 
 
 
 
154
  for s, e in page_ranges:
155
  s -= 1
156
  s = max(0, s)
 
163
 
164
  elif doc["parser_id"] == "table":
165
  file_bin = STORAGE_IMPL.get(bucket, name)
166
+ rn = RAGFlowExcelParser.row_number(doc["name"], file_bin)
 
167
  for i in range(0, rn, 3000):
168
  task = new_task()
169
  task["from_page"] = i