Kevin Hu
committed on
Commit
·
b07ff91
1
Parent(s):
d1bf860
Accelerate titles' embeddings. (#4492)
Browse files
### What problem does this PR solve?
### Type of change
- [x] Performance Improvement
deepdoc/vision/layout_recognizer.py
CHANGED
@@ -78,7 +78,7 @@ class LayoutRecognizer(Recognizer):
|
|
78 |
"x0": b["bbox"][0] / scale_factor, "x1": b["bbox"][2] / scale_factor,
|
79 |
"top": b["bbox"][1] / scale_factor, "bottom": b["bbox"][-1] / scale_factor,
|
80 |
"page_number": pn,
|
81 |
-
} for b in lts if float(b["score"]) >= 0.
|
82 |
lts = self.sort_Y_firstly(lts, np.mean(
|
83 |
[lt["bottom"] - lt["top"] for lt in lts]) / 2)
|
84 |
lts = self.layouts_cleanup(bxs, lts)
|
|
|
78 |
"x0": b["bbox"][0] / scale_factor, "x1": b["bbox"][2] / scale_factor,
|
79 |
"top": b["bbox"][1] / scale_factor, "bottom": b["bbox"][-1] / scale_factor,
|
80 |
"page_number": pn,
|
81 |
+
} for b in lts if float(b["score"]) >= 0.4 or b["type"] not in self.garbage_layouts]
|
82 |
lts = self.sort_Y_firstly(lts, np.mean(
|
83 |
[lt["bottom"] - lt["top"] for lt in lts]) / 2)
|
84 |
lts = self.layouts_cleanup(bxs, lts)
|
rag/svr/task_executor.py
CHANGED
@@ -354,16 +354,9 @@ def embedding(docs, mdl, parser_config=None, callback=None):
|
|
354 |
|
355 |
tk_count = 0
|
356 |
if len(tts) == len(cnts):
|
357 |
-
|
358 |
-
for
|
359 |
-
|
360 |
-
if len(tts_) == 0:
|
361 |
-
tts_ = vts
|
362 |
-
else:
|
363 |
-
tts_ = np.concatenate((tts_, vts), axis=0)
|
364 |
-
tk_count += c
|
365 |
-
callback(prog=0.6 + 0.1 * (i + 1) / len(tts), msg="")
|
366 |
-
tts = tts_
|
367 |
|
368 |
cnts_ = np.array([])
|
369 |
for i in range(0, len(cnts), batch_size):
|
|
|
354 |
|
355 |
tk_count = 0
|
356 |
if len(tts) == len(cnts):
|
357 |
+
vts, c = mdl.encode(tts[0: 1])
|
358 |
+
tts = np.concatenate([vts for _ in range(len(tts))], axis=0)
|
359 |
+
tk_count += c
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
360 |
|
361 |
cnts_ = np.array([])
|
362 |
for i in range(0, len(cnts), batch_size):
|