Kevin Hu committed on
Commit
b07ff91
·
1 Parent(s): d1bf860

Accelerate titles' embeddings. (#4492)

Browse files

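### What problem does this PR solve?

Speeds up title embedding in `rag/svr/task_executor.py`: instead of encoding every title batch by batch, only the first title is encoded and the resulting vector is repeated once per title (this presumably relies on all titles in the task being identical — confirm against the caller). It also lowers the layout score threshold in `deepdoc/vision/layout_recognizer.py` from 0.8 to 0.4.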

### Type of change

- [x] Performance Improvement

deepdoc/vision/layout_recognizer.py CHANGED
@@ -78,7 +78,7 @@ class LayoutRecognizer(Recognizer):
78
  "x0": b["bbox"][0] / scale_factor, "x1": b["bbox"][2] / scale_factor,
79
  "top": b["bbox"][1] / scale_factor, "bottom": b["bbox"][-1] / scale_factor,
80
  "page_number": pn,
81
- } for b in lts if float(b["score"]) >= 0.8 or b["type"] not in self.garbage_layouts]
82
  lts = self.sort_Y_firstly(lts, np.mean(
83
  [lt["bottom"] - lt["top"] for lt in lts]) / 2)
84
  lts = self.layouts_cleanup(bxs, lts)
 
78
  "x0": b["bbox"][0] / scale_factor, "x1": b["bbox"][2] / scale_factor,
79
  "top": b["bbox"][1] / scale_factor, "bottom": b["bbox"][-1] / scale_factor,
80
  "page_number": pn,
81
+ } for b in lts if float(b["score"]) >= 0.4 or b["type"] not in self.garbage_layouts]
82
  lts = self.sort_Y_firstly(lts, np.mean(
83
  [lt["bottom"] - lt["top"] for lt in lts]) / 2)
84
  lts = self.layouts_cleanup(bxs, lts)
rag/svr/task_executor.py CHANGED
@@ -354,16 +354,9 @@ def embedding(docs, mdl, parser_config=None, callback=None):
354
 
355
  tk_count = 0
356
  if len(tts) == len(cnts):
357
- tts_ = np.array([])
358
- for i in range(0, len(tts), batch_size):
359
- vts, c = mdl.encode(tts[i: i + batch_size])
360
- if len(tts_) == 0:
361
- tts_ = vts
362
- else:
363
- tts_ = np.concatenate((tts_, vts), axis=0)
364
- tk_count += c
365
- callback(prog=0.6 + 0.1 * (i + 1) / len(tts), msg="")
366
- tts = tts_
367
 
368
  cnts_ = np.array([])
369
  for i in range(0, len(cnts), batch_size):
 
354
 
355
  tk_count = 0
356
  if len(tts) == len(cnts):
357
+ vts, c = mdl.encode(tts[0: 1])
358
+ tts = np.concatenate([vts for _ in range(len(tts))], axis=0)
359
+ tk_count += c
 
 
 
 
 
 
 
360
 
361
  cnts_ = np.array([])
362
  for i in range(0, len(cnts), batch_size):