| import re | |
def layout_rm_equation(layout_res):
    """Drop every detection whose ``category_id`` is 10, in place.

    Category 10 is presumably the equation class (going by the function
    name) -- confirm against the layout model's label map.

    Args:
        layout_res: Mapping with a ``'layout_dets'`` sequence of dicts, each
            carrying a ``'category_id'`` key.

    Returns:
        The same ``layout_res`` object, with the matching entries removed
        from its ``'layout_dets'`` sequence.
    """
    doomed = [
        pos
        for pos, det in enumerate(layout_res['layout_dets'])
        if det['category_id'] == 10
    ]
    # Delete from the back so the positions still to be removed stay valid.
    for pos in reversed(doomed):
        del layout_res['layout_dets'][pos]
    return layout_res
def get_croped_image(image_pil, bbox):
    """Return the region of ``image_pil`` bounded by ``bbox``.

    Args:
        image_pil: Object exposing ``crop(box)``; presumably a PIL image,
            going by the parameter name -- confirm with callers.
        bbox: Iterable of exactly four values, ``(x_min, y_min, x_max, y_max)``.

    Returns:
        Whatever ``image_pil.crop`` returns for that box.
    """
    # Unpacking enforces that bbox holds exactly four values.
    left, top, right, bottom = bbox
    return image_pil.crop((left, top, right, bottom))
# Text-style commands written with a space before the opening brace, e.g.
# ``\mathrm { a b }``; the match runs lazily to the first closing brace.
_LATEX_TEXT_GROUP_RE = re.compile(
    r'(\\(operatorname|mathrm|text|mathbf)\s?\*? {.*?})'
)
_LATEX_LETTER = r'[a-zA-Z]'
# Must be a raw string: ``\W`` and ``\d`` are invalid escape sequences in a
# plain string literal. Inside the class, ``^`` is a literal caret.
_LATEX_NOLETTER = r'[\W_^\d]'
# Whitespace between two characters is dropped unless both are letters.
# The ``(?!\\ )`` lookahead stops the left-hand character from being the
# backslash of an escaped space (``\ ``), so that control space survives.
_LATEX_GAP_RES = (
    re.compile(r'(?!\\ )(%s)\s+?(%s)' % (_LATEX_NOLETTER, _LATEX_NOLETTER)),
    re.compile(r'(?!\\ )(%s)\s+?(%s)' % (_LATEX_NOLETTER, _LATEX_LETTER)),
    re.compile(r'(%s)\s+?(%s)' % (_LATEX_LETTER, _LATEX_NOLETTER)),
)


def latex_rm_whitespace(s: str) -> str:
    """Remove unnecessary whitespace from LaTeX code.

    Two passes are applied:

    1. Every ``\\operatorname``/``\\mathrm``/``\\text``/``\\mathbf`` group
       that has a space before its opening brace gets all of its space
       characters removed.
    2. Whitespace between two adjacent characters is removed whenever at
       least one of them is not an ASCII letter. This repeats until the
       string stops changing, because matches do not overlap and one sweep
       can leave removable gaps behind.

    Whitespace between two letters is kept, and so is the escaped space
    ``\\ `` when the backslash would be the left-hand character.

    Args:
        s: LaTeX source string.

    Returns:
        The string with the redundant whitespace stripped.
    """
    # Rewrite each match directly rather than pre-collecting the replacements
    # and popping them off a list in step with a second scan.
    s = _LATEX_TEXT_GROUP_RE.sub(lambda m: m.group(0).replace(' ', ''), s)
    while True:
        collapsed = s
        for gap_re in _LATEX_GAP_RES:
            collapsed = gap_re.sub(r'\1\2', collapsed)
        if collapsed == s:
            return s
        s = collapsed