Spaces:
Running
Running
| import os | |
| from typing import List, Tuple | |
| import requests | |
| from PIL import Image, ImageDraw, ImageFont | |
| from surya.postprocessing.fonts import get_font_path | |
| from surya.schema import TextLine | |
| from surya.settings import settings | |
| from surya.postprocessing.math.latex import is_latex | |
def sort_text_lines(lines: List[TextLine] | List[dict], tolerance=1.25):
    """Sort text lines into an approximate reading order.

    Lines are bucketed by the second bbox coordinate (``bbox[1]``), quantised
    to multiples of ``tolerance``.  Buckets are emitted in ascending key
    order, and the lines inside each bucket are ordered by the first bbox
    coordinate (``bbox[0]``).  Not 100% accurate: this should only be used
    as a starting point for more advanced sorting.

    Args:
        lines: ``TextLine`` objects (bbox read from ``.bbox``) or dicts
            (bbox read from ``["bbox"]``).
        tolerance: Width of one vertical bucket, in bbox units.

    Returns:
        A new list holding the same line objects in sorted order.
    """

    def bbox_of(line):
        # Both supported input kinds expose the same bbox sequence.
        return line.bbox if isinstance(line, TextLine) else line["bbox"]

    vertical_groups = {}
    for line in lines:
        # Select the bbox first and divide afterwards: a conditional
        # expression binds looser than ``/``, so writing the division inline
        # would apply it to the dict branch only and bucket TextLine inputs
        # differently from dict inputs.
        group_key = round(bbox_of(line)[1] / tolerance) * tolerance
        vertical_groups.setdefault(group_key, []).append(line)

    # Sort each group horizontally and flatten the groups into a single list.
    sorted_lines = []
    for group_key in sorted(vertical_groups):
        sorted_lines.extend(
            sorted(vertical_groups[group_key], key=lambda line: bbox_of(line)[0])
        )
    return sorted_lines
def truncate_repetitions(text: str, min_len=15):
    """Strip a repeating tail from ``text`` (adapted from nougat).

    The function looks for the longest length ``n`` with
    ``min_len <= n < int(len(text) / 2)`` such that the last ``n`` characters
    of ``text`` equal the ``n`` characters immediately before them.  If such
    a length exists, every trailing copy of that ``n``-character suffix is
    removed and the remaining prefix is returned.

    Args:
        text: The string to clean up.
        min_len: Shortest repetition length considered.  Texts shorter than
            ``2 * min_len`` are returned unchanged.

    Returns:
        ``text`` itself when no repeating tail is found, otherwise the prefix
        of ``text`` left after removing all trailing copies of the suffix.
    """
    total = len(text)
    if total < 2 * min_len:
        return text

    # Try every candidate length and remember the largest one at which the
    # tail is an immediate repeat of the characters preceding it.
    longest = None
    for width in range(min_len, int(total / 2)):
        tail = text[total - width:]
        preceding = text[total - 2 * width:total - width]
        if preceding == tail:
            longest = width

    if longest is None:
        return text

    repeated = text[-longest:]

    # Peel copies of the repeated suffix off the end until none remain.
    remaining = text
    while remaining.endswith(repeated):
        remaining = remaining[:-longest]
    return remaining
def get_text_size(text, font):
    """Measure ``text`` as rendered with ``font``.

    A throwaway zero-sized palette image is created only to obtain a drawing
    context; nothing is drawn on it.

    Returns:
        A ``(width, height)`` tuple taken from the last two values of
        ``textbbox`` when the text is anchored at ``(0, 0)``.
    """
    scratch = Image.new(mode="P", size=(0, 0))
    bounds = ImageDraw.Draw(scratch).textbbox((0, 0), text=text, font=font)
    # The first two values of the box are not used.
    _, _, width, height = bounds
    return width, height
def render_text(draw, text, s_bbox, bbox_width, bbox_height, font_path, box_font_size):
    """Draw ``text`` in black inside a box, shrinking the font to fit.

    Starting at ``box_font_size``, the font size is lowered one step at a
    time until the measured text fits within ``bbox_width`` by
    ``bbox_height`` or the size reaches 6.  Text that still does not fit at
    that point is drawn anyway.  The text starts at the left edge of the box
    and is centred vertically.

    Args:
        draw: Object whose ``text`` method performs the drawing.
        text: The string to draw.
        s_bbox: Box coordinates; ``s_bbox[0]`` and ``s_bbox[1]`` are used as
            the x and y of the box origin.
        bbox_width: Available width for the text.
        bbox_height: Available height for the text.
        font_path: Font file passed to ``ImageFont.truetype``.
        box_font_size: Initial font size to try.
    """
    font = ImageFont.truetype(font_path, box_font_size)
    text_width, text_height = get_text_size(text, font)

    while (text_width > bbox_width or text_height > bbox_height) and box_font_size > 6:
        box_font_size -= 1
        font = ImageFont.truetype(font_path, box_font_size)
        text_width, text_height = get_text_size(text, font)

    # text_height already belongs to the final font (it is refreshed on every
    # loop iteration), so no further measurement is needed here.
    x = s_bbox[0]
    y = s_bbox[1] + (bbox_height - text_height) / 2
    draw.text((x, y), text, fill="black", font=font)
def render_math(image, draw, text, s_bbox, bbox_width, bbox_height, font_path):
    """Render LaTeX ``text`` into a box, falling back to plain text on error.

    The LaTeX is rendered with ``latex_to_pil``, shrunk in place to fit the
    box with ``thumbnail`` and pasted onto ``image`` at the box origin.  Any
    exception raised along the way, including a failure to import the
    renderer, is printed and the raw text is drawn with ``render_text``
    instead.

    Args:
        image: Target image that receives the pasted rendering.
        draw: Drawing object handed to ``render_text`` in the fallback path.
        text: The LaTeX source to render.
        s_bbox: Box coordinates; ``s_bbox[0]`` and ``s_bbox[1]`` give the
            paste position.
        bbox_width: Box width.
        bbox_height: Box height.
        font_path: Font file used only by the plain-text fallback.
    """
    try:
        # Imported here so that an import failure is handled by the fallback.
        from surya.postprocessing.math.render import latex_to_pil

        # 20% of the box height, clamped to the range 10..24.
        math_font_size = min(max(int(.2 * bbox_height), 10), 24)
        rendered = latex_to_pil(text, bbox_width, bbox_height, fontsize=math_font_size)
        rendered.thumbnail((bbox_width, bbox_height))
        paste_at = (s_bbox[0], s_bbox[1])
        image.paste(rendered, paste_at)
    except Exception as e:
        print(f"Failed to render math: {e}")
        # 75% of the box height, clamped to the range 10..24.
        fallback_font_size = min(max(int(.75 * bbox_height), 10), 24)
        render_text(draw, text, s_bbox, bbox_width, bbox_height, font_path, fallback_font_size)
def draw_text_on_image(bboxes, texts, image_size: Tuple[int, int], langs: List[str], font_path=None, max_font_size=60, res_upscale=2, has_math=False):
    """Draw each text into its bounding box on a fresh white RGB image.

    The canvas is ``image_size`` scaled by ``res_upscale``, and every bbox
    coordinate is scaled by the same factor and truncated to an int.
    ``bboxes`` and ``texts`` are paired with ``zip``, so extra items in the
    longer sequence are ignored.

    Args:
        bboxes: Boxes whose items 0..3 are read as x0, y0, x1, y1.
        texts: One string per box.
        image_size: Size of the unscaled canvas (two items).
        langs: Languages passed to ``get_font_path`` when ``font_path`` is
            not given.
        font_path: Font file to use; looked up from ``langs`` when ``None``.
        max_font_size: Upper bound for the initial font size of plain text.
        res_upscale: Scale factor applied to the canvas and to every box.
        has_math: When true, texts for which ``is_latex`` returns true are
            rendered with ``render_math`` instead of ``render_text``.

    Returns:
        The image with all texts drawn on it.
    """
    font_path = get_font_path(langs) if font_path is None else font_path

    canvas_width = image_size[0] * res_upscale
    canvas_height = image_size[1] * res_upscale
    image = Image.new('RGB', (canvas_width, canvas_height), color='white')
    draw = ImageDraw.Draw(image)

    for bbox, text in zip(bboxes, texts):
        s_bbox = [int(coord * res_upscale) for coord in bbox]
        box_w = s_bbox[2] - s_bbox[0]
        box_h = s_bbox[3] - s_bbox[1]

        if has_math and is_latex(text):
            render_math(image, draw, text, s_bbox, box_w, box_h, font_path)
            continue

        # Start at 75% of the box height, clamped to 6..max_font_size;
        # render_text shrinks it further if the text does not fit.
        initial_font_size = max(6, min(int(.75 * box_h), max_font_size))
        render_text(draw, text, s_bbox, box_w, box_h, font_path, initial_font_size)

    return image