# Spaces: Running
import os
import tempfile
import time
from typing import Any, Dict, Optional

import numpy as np
from PIL import Image
from ultralytics import YOLO

from utils.predict_bounding_boxes import predict_bounding_boxes
from utils.manga_ocr_utils import get_text_from_image
from utils.translate_manga import translate_manga
from utils.process_contour import process_contour
from utils.write_text_on_image import add_text
# Detector weights, given as a path relative to the working directory.
MODEL_PATH = "./model_creation/runs/detect/train5/weights/best.pt"
# Loaded once at import time; predict() passes this instance to
# predict_bounding_boxes on every call.
object_detection_model = YOLO(MODEL_PATH)
def extract_text_from_regions(
    image: np.ndarray, target_lang: str, results: list
) -> None:
    """OCR, translate and re-letter every detected region of ``image``.

    For each detection the matching crop of ``image`` is read with
    ``get_text_from_image``, translated with ``translate_manga`` and the
    translation is handed to ``add_text`` together with the outputs of
    ``process_contour``.

    Args:
        image: Page pixels indexed ``[row, column, ...]``. Crops are taken
            with basic slicing, so they are views into ``image``.
            NOTE(review): the result is presumably delivered by the helpers
            drawing in place through that view -- confirm against
            ``process_contour`` / ``add_text``.
        target_lang: Target language forwarded to ``translate_manga``.
        results: Iterable of 6-item detections whose first four items are
            ``x1, y1, x2, y2``; the remaining two items are ignored.

    Returns:
        None. Nothing is returned; any output is a side effect of the helpers.
    """
    for x1, y1, x2, y2, _, _ in results:
        # Clamp at 0 so a slightly negative coordinate does not wrap around
        # to the far edge of the page (negative indices count from the end).
        top, bottom = max(int(y1), 0), max(int(y2), 0)
        left, right = max(int(x1), 0), max(int(x2), 0)
        region = image[top:bottom, left:right]

        if region.size == 0:
            # Degenerate or out-of-frame box: there is nothing to read and
            # Image.fromarray would fail on an empty array.
            continue

        # Drop the alpha channel. The ndim guard keeps a 2-D (grayscale) crop
        # that happens to be 4 pixels wide from being indexed as RGBA.
        if region.ndim == 3 and region.shape[-1] == 4:
            region = region[:, :, :3]

        # Only floating-point images need rescaling (assumed to be in
        # [0, 1] -- TODO confirm). Multiplying uint8 pixels by 255 wraps
        # modulo 256 and corrupts the crop given to the OCR model.
        if np.issubdtype(region.dtype, np.floating):
            ocr_pixels = np.clip(region * 255, 0, 255).astype(np.uint8)
        else:
            ocr_pixels = np.ascontiguousarray(region, dtype=np.uint8)

        text = get_text_from_image(Image.fromarray(ocr_pixels))

        # process_contour receives the view itself, not the OCR copy.
        processed_image, cont = process_contour(region)

        translated_text = translate_manga(
            text, target_lang=target_lang, source_lang="ja-JP"
        )
        if translated_text is None:
            translated_text = "Translation failed"

        add_text(processed_image, translated_text, cont)
def predict(image: np.ndarray, target_lang: str) -> Optional[np.ndarray]:
    """Detect text regions in ``image`` and run them through translation.

    The image is written to a temporary PNG because
    ``predict_bounding_boxes`` is given a file name, not pixel data. The
    detections are then passed to ``extract_text_from_regions`` together
    with an array copy of the image.

    Args:
        image: Input picture; must be accepted by ``Image.fromarray``
            (an invalid array raises from that call, before any file is
            created).
        target_lang: Target language forwarded to
            ``extract_text_from_regions``.

    Returns:
        The working array built from the input after processing, or ``None``
        if saving, detection or processing raised (the error is printed).
    """
    pil_image = Image.fromarray(image)

    # mkstemp guarantees a unique name even under concurrent requests and
    # keeps scratch files out of the working directory.
    fd, temp_filename = tempfile.mkstemp(prefix="image_", suffix=".png")
    try:
        # Saving happens inside the try so a failed write is cleaned up too.
        with os.fdopen(fd, "wb") as temp_file:
            pil_image.save(temp_file, format="PNG")

        # np.array builds a new array, so later edits do not touch the
        # caller's input.
        np_image = np.array(pil_image)
        results = predict_bounding_boxes(object_detection_model, temp_filename)
        extract_text_from_regions(np_image, target_lang, results)
        return np_image
    except Exception as e:
        # Boundary handler: report the failure and signal it with None.
        print(f"Error: {e}")
        return None
    finally:
        # Clean up the temporary file; attempt the removal directly instead
        # of checking for existence first.
        try:
            os.remove(temp_filename)
        except FileNotFoundError:
            pass
        except OSError as e:
            print(f"Warning: Could not remove temporary file {temp_filename}: {e}")