import os
import tempfile

import numpy as np
from PIL import Image
from ultralytics import YOLO

from utils.predict_bounding_boxes import predict_bounding_boxes
from utils.manga_ocr_utils import get_text_from_image
from utils.translate_manga import translate_manga
from utils.process_contour import process_contour
from utils.write_text_on_image import add_text

# Path to the trained YOLO speech-bubble detector weights.
MODEL_PATH = "./model_creation/runs/detect/train5/weights/best.pt"
# Loaded once at import time and shared by every predict() call.
object_detection_model = YOLO(MODEL_PATH)


def extract_text_from_regions(
    image: np.ndarray, target_lang: str, results: list
) -> None:
    """OCR, translate, and redraw the text of each detected region, in place.

    Args:
        image: Full page as an HxWxC uint8 array; modified in place via
            ``add_text`` on each region.
        target_lang: Language code passed to the translator.
        results: Detections as ``(x1, y1, x2, y2, conf, cls)`` tuples;
            only the box coordinates are used here.

    Returns:
        None — the annotation previously claimed ``Dict[str, Any]`` but the
        function has no return value; it mutates ``image``.
    """
    for result in results:
        x1, y1, x2, y2, _, _ = result
        detected_image = image[int(y1) : int(y2), int(x1) : int(x2)]
        # Drop the alpha channel if present — downstream expects RGB.
        if detected_image.shape[-1] == 4:
            detected_image = detected_image[:, :, :3]
        # Bug fix: the crop is already uint8 in [0, 255] (it comes from
        # np.array(PIL image) in predict()). The old `detected_image * 255`
        # wrapped around under uint8 arithmetic and garbled the OCR input.
        im = Image.fromarray(np.uint8(detected_image))
        text = get_text_from_image(im)
        processed_image, cont = process_contour(detected_image)
        translated_text = translate_manga(
            text, target_lang=target_lang, source_lang="ja-JP"
        )
        if translated_text is None:
            translated_text = "Translation failed"
        add_text(processed_image, translated_text, cont)


def predict(image: np.ndarray, target_lang: str):
    """Detect speech bubbles in a manga page and translate their text.

    Args:
        image: Page as an HxWxC uint8 array.
        target_lang: Language code for the translation target.

    Returns:
        The translated page as an ndarray, or ``None`` on any failure
        (best-effort: errors are printed, not raised, so callers get a
        simple success/failure signal).
    """
    # The detector consumes a file path, so the array must be written to
    # disk first. mkstemp gives a collision-free name (the old
    # microsecond-timestamp scheme was race-prone under concurrency).
    fd, temp_filename = tempfile.mkstemp(prefix="image_", suffix=".png")
    os.close(fd)  # PIL reopens the path itself; keep no dangling fd.
    pil_image = Image.fromarray(image)
    pil_image.save(temp_filename)
    try:
        np_image = np.array(pil_image)
        results = predict_bounding_boxes(object_detection_model, temp_filename)
        extract_text_from_regions(np_image, target_lang, results)
        return np_image
    except Exception as e:
        print(f"Error: {str(e)}")
        return None
    finally:
        # Clean up the temporary file
        if os.path.exists(temp_filename):
            try:
                os.remove(temp_filename)
            except OSError as e:
                print(f"Warning: Could not remove temporary file {temp_filename}: {e}")