manga-translator / main.py
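"""Manga translation pipeline: detect text regions on a page with a YOLO model,
OCR the Japanese text, translate it, and write the translation back onto the image."""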
import os
import time
from typing import Optional

import numpy as np
from PIL import Image
from ultralytics import YOLO

from utils.predict_bounding_boxes import predict_bounding_boxes
from utils.manga_ocr_utils import get_text_from_image
from utils.translate_manga import translate_manga
from utils.process_contour import process_contour
from utils.write_text_on_image import add_text

# Trained YOLO weights used to detect text regions on a manga page.
MODEL_PATH = "./model_creation/runs/detect/train5/weights/best.pt"
object_detection_model = YOLO(MODEL_PATH)
def extract_text_from_regions(
    image: np.ndarray, target_lang: str, results: list
) -> None:
    """Run OCR, translation and text rendering on every detected region."""
    for result in results:
        # Each result holds the box coordinates plus two unused fields
        # (detection confidence and class id from YOLO).
        x1, y1, x2, y2, _, _ = result
        detected_image = image[int(y1) : int(y2), int(x1) : int(x2)]

        # Drop the alpha channel if the crop is RGBA.
        if detected_image.shape[-1] == 4:
            detected_image = detected_image[:, :, :3]

        # The crop comes from a uint8 page array (values already in 0-255),
        # so it can be wrapped directly without rescaling.
        im = Image.fromarray(detected_image)
        text = get_text_from_image(im)

        # Prepare the region and get its contour for text placement.
        processed_image, cont = process_contour(detected_image)

        translated_text = translate_manga(
            text, target_lang=target_lang, source_lang="ja-JP"
        )
        if translated_text is None:
            translated_text = "Translation failed"

        add_text(processed_image, translated_text, cont)
def predict(image: np.ndarray, target_lang: str) -> Optional[np.ndarray]:
    """Detect, translate and redraw the text of a manga page, returning the edited page."""
    timestamp = str(int(time.time() * 1000000))  # Generate a unique timestamp
    temp_filename = f"image_{timestamp}.png"

    # The bounding-box detector works from a file path, so save the page to a temporary PNG.
    pil_image = Image.fromarray(image)
    pil_image.save(temp_filename)

    try:
        np_image = np.array(pil_image)
        results = predict_bounding_boxes(object_detection_model, temp_filename)
        extract_text_from_regions(np_image, target_lang, results)
        return np_image
    except Exception as e:
        print(f"Error: {str(e)}")
        return None
    finally:
        # Clean up the temporary file
        if os.path.exists(temp_filename):
            try:
                os.remove(temp_filename)
            except OSError as e:
                print(f"Warning: Could not remove temporary file {temp_filename}: {e}")