|
import os
import shutil
import subprocess
import sys
import uuid

import gradio as gr
|
|
|
|
|
|
def process_image(image):
    """Transcribe an uploaded manga page to audio via ``page_transcription.py``.

    The image is written to a uniquely named JPEG under ``./temp_uploads``,
    the transcription script is run on it as a child process, and the
    ``output.wav`` file the script is expected to leave in the current
    working directory is handed back for playback.

    Args:
        image: The uploaded page as a PIL image, or ``None`` when nothing
            was uploaded.

    Returns:
        A ``(status_message, audio_path)`` tuple. ``audio_path`` is ``None``
        whenever no audio could be produced.
    """
    # Gradio passes None when the user submits without uploading anything.
    if image is None:
        return "No image provided. Please upload a manga page.", None

    temp_folder = "./temp_uploads"
    os.makedirs(temp_folder, exist_ok=True)

    # A random name keeps uploads from overwriting each other.
    image_path = os.path.join(temp_folder, f"{uuid.uuid4().hex}.jpg")

    # NOTE(review): the output path is fixed, so concurrent requests can
    # clobber each other's audio; a per-request output path would be safer
    # if page_transcription.py supports one.
    output_audio_path = "output.wav"

    # Remove audio left over from an earlier run so a stale file can never
    # be reported as the result of this one.
    try:
        os.remove(output_audio_path)
    except FileNotFoundError:
        pass

    # Defensive: JPEG cannot store alpha/palette modes, so normalise those
    # to RGB before saving under a .jpg name.
    if getattr(image, "mode", "RGB") not in ("L", "RGB", "CMYK"):
        image = image.convert("RGB")

    try:
        image.save(image_path)
        # Use the interpreter running this app so the child sees the same
        # environment; fall back to PATH lookup if it is unknown.
        subprocess.run(
            [sys.executable or "python", "page_transcription.py", f"-img={image_path}"],
            check=True,
        )
    except subprocess.CalledProcessError as e:
        return f"Error during transcription: {e}", None
    finally:
        # Best-effort cleanup: the upload is only needed while the script
        # runs, and a failed delete must not mask the real outcome.
        try:
            os.remove(image_path)
        except OSError:
            pass

    if os.path.exists(output_audio_path):
        return "Transcription complete. Playing audio...", output_audio_path
    return "Failed to generate audio file.", None
|
|
|
|
|
|
# Gradio front end: one image upload in, a status line and an audio player out.
iface = gr.Interface(
    title="Manga Page Audio Transcription",
    description=(
        "Upload a manga image page, and this tool will transcribe and play "
        "the audio using a backend Python script."
    ),
    fn=process_image,
    # The handler receives the upload as a PIL image.
    inputs=gr.Image(label="Upload a Manga Page", type="pil"),
    # Order matches the (status, audio_path) tuple returned by the handler.
    outputs=[
        gr.Textbox(label="Status"),
        gr.Audio(type="filepath", label="Generated Audio"),
    ],
)


if __name__ == "__main__":
    # Launch the interface only when this file is run as a script.
    iface.launch()
|
|
|