|
import os
import shutil
import subprocess
import sys
import uuid

import gradio as gr
from pydub import AudioSegment
|
|
|
def convert_wav_for_browser(input_path, output_path):
    """Re-encode an audio file as a 44.1 kHz, 2-byte-sample, 2-channel WAV.

    Args:
        input_path: Path of the audio file to read.
        output_path: Path the converted WAV file is written to.
    """
    source = AudioSegment.from_file(input_path)

    # Normalise one property at a time: sample rate, sample width, channels.
    resampled = source.set_frame_rate(44100)
    two_byte_samples = resampled.set_sample_width(2)
    two_channel = two_byte_samples.set_channels(2)

    two_channel.export(output_path, format="wav")
|
|
|
|
|
|
|
def process_image(image):
    """Save an uploaded page, run the transcription script, and return its audio.

    The image is written to a uniquely named JPEG under ``./temp_uploads`` and
    handed to ``page_transcription.py`` as ``-img=<path>``. Afterwards the
    function looks for ``output.wav`` in the working directory and, if it is
    there, converts it with ``convert_wav_for_browser`` into
    ``output_safe.wav``.

    Args:
        image: Image object exposing ``save`` (a PIL image when called from
            the Gradio interface), or ``None`` when nothing was uploaded.

    Returns:
        A ``(status_message, audio_path)`` tuple. ``audio_path`` is
        ``"output_safe.wav"`` on success and ``None`` on any failure.
    """
    # Guard first: an empty submission should produce a status message, not
    # an AttributeError from ``image.save``.
    if image is None:
        return "No image provided. Please upload a manga page.", None

    temp_folder = "./temp_uploads"
    os.makedirs(temp_folder, exist_ok=True)

    image_filename = f"{uuid.uuid4().hex}.jpg"
    image_path = os.path.join(temp_folder, image_filename)

    # JPEG cannot store alpha or palette data, so convert those modes before
    # saving instead of letting the save call fail.
    if getattr(image, "mode", None) in ("RGBA", "LA", "P"):
        image = image.convert("RGB")
    image.save(image_path)

    output_audio_path = "output.wav"
    try:
        # Remove any result left behind by an earlier run so that a script
        # which exits cleanly without writing audio is reported as a failure
        # rather than replaying old audio.
        # NOTE(review): assumes the script creates output.wav from scratch.
        if os.path.exists(output_audio_path):
            os.remove(output_audio_path)

        # sys.executable keeps the child on the same interpreter (and hence
        # the same installed packages) as this process.
        subprocess.run(
            [sys.executable, "page_transcription.py", f"-img={image_path}"],
            check=True,
        )
    except subprocess.CalledProcessError as e:
        return f"Error during transcription: {e}", None
    finally:
        # The upload is only needed while the script runs; cleanup is
        # best-effort so a failed delete never masks the real outcome.
        try:
            os.remove(image_path)
        except OSError:
            pass

    if not os.path.exists(output_audio_path):
        return "Failed to generate audio file.", None

    safe_audio_path = "output_safe.wav"
    convert_wav_for_browser(output_audio_path, safe_audio_path)
    return "Transcription complete. Playing audio...", safe_audio_path
|
|
|
|
|
# Build the components first (input, then the two outputs) so the Interface
# call below is just wiring.
_page_input = gr.Image(type="pil", label="Upload a Manga Page")
_status_output = gr.Textbox(label="Status")
_audio_output = gr.Audio(label="Generated Audio", type="filepath")

# process_image returns (status text, audio file path), matching the order of
# the two output components.
iface = gr.Interface(
    fn=process_image,
    inputs=_page_input,
    outputs=[_status_output, _audio_output],
    title="Manga Page Audio Transcription",
    description="Upload a manga image page, and this tool will transcribe and play the audio using a backend Python script.",
)


# Start the web UI only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()
|
|