# Sonofica / app.py
# janmayjay's picture
# Update app.py
# 3046549 verified
# raw
# history blame
# 1.84 kB
import gradio as gr
import os
import shutil
import uuid
import subprocess
from pydub import AudioSegment
def convert_wav_for_browser(input_path, output_path):
    """Re-encode an audio file as a 44.1 kHz, 2-byte-sample, 2-channel WAV.

    The file at ``input_path`` is loaded with pydub, normalised to a fixed
    frame rate / sample width / channel count, and written to
    ``output_path`` in WAV format. Nothing is returned.
    """
    source_clip = AudioSegment.from_file(input_path)

    # Apply each normalisation step separately: frame rate, then sample
    # width (in bytes), then channel count.
    resampled = source_clip.set_frame_rate(44100)
    fixed_width = resampled.set_sample_width(2)
    two_channel = fixed_width.set_channels(2)

    two_channel.export(output_path, format="wav")
# Function to handle image upload and transcription
def process_image(image):
    """Run ``page_transcription.py`` on an uploaded image and return its audio.

    The image is written to ``./temp_uploads`` under a random ``.jpg`` name,
    ``page_transcription.py`` is executed as a subprocess with
    ``-img=<path>``, and, if the script produced ``output.wav``, that file is
    re-encoded to ``output_safe.wav`` via ``convert_wav_for_browser``.

    Args:
        image: An object exposing ``save(path)`` (the interface passes a PIL
            image), or ``None`` when nothing was uploaded.

    Returns:
        A ``(status_message, audio_path)`` tuple. ``audio_path`` is
        ``"output_safe.wav"`` on success and ``None`` on any failure.
    """
    # Function-scope import so this block does not depend on the file's
    # top-level import list.
    import sys

    # Submitting the form without an image yields None; fail gracefully
    # instead of raising AttributeError on ``image.save``.
    if image is None:
        return "No image provided. Please upload a manga page.", None

    output_audio_path = "output.wav"
    safe_audio_path = "output_safe.wav"

    # Drop any output left over from a previous request so that a run which
    # exits successfully without writing a file is not reported as a success
    # with stale audio.
    try:
        os.remove(output_audio_path)
    except FileNotFoundError:
        pass

    # Save uploaded image to temp folder under a unique filename
    temp_folder = "./temp_uploads"
    os.makedirs(temp_folder, exist_ok=True)
    image_filename = f"{uuid.uuid4().hex}.jpg"
    image_path = os.path.join(temp_folder, image_filename)

    try:
        # JPEG cannot store alpha/palette modes; convert those to RGB first.
        if getattr(image, "mode", "RGB") not in ("RGB", "L", "CMYK"):
            image = image.convert("RGB")
        image.save(image_path)

        # Run the transcription script with the interpreter that is running
        # this app, so it sees the same installed packages.
        try:
            subprocess.run(
                [
                    sys.executable or "python",
                    "page_transcription.py",
                    f"-img={image_path}",
                ],
                check=True,
            )
        except (subprocess.CalledProcessError, OSError) as e:
            return f"Error during transcription: {e}", None
    finally:
        # The upload is only needed while the script runs; remove it so the
        # temp folder does not grow without bound. Cleanup is best-effort.
        try:
            os.remove(image_path)
        except OSError:
            pass

    # Check if output.wav is generated
    if not os.path.exists(output_audio_path):
        return "Failed to generate audio file.", None

    # Convert to browser-safe format
    convert_wav_for_browser(output_audio_path, safe_audio_path)
    return "Transcription complete. Playing audio...", safe_audio_path
# Gradio interface: one image input wired to process_image, whose
# (status, audio path) result feeds a textbox and an audio player.
iface = gr.Interface(
    title="Manga Page Audio Transcription",
    description=(
        "Upload a manga image page, and this tool will transcribe and play "
        "the audio using a backend Python script."
    ),
    fn=process_image,
    # type="pil" hands the callback a PIL image object.
    inputs=gr.Image(label="Upload a Manga Page", type="pil"),
    outputs=[
        gr.Textbox(label="Status"),
        # type="filepath" matches the path string process_image returns.
        gr.Audio(type="filepath", label="Generated Audio"),
    ],
)

# Launch app only when executed as a script, not when imported.
if __name__ == "__main__":
    iface.launch()