Spaces:

janmayjay
/

Sonofica

Running

App Files Files Community

Sonofica / app.py

janmayjay

Update app.py

3046549 verified about 1 month ago

raw

history blame

1.84 kB

	import os
	import shutil
	import subprocess
	import sys
	import uuid

	import gradio as gr
	from pydub import AudioSegment

	def convert_wav_for_browser(input_path, output_path):
	    """Re-encode the audio at *input_path* and write it to *output_path* as WAV.

	    The audio is normalised to a 44100 Hz frame rate, a sample width of 2
	    (bytes per sample, i.e. 16-bit) and 2 channels before being exported.

	    Args:
	        input_path: Path of the audio file to load.
	        output_path: Destination path for the converted WAV file.
	    """
	    source = AudioSegment.from_file(input_path)

	    # Each setter returns a new segment; apply them one step at a time.
	    resampled = source.set_frame_rate(44100)
	    sixteen_bit = resampled.set_sample_width(2)
	    stereo = sixteen_bit.set_channels(2)

	    stereo.export(output_path, format="wav")


	# Function to handle image upload and transcription
	def process_image(image):
	    """Transcribe an uploaded manga page to audio via ``page_transcription.py``.

	    The image is written to a uniquely named file under ``./temp_uploads``,
	    the transcription script is run on it as a subprocess, and the
	    ``output.wav`` the script is expected to produce is re-encoded to
	    ``output_safe.wav`` for playback.

	    Args:
	        image: Object exposing ``save(path)`` (the Gradio input is configured
	            with ``type="pil"``), or ``None`` when nothing was uploaded.

	    Returns:
	        A ``(status_message, audio_path)`` tuple; ``audio_path`` is ``None``
	        whenever no audio could be produced.
	    """
	    # Submitting the form without an upload passes None; report it instead
	    # of failing with AttributeError on ``image.save``.
	    if image is None:
	        return "Please upload an image first.", None

	    temp_folder = "./temp_uploads"
	    os.makedirs(temp_folder, exist_ok=True)

	    # Unique name so simultaneous uploads do not overwrite each other.
	    image_path = os.path.join(temp_folder, f"{uuid.uuid4().hex}.jpg")

	    output_audio_path = "output.wav"
	    # Drop audio left behind by an earlier run so that a script which exits
	    # cleanly without writing output is not mistaken for a success.
	    if os.path.exists(output_audio_path):
	        os.remove(output_audio_path)

	    try:
	        image.save(image_path)
	        # sys.executable runs the script with the interpreter hosting this
	        # app rather than whatever "python" resolves to on PATH (which may
	        # be missing or lack the required packages).
	        subprocess.run(
	            [sys.executable, "page_transcription.py", f"-img={image_path}"],
	            check=True,
	        )
	    except subprocess.CalledProcessError as e:
	        return f"Error during transcription: {e}", None
	    finally:
	        # The upload is only needed while the script runs; remove it so the
	        # temp folder does not grow with every request.
	        if os.path.exists(image_path):
	            os.remove(image_path)

	    if not os.path.exists(output_audio_path):
	        return "Failed to generate audio file.", None

	    # Convert to a browser-safe format before handing the path to the UI.
	    safe_audio_path = "output_safe.wav"
	    convert_wav_for_browser(output_audio_path, safe_audio_path)
	    return "Transcription complete. Playing audio...", safe_audio_path

	# Gradio interface
	# One image input; two outputs matching the (status, audio path) tuple
	# returned by process_image.
	iface = gr.Interface(
	    title="Manga Page Audio Transcription",
	    description="Upload a manga image page, and this tool will transcribe and play the audio using a backend Python script.",
	    fn=process_image,
	    inputs=gr.Image(type="pil", label="Upload a Manga Page"),
	    outputs=[
	        gr.Textbox(label="Status"),
	        gr.Audio(label="Generated Audio", type="filepath"),
	    ],
	)

	# Start the app only when this file is executed as a script, not on import.
	if __name__ == "__main__":
	    iface.launch()