Spaces:

janmayjay
/

Sonofica

Sleeping

App Files Files Community

janmayjay committed 29 days ago

Commit

3046549

verified ·

1 Parent(s): 3c08f42

Update app.py

Browse files

Files changed (1) hide show

app.py +58 -47

app.py CHANGED Viewed

@@ -1,47 +1,58 @@
-import gradio as gr
-import os
-import shutil
-import uuid
-import subprocess
-# Function to handle image upload and transcription
-def process_image(image):
-    # Save uploaded image to temp folder
-    temp_folder = "./temp_uploads"
-    os.makedirs(temp_folder, exist_ok=True)
-    # Generate unique filename
-    image_filename = f"{uuid.uuid4().hex}.jpg"
-    image_path = os.path.join(temp_folder, image_filename)
-    # Save image
-    image.save(image_path)
-    # Run your script with subprocess
-    try:
-        subprocess.run(["python", "page_transcription.py", f"-img={image_path}"], check=True)
-    except subprocess.CalledProcessError as e:
-        return f"Error during transcription: {e}", None
-    # Check if output.wav is generated
-    output_audio_path = "output.wav"
-    if os.path.exists(output_audio_path):
-        return "Transcription complete. Playing audio...", output_audio_path
-    else:
-        return "Failed to generate audio file.", None
-# Gradio interface
-iface = gr.Interface(
-    fn=process_image,
-    inputs=gr.Image(type="pil", label="Upload a Manga Page"),
-    outputs=[
-        gr.Textbox(label="Status"),
-        gr.Audio(label="Generated Audio", type="filepath")
-    ],
-    title="Manga Page Audio Transcription",
-    description="Upload a manga image page, and this tool will transcribe and play the audio using a backend Python script."
-)
-# Launch app
-if __name__ == "__main__":
-    iface.launch()

+import gradio as gr
+import os
+import shutil
+import uuid
+import subprocess
+from pydub import AudioSegment
+def convert_wav_for_browser(input_path, output_path):
+    """Re-encode an audio file as a 44.1 kHz, 2-byte-sample, 2-channel WAV.
+
+    Args:
+        input_path: Path of the audio file to read.
+        output_path: Path the converted WAV file is written to.
+    """
+    source_clip = AudioSegment.from_file(input_path)
+    # Normalise one property at a time: frame rate, then sample width
+    # (in bytes), then channel count.
+    resampled_clip = source_clip.set_frame_rate(44100)
+    fixed_width_clip = resampled_clip.set_sample_width(2)
+    stereo_clip = fixed_width_clip.set_channels(2)
+    stereo_clip.export(output_path, format="wav")
+# Function to handle image upload and transcription
+def process_image(image):
+    """Transcribe an uploaded page image to audio via page_transcription.py.
+
+    The image is written to a uniquely named file under ./temp_uploads, the
+    transcription script is run on it as a subprocess, and the resulting
+    output.wav is re-encoded to output_safe.wav for playback.
+
+    Args:
+        image: The uploaded image object (the interface requests a PIL
+            image), or None when nothing was uploaded.
+
+    Returns:
+        A (status message, audio file path) tuple; the path is None when no
+        audio could be produced.
+    """
+    # Nothing was uploaded: report it instead of crashing on image.save().
+    if image is None:
+        return "Please upload an image before running transcription.", None
+    # Save uploaded image to temp folder under a unique filename
+    temp_folder = "./temp_uploads"
+    os.makedirs(temp_folder, exist_ok=True)
+    image_filename = f"{uuid.uuid4().hex}.jpg"
+    image_path = os.path.join(temp_folder, image_filename)
+    output_audio_path = "output.wav"
+    # Remove audio left over from an earlier request so a run that writes
+    # nothing cannot be mistaken for a successful one.
+    if os.path.exists(output_audio_path):
+        os.remove(output_audio_path)
+    try:
+        image.save(image_path)
+        # Run your script with subprocess
+        subprocess.run(["python", "page_transcription.py", f"-img={image_path}"], check=True)
+    except subprocess.CalledProcessError as e:
+        return f"Error during transcription: {e}", None
+    finally:
+        # The script has finished with the upload; do not let temp files pile up.
+        if os.path.exists(image_path):
+            os.remove(image_path)
+    # Check if output.wav is generated
+    if not os.path.exists(output_audio_path):
+        return "Failed to generate audio file.", None
+    # Convert to browser-safe format
+    safe_audio_path = "output_safe.wav"
+    convert_wav_for_browser(output_audio_path, safe_audio_path)
+    return "Transcription complete. Playing audio...", safe_audio_path
+# Gradio interface: one image input, a status textbox and an audio player.
+# Components are created in the same order as they appear in the UI.
+_page_input = gr.Image(type="pil", label="Upload a Manga Page")
+_status_output = gr.Textbox(label="Status")
+_audio_output = gr.Audio(label="Generated Audio", type="filepath")
+iface = gr.Interface(
+    fn=process_image,
+    inputs=_page_input,
+    outputs=[_status_output, _audio_output],
+    title="Manga Page Audio Transcription",
+    description="Upload a manga image page, and this tool will transcribe and play the audio using a backend Python script."
+)
+# Launch app only when executed as a script, not when imported
+if __name__ == "__main__":
+    iface.launch()