janmayjay committed on
Commit
3046549
·
verified ·
1 Parent(s): 3c08f42

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +58 -47
app.py CHANGED
@@ -1,47 +1,58 @@
1
- import gradio as gr
2
- import os
3
- import shutil
4
- import uuid
5
- import subprocess
6
-
7
- # Function to handle image upload and transcription
8
def process_image(image):
    """Save an uploaded page, run the transcription script, and return its audio.

    Args:
        image: PIL image delivered by the Gradio ``Image`` input, or ``None``
            when the form is submitted without an upload.

    Returns:
        A ``(status_message, audio_path)`` tuple; ``audio_path`` is ``None``
        whenever no audio is available.
    """
    import sys  # local import so the block does not depend on file-level imports

    # Gradio passes None when nothing was uploaded; fail with a message
    # instead of an AttributeError on ``image.save``.
    if image is None:
        return "Please upload an image before submitting.", None

    temp_folder = "./temp_uploads"
    os.makedirs(temp_folder, exist_ok=True)

    # Unique name so simultaneous uploads never overwrite each other.
    image_path = os.path.join(temp_folder, f"{uuid.uuid4().hex}.jpg")

    # JPEG has no alpha/palette support; Pillow raises when asked to write
    # e.g. an RGBA PNG as .jpg, so normalise such images to RGB first.
    if image.mode not in ("L", "RGB", "CMYK"):
        image = image.convert("RGB")
    image.save(image_path)

    output_audio_path = "output.wav"
    # Drop any result left over from an earlier request so a stale file is
    # never reported as the outcome of this run.
    try:
        os.remove(output_audio_path)
    except FileNotFoundError:
        pass

    try:
        # Use the interpreter running this app so the script sees the same
        # environment and installed packages.
        subprocess.run(
            [sys.executable or "python", "page_transcription.py", f"-img={image_path}"],
            check=True,
        )
    except subprocess.CalledProcessError as e:
        return f"Error during transcription: {e}", None
    finally:
        # The script has finished with the upload; best-effort cleanup keeps
        # the temp folder from growing without bound.
        try:
            os.remove(image_path)
        except OSError:
            pass

    if os.path.exists(output_audio_path):
        return "Transcription complete. Playing audio...", output_audio_path
    return "Failed to generate audio file.", None
32
-
33
- # Gradio interface
34
# Gradio UI: one image goes in, a status line and an audio player come out.
iface = gr.Interface(
    title="Manga Page Audio Transcription",
    description="Upload a manga image page, and this tool will transcribe and play the audio using a backend Python script.",
    fn=process_image,
    inputs=gr.Image(label="Upload a Manga Page", type="pil"),
    outputs=[
        gr.Textbox(label="Status"),
        gr.Audio(type="filepath", label="Generated Audio"),
    ],
)

# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ import shutil
4
+ import uuid
5
+ import subprocess
6
+
7
+ from pydub import AudioSegment
8
+
9
def convert_wav_for_browser(input_path, output_path):
    """Re-encode an audio file as 44.1 kHz, 16-bit, stereo WAV.

    Args:
        input_path: Path of the audio file to read.
        output_path: Path where the converted WAV file is written.
    """
    audio = AudioSegment.from_file(input_path)
    # 44100 Hz sample rate, 2-byte (16-bit) samples, 2 channels.
    audio = audio.set_frame_rate(44100).set_sample_width(2).set_channels(2)
    # export() hands back the open output file object; close it explicitly
    # rather than leaving the handle to the garbage collector.
    exported = audio.export(output_path, format="wav")
    exported.close()
13
+
14
+
15
+ # Function to handle image upload and transcription
16
def process_image(image):
    """Transcribe an uploaded manga page to audio and return a playable file.

    The page is written to a temporary JPEG, ``page_transcription.py`` is run
    on it, and the ``output.wav`` it produces is re-encoded with
    ``convert_wav_for_browser`` before being handed to the audio player.

    Args:
        image: PIL image delivered by the Gradio ``Image`` input, or ``None``
            when the form is submitted without an upload.

    Returns:
        A ``(status_message, audio_path)`` tuple; ``audio_path`` is ``None``
        whenever no audio is available.
    """
    import sys  # local import so the block does not depend on file-level imports

    # Gradio passes None when nothing was uploaded; report it instead of
    # crashing on ``image.save``.
    if image is None:
        return "Please upload an image before submitting.", None

    temp_folder = "./temp_uploads"
    os.makedirs(temp_folder, exist_ok=True)

    # Unique name so simultaneous uploads never overwrite each other.
    image_path = os.path.join(temp_folder, f"{uuid.uuid4().hex}.jpg")

    # JPEG has no alpha/palette support; Pillow raises when asked to write
    # e.g. an RGBA PNG as .jpg, so normalise such images to RGB first.
    if image.mode not in ("L", "RGB", "CMYK"):
        image = image.convert("RGB")
    image.save(image_path)

    output_audio_path = "output.wav"
    # Remove any result left by a previous request so a stale file cannot be
    # mistaken for the output of this run.
    try:
        os.remove(output_audio_path)
    except FileNotFoundError:
        pass

    try:
        # Use the interpreter running this app so the script sees the same
        # environment and installed packages.
        subprocess.run(
            [sys.executable or "python", "page_transcription.py", f"-img={image_path}"],
            check=True,
        )
    except subprocess.CalledProcessError as e:
        return f"Error during transcription: {e}", None
    finally:
        # The script has finished with the upload; best-effort cleanup keeps
        # the temp folder from growing without bound.
        try:
            os.remove(image_path)
        except OSError:
            pass

    if not os.path.exists(output_audio_path):
        return "Failed to generate audio file.", None

    # Convert to browser-safe format.
    safe_audio_path = "output_safe.wav"
    try:
        convert_wav_for_browser(output_audio_path, safe_audio_path)
    except Exception as e:  # handler boundary: surface the failure as a status
        return f"Error during audio conversion: {e}", None
    return "Transcription complete. Playing audio...", safe_audio_path
43
+
44
+ # Gradio interface
45
# Wire the handler into a simple Gradio form: image in, status text and
# audio player out.
iface = gr.Interface(
    process_image,
    gr.Image(label="Upload a Manga Page", type="pil"),
    [
        gr.Textbox(label="Status"),
        gr.Audio(type="filepath", label="Generated Audio"),
    ],
    title="Manga Page Audio Transcription",
    description="Upload a manga image page, and this tool will transcribe and play the audio using a backend Python script.",
)

# Serve the app only when run directly, not when imported.
if __name__ == "__main__":
    iface.launch()