Tamiloneto8 committed on
Commit
50375af
·
verified ·
1 Parent(s): cb64ded

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +176 -99
app.py CHANGED
@@ -2,125 +2,202 @@ import os
2
  import logging
3
  import gradio as gr
4
  from gradio_client import Client, handle_file
 
5
 
 
6
  logging.basicConfig(level=logging.INFO)
7
  logger = logging.getLogger(__name__)
8
 
 
9
  token = os.getenv("HUGGINGFACE_TOKEN")
10
 
11
  def create_dubsync_interface():
12
- client = Client("Tamiloneto8/Test1", hf_token=token, verbose=True)
13
- target_langs = ["Assamese", "Bengali", "Gujarati", "Hindi", "Kannada", "Malayalam", "Marathi", "Odia", "Punjabi", "Tamil", "Telugu"]
14
-
15
- with gr.Blocks(title="DubSync Wizard", css=".step-header {font-size:1.5em; font-weight:bold; margin-bottom:10px;} .section {margin-top:20px;} .final-audio {margin-top:20px;}") as demo:
16
- gr.HTML("<div class='step-header'>Step 1: Upload Audio</div>")
17
- audio_input = gr.Audio(sources=["upload"], type="filepath", label="Upload your audio file")
18
- lang_dropdown = gr.Dropdown(target_langs, label="Select Target Language")
19
- start_btn = gr.Button("Start Processing", variant="primary")
20
-
21
- gr.HTML("<div class='step-header section'>Step 2: Edit Transcript & Translation</div>")
22
- transcription_audio = gr.Audio(type="filepath", label="Play Original Chunk", visible=False)
23
- transcription_text = gr.Textbox(label="Edit Transcription", lines=3, visible=False)
24
- translation_text = gr.Textbox(label="Edit Translation", lines=3, visible=False)
25
- next_chunk_btn = gr.Button("Next Chunk", visible=False)
26
-
27
- gr.HTML("<div class='step-header section'>Step 3: Dubbing & Review</div>")
28
- dubbing_audio = gr.Audio(type="filepath", label="Play Dubbed Chunk", visible=False)
29
- redub_btn = gr.Button("Redub Chunk", visible=False)
30
- finalize_btn = gr.Button("Finalize Chunk", visible=False)
31
-
32
- gr.HTML("<div class='step-header section'>Step 4: Merge & Download</div>")
33
- merge_btn = gr.Button("Merge All Chunks", visible=False)
34
- final_audio = gr.Audio(type="filepath", label="Final Merged Audio", visible=False)
35
-
36
- state = gr.State({"chunks": [], "index": 0})
37
 
38
- def start_processing(audio_file, lang):
39
- hf_audio = handle_file(audio_file)
40
- out = client.predict(audio_file=hf_audio, target_lang=lang, api_name="/process_audio_pipeline_step1")
41
- chunks = [{
42
- 'original': out[2][i], 'transcript': out[3][i], 'translate': out[4][i], 'dubbed': out[5][i]
43
- } for i in range(len(out[2]))]
44
- state.value = {"chunks": chunks, "index": 0}
45
- first = chunks[0]
46
  return (
47
- gr.update(value=first['original'], visible=True), # transcription_audio
48
- gr.update(value=first['transcript'], visible=True), # transcription_text
49
- gr.update(value=first['translate'], visible=True), # translation_text
50
- gr.update(visible=True) # next_chunk_btn
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  )
52
 
53
- def go_next(state_dict):
54
- idx = state_dict['index'] + 1
55
- if idx < len(state_dict['chunks']):
56
- state_dict['index'] = idx
57
- chunk = state_dict['chunks'][idx]
58
- return (
59
- state_dict, # state
60
- gr.update(value=chunk['original'], visible=True), # transcription_audio
61
- gr.update(value=chunk['transcript'], visible=True), # transcription_text
62
- gr.update(value=chunk['translate'], visible=True), # translation_text
63
- gr.update(visible=True), # next_chunk_btn
64
- gr.update(visible=False), # dubbing_audio
65
- gr.update(visible=False), # redub_btn
66
- gr.update(visible=False) # finalize_btn
67
- )
68
- else:
69
- return (
70
- state_dict, # state
71
- gr.update(visible=False), # transcription_audio
72
- gr.update(visible=False), # transcription_text
73
- gr.update(visible=False), # translation_text
74
- gr.update(visible=False), # next_chunk_btn
75
- gr.update(value=state_dict['chunks'][state_dict['index']]['dubbed'], visible=True), # dubbing_audio
76
- gr.update(visible=True), # redub_btn
77
- gr.update(visible=True) # finalize_btn
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
  )
79
-
80
- def redub(state_dict):
81
- chunk = state_dict['chunks'][state_dict['index']]
82
- dubbed = client.predict(transcription=chunk['transcript'], translation=chunk['translate'], api_name="/generate_dubbed_chunk")
83
- state_dict['chunks'][state_dict['index']]['dubbed'] = dubbed
84
- return gr.update(value=dubbed, visible=True) # dubbing_audio
85
-
86
- def finalize(state_dict):
87
- state_dict['chunks'][state_dict['index']]['finalized'] = True
88
- if all(c.get('finalized', False) for c in state_dict['chunks']):
89
- return gr.update(visible=True) # merge_btn
90
- return gr.update(visible=False) # merge_btn
91
-
92
- def merge_all(state_dict):
93
- out = client.predict(api_name="/merge_audio_files")
94
- return gr.update(value=out[1], visible=True) # final_audio
95
-
96
- start_btn.click(
97
- start_processing,
 
 
 
 
 
 
 
 
 
98
  inputs=[audio_input, lang_dropdown],
99
- outputs=[transcription_audio, transcription_text, translation_text, next_chunk_btn]
100
  )
101
- next_chunk_btn.click(
102
- go_next,
103
- inputs=[state],
104
- outputs=[state, transcription_audio, transcription_text, translation_text, next_chunk_btn, dubbing_audio, redub_btn, finalize_btn]
 
105
  )
106
- redub_btn.click(
107
- redub,
108
- inputs=[state],
109
- outputs=[dubbing_audio]
 
110
  )
 
 
 
 
 
 
 
111
  finalize_btn.click(
112
- finalize,
113
- inputs=[state],
114
- outputs=[merge_btn]
115
  )
 
116
  merge_btn.click(
117
- merge_all,
118
- inputs=[state],
119
- outputs=[final_audio]
120
  )
121
-
122
  return demo
123
 
124
  if __name__ == "__main__":
125
- ui = create_dubsync_interface()
126
- ui.launch(show_error=True, share=False, server_name="0.0.0.0", server_port=7860)
 
 
 
 
2
  import logging
3
  import gradio as gr
4
  from gradio_client import Client, handle_file
5
+ import gradio.themes as gr_themes
6
 
7
+ # Set up logging
8
  logging.basicConfig(level=logging.INFO)
9
  logger = logging.getLogger(__name__)
10
 
11
+ # It's recommended to set the HUGGINGFACE_TOKEN as an environment variable
12
  token = os.getenv("HUGGINGFACE_TOKEN")
13
 
14
  def create_dubsync_interface():
15
+ """
16
+ Creates and configures the Gradio interface for the DubSync application.
17
+ """
18
+ try:
19
+ # Connect to the Gradio client on Hugging Face Spaces
20
+ client = Client("Tamiloneto8/Test1", hf_token=token, verbose=True)
21
+ logger.info("Successfully connected to Gradio client.")
22
+
23
+ except Exception as e:
24
+ logger.error("Error connecting to the private space: %s", e, exc_info=True)
25
+ # Create a fallback interface to show the connection error
26
+ with gr.Blocks() as demo:
27
+ gr.Markdown("# 🎬 DubSync - Connection Error")
28
+ gr.Textbox(
29
+ value=f"Failed to connect to the Hugging Face Space. Please ensure the Space is running and the token is correct. Error: {e}",
30
+ label="Error",
31
+ interactive=False
32
+ )
33
+ return demo
 
 
 
 
 
 
34
 
35
+ # Define wrapper functions to call the API endpoints
36
+ def start_processing(audio_file, target_language):
37
+ if not audio_file or not target_language:
 
 
 
 
 
38
  return (
39
+ "Please provide both an audio file and a target language.",
40
+ None, "", "", None, "",
41
+ gr.update(visible=False), gr.update(visible=False)
42
+ )
43
+ try:
44
+ logger.info("Calling /process_audio_pipeline_step1 with file: %s", audio_file)
45
+ result = client.predict(
46
+ audio_file=handle_file(audio_file),
47
+ target_lang=target_language,
48
+ api_name="/process_audio_pipeline_step1"
49
+ )
50
+ logger.info("Received result from step 1: %s", result)
51
+
52
+ # API returns a 7-element tuple, we map it to our UI outputs
53
+ # [status, internal_val, orig_audio, trans, transl, dubbed_audio, progress]
54
+ return (
55
+ result[0], result[2], result[3], result[4], result[5], result[6],
56
+ gr.update(visible=True), gr.update(visible=True) # Make edit and merge sections visible
57
+ )
58
+ except Exception as e:
59
+ logger.error("Error in start_processing: %s", e, exc_info=True)
60
+ return (
61
+ f"Error starting the process: {e}",
62
+ None, "", "", None, "",
63
+ gr.update(visible=False), gr.update(visible=False)
64
  )
65
 
66
+ def navigate_chunk(transcription, translation, direction):
67
+ # The API uses different endpoints for next/previous navigation
68
+ # We assume /lambda is for previous and /lambda_1 is for next
69
+ api_to_call = "/lambda" if direction == "prev" else "/lambda_1"
70
+ try:
71
+ logger.info("Calling %s to navigate.", api_to_call)
72
+ result = client.predict(
73
+ t=transcription,
74
+ tr=translation,
75
+ api_name=api_to_call
76
+ )
77
+ logger.info("Received result from navigation: %s", result)
78
+ # API returns a 6-element tuple
79
+ # [internal_val, orig_audio, trans, transl, dubbed_audio, progress]
80
+ return result[1], result[2], result[3], result[4], result[5]
81
+ except Exception as e:
82
+ logger.error("Error navigating chunks: %s", e, exc_info=True)
83
+ return None, f"Error navigating chunks: {e}", "", None, ""
84
+
85
+ def generate_dubbed_chunk(transcription, translation):
86
+ if not transcription and not translation:
87
+ return None
88
+ try:
89
+ logger.info("Calling /generate_dubbed_chunk.")
90
+ dubbed_path = client.predict(
91
+ transcription=transcription,
92
+ translation=translation,
93
+ api_name="/generate_dubbed_chunk"
94
+ )
95
+ logger.info("Received dubbed chunk: %s", dubbed_path)
96
+ return dubbed_path
97
+ except Exception as e:
98
+ logger.error("Error generating dubbed chunk: %s", e, exc_info=True)
99
+ return None
100
+
101
+ def finalize_current_chunk():
102
+ try:
103
+ logger.info("Calling /finalize_current_chunk.")
104
+ progress = client.predict(api_name="/finalize_current_chunk")
105
+ logger.info("Received finalization progress: %s", progress)
106
+ return progress
107
+ except Exception as e:
108
+ logger.error("Error finalizing chunk: %s", e, exc_info=True)
109
+ return f"Error finalizing chunk: {e}"
110
+
111
+ def merge_all_chunks():
112
+ try:
113
+ logger.info("Calling /merge_audio_files.")
114
+ final_status, final_audio = client.predict(api_name="/merge_audio_files")
115
+ logger.info("Received final merged audio.")
116
+ return final_status, final_audio
117
+ except Exception as e:
118
+ logger.error("Error merging audio files: %s", e, exc_info=True)
119
+ return f"Error merging audio: {e}", None
120
+
121
+ # Define the Gradio Interface using Blocks for a custom layout
122
+ with gr.Blocks(theme=gr_themes.Soft(), title="DubSync - AI Audio Dubbing") as demo:
123
+ gr.Markdown("# 🎬 DubSync - AI Audio Dubbing Pipeline")
124
+ gr.Markdown("Transform your audio into another Indian language with full editing control.")
125
+
126
+ with gr.Row():
127
+ with gr.Column(scale=1):
128
+ gr.Markdown("### πŸ“€ Step 1: Upload & Configure")
129
+ audio_input = gr.Audio(sources=["upload"], type="filepath", label="🎡 Upload Audio File")
130
+ lang_dropdown = gr.Dropdown(
131
+ choices=["Assamese", "Bengali", "Gujarati", "Hindi", "Kannada", "Malayalam", "Marathi", "Odia", "Punjabi", "Tamil", "Telugu"],
132
+ label="🌐 Target Language"
133
  )
134
+ process_btn = gr.Button("🎯 Start Processing", variant="primary")
135
+
136
+ step1_output = gr.Textbox(label="πŸ“Š Processing Status", interactive=False)
137
+
138
+ with gr.Column(visible=False) as edit_section:
139
+ gr.Markdown("### ✏️ Step 2: Edit, Generate & Finalize Chunks")
140
+ with gr.Row():
141
+ prev_btn = gr.Button("◀️ Previous")
142
+ next_btn = gr.Button("Next ▢️")
143
+ original_audio = gr.Audio(label="Original Chunk Audio", type="filepath", interactive=False)
144
+ transcription_text = gr.Textbox(label="Transcription (edit if needed)", lines=2, interactive=True)
145
+ translation_text = gr.Textbox(label="Translation (edit if needed)", lines=2, interactive=True)
146
+ with gr.Row():
147
+ generate_btn = gr.Button("πŸ”Š Generate Dubbed Chunk")
148
+ finalize_btn = gr.Button("βœ”οΈ Finalize Chunk", variant="secondary")
149
+ dubbed_audio = gr.Audio(label="Dubbed Chunk Audio", type="filepath", interactive=False)
150
+ progress_text = gr.Textbox(label="Progress", interactive=False)
151
+
152
+ with gr.Row(visible=False) as merge_section:
153
+ gr.Markdown("### 🏁 Step 3: Merge Final Audio")
154
+ merge_btn = gr.Button("πŸš€ Merge All Finalized Chunks", variant="primary")
155
+
156
+ final_output = gr.Textbox(label="πŸŽ‰ Final Results", interactive=False)
157
+ output_audio = gr.Audio(label="πŸ”Š Final Dubbed Audio", type="filepath", interactive=False)
158
+
159
+ # Connect functions to UI components
160
+ process_btn.click(
161
+ fn=start_processing,
162
  inputs=[audio_input, lang_dropdown],
163
+ outputs=[step1_output, original_audio, transcription_text, translation_text, dubbed_audio, progress_text, edit_section, merge_section]
164
  )
165
+
166
+ prev_btn.click(
167
+ fn=lambda t, tr: navigate_chunk(t, tr, "prev"),
168
+ inputs=[transcription_text, translation_text],
169
+ outputs=[original_audio, transcription_text, translation_text, dubbed_audio, progress_text]
170
  )
171
+
172
+ next_btn.click(
173
+ fn=lambda t, tr: navigate_chunk(t, tr, "next"),
174
+ inputs=[transcription_text, translation_text],
175
+ outputs=[original_audio, transcription_text, translation_text, dubbed_audio, progress_text]
176
  )
177
+
178
+ generate_btn.click(
179
+ fn=generate_dubbed_chunk,
180
+ inputs=[transcription_text, translation_text],
181
+ outputs=[dubbed_audio]
182
+ )
183
+
184
  finalize_btn.click(
185
+ fn=finalize_current_chunk,
186
+ inputs=[],
187
+ outputs=[progress_text]
188
  )
189
+
190
  merge_btn.click(
191
+ fn=merge_all_chunks,
192
+ inputs=[],
193
+ outputs=[final_output, output_audio]
194
  )
195
+
196
  return demo
197
 
198
  if __name__ == "__main__":
199
+ dubsync_interface = create_dubsync_interface()
200
+ if dubsync_interface:
201
+ dubsync_interface.launch(show_error=True, share=False, server_name="0.0.0.0", server_port=7860)
202
+ else:
203
+ logger.error("Failed to create the Gradio interface.")