Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -37,7 +37,7 @@ def create_DubIndic_interface():
|
|
| 37 |
if not audio_file or not target_language:
|
| 38 |
return (
|
| 39 |
"Please provide both an audio file and a target language.",
|
| 40 |
-
None, "", "", None, "",
|
| 41 |
gr.update(visible=False), gr.update(visible=False)
|
| 42 |
)
|
| 43 |
try:
|
|
@@ -48,20 +48,24 @@ def create_DubIndic_interface():
|
|
| 48 |
api_name="/process_audio_pipeline_step1"
|
| 49 |
)
|
| 50 |
logger.info("Received result from step 1: %s", result)
|
|
|
|
|
|
|
| 51 |
# [status, internal_val, orig_audio, trans, transl, dubbed_audio, progress]
|
| 52 |
return (
|
| 53 |
-
result[0], result[2], result[3], result[4], result[5], result[6],
|
| 54 |
-
gr.update(visible=True), gr.update(visible=True)
|
| 55 |
)
|
| 56 |
except Exception as e:
|
| 57 |
logger.error("Error in start_processing: %s", e, exc_info=True)
|
| 58 |
return (
|
| 59 |
f"Error starting the process: {e}",
|
| 60 |
-
None, "", "", None, "",
|
| 61 |
gr.update(visible=False), gr.update(visible=False)
|
| 62 |
)
|
| 63 |
|
| 64 |
def navigate_chunk(transcription, translation, direction):
|
|
|
|
|
|
|
| 65 |
api_to_call = "/lambda" if direction == "prev" else "/lambda_1"
|
| 66 |
try:
|
| 67 |
logger.info("Calling %s to navigate.", api_to_call)
|
|
@@ -70,11 +74,13 @@ def create_DubIndic_interface():
|
|
| 70 |
tr=translation,
|
| 71 |
api_name=api_to_call
|
| 72 |
)
|
|
|
|
|
|
|
| 73 |
# [internal_val, orig_audio, trans, transl, dubbed_audio, progress]
|
| 74 |
-
return result[1], result[2], result[3], result[4], result[5]
|
| 75 |
except Exception as e:
|
| 76 |
logger.error("Error navigating chunks: %s", e, exc_info=True)
|
| 77 |
-
return None, f"Error navigating chunks: {e}", "", None, ""
|
| 78 |
|
| 79 |
def generate_dubbed_chunk(transcription, translation):
|
| 80 |
if not transcription and not translation:
|
|
@@ -86,6 +92,7 @@ def create_DubIndic_interface():
|
|
| 86 |
translation=translation,
|
| 87 |
api_name="/generate_dubbed_chunk"
|
| 88 |
)
|
|
|
|
| 89 |
return dubbed_path
|
| 90 |
except Exception as e:
|
| 91 |
logger.error("Error generating dubbed chunk: %s", e, exc_info=True)
|
|
@@ -93,7 +100,9 @@ def create_DubIndic_interface():
|
|
| 93 |
|
| 94 |
def finalize_current_chunk():
|
| 95 |
try:
|
|
|
|
| 96 |
progress = client.predict(api_name="/finalize_current_chunk")
|
|
|
|
| 97 |
return progress
|
| 98 |
except Exception as e:
|
| 99 |
logger.error("Error finalizing chunk: %s", e, exc_info=True)
|
|
@@ -101,12 +110,15 @@ def create_DubIndic_interface():
|
|
| 101 |
|
| 102 |
def merge_all_chunks():
|
| 103 |
try:
|
|
|
|
| 104 |
final_status, final_audio = client.predict(api_name="/merge_audio_files")
|
|
|
|
| 105 |
return final_status, final_audio
|
| 106 |
except Exception as e:
|
| 107 |
logger.error("Error merging audio files: %s", e, exc_info=True)
|
| 108 |
return f"Error merging audio: {e}", None
|
| 109 |
|
|
|
|
| 110 |
custom_theme = gr_themes.Soft(
|
| 111 |
primary_hue="orange",
|
| 112 |
secondary_hue="red",
|
|
@@ -115,11 +127,57 @@ def create_DubIndic_interface():
|
|
| 115 |
button_primary_background_fill="linear-gradient(45deg, #f97316, #ef4444)",
|
| 116 |
button_primary_background_fill_hover="linear-gradient(45deg, #ea580c, #dc2626)",
|
| 117 |
button_primary_text_color="white",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 118 |
)
|
| 119 |
|
| 120 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 121 |
gr.Markdown("# 🎬 DubIndic - AI Audio Dubbing Pipeline")
|
| 122 |
-
|
|
|
|
| 123 |
with gr.Row():
|
| 124 |
with gr.Column(scale=1):
|
| 125 |
gr.Markdown("### π€ Step 1: Upload & Configure")
|
|
@@ -129,16 +187,15 @@ def create_DubIndic_interface():
|
|
| 129 |
label="π Target Language"
|
| 130 |
)
|
| 131 |
process_btn = gr.Button("🎯 Start Processing", variant="primary")
|
| 132 |
-
|
| 133 |
step1_output = gr.Textbox(label="π Processing Status", interactive=False)
|
| 134 |
-
|
| 135 |
with gr.Column(visible=False) as edit_section:
|
| 136 |
gr.Markdown("### ✏️ Step 2: Edit, Generate & Finalize Chunks")
|
| 137 |
with gr.Row():
|
| 138 |
prev_btn = gr.Button("◀️ Previous")
|
| 139 |
next_btn = gr.Button("Next ▶️")
|
| 140 |
original_audio = gr.Audio(label="Original Chunk Audio", type="filepath", interactive=False)
|
| 141 |
-
download_original = gr.DownloadButton(label="πΎ Download Original Chunk", file=None)
|
| 142 |
transcription_text = gr.Textbox(label="Transcription (edit if needed)", lines=2, interactive=True)
|
| 143 |
translation_text = gr.Textbox(label="Translation (edit if needed)", lines=2, interactive=True)
|
| 144 |
with gr.Row():
|
|
@@ -146,11 +203,11 @@ def create_DubIndic_interface():
|
|
| 146 |
finalize_btn = gr.Button("✔️ Finalize Chunk", variant="secondary")
|
| 147 |
dubbed_audio = gr.Audio(label="Dubbed Chunk Audio", type="filepath", interactive=False)
|
| 148 |
progress_text = gr.Textbox(label="Progress", interactive=False)
|
| 149 |
-
|
| 150 |
with gr.Row(visible=False) as merge_section:
|
| 151 |
gr.Markdown("### π Step 3: Merge Final Audio")
|
| 152 |
merge_btn = gr.Button("π Merge All Finalized Chunks", variant="primary")
|
| 153 |
-
|
| 154 |
final_output = gr.Textbox(label="π Final Results", interactive=False)
|
| 155 |
output_audio = gr.Audio(label="π Final Dubbed Audio", type="filepath", interactive=False)
|
| 156 |
|
|
@@ -158,33 +215,39 @@ def create_DubIndic_interface():
|
|
| 158 |
process_btn.click(
|
| 159 |
fn=start_processing,
|
| 160 |
inputs=[audio_input, lang_dropdown],
|
| 161 |
-
outputs=[step1_output, original_audio, transcription_text, translation_text, dubbed_audio, progress_text,
|
| 162 |
)
|
|
|
|
| 163 |
prev_btn.click(
|
| 164 |
fn=lambda t, tr: navigate_chunk(t, tr, "prev"),
|
| 165 |
inputs=[transcription_text, translation_text],
|
| 166 |
-
outputs=[original_audio, transcription_text, translation_text, dubbed_audio, progress_text
|
| 167 |
)
|
|
|
|
| 168 |
next_btn.click(
|
| 169 |
fn=lambda t, tr: navigate_chunk(t, tr, "next"),
|
| 170 |
inputs=[transcription_text, translation_text],
|
| 171 |
-
outputs=[original_audio, transcription_text, translation_text, dubbed_audio, progress_text
|
| 172 |
)
|
|
|
|
| 173 |
generate_btn.click(
|
| 174 |
fn=generate_dubbed_chunk,
|
| 175 |
inputs=[transcription_text, translation_text],
|
| 176 |
outputs=[dubbed_audio]
|
| 177 |
)
|
|
|
|
| 178 |
finalize_btn.click(
|
| 179 |
fn=finalize_current_chunk,
|
| 180 |
inputs=[],
|
| 181 |
outputs=[progress_text]
|
| 182 |
)
|
|
|
|
| 183 |
merge_btn.click(
|
| 184 |
fn=merge_all_chunks,
|
| 185 |
inputs=[],
|
| 186 |
outputs=[final_output, output_audio]
|
| 187 |
)
|
|
|
|
| 188 |
return demo
|
| 189 |
|
| 190 |
if __name__ == "__main__":
|
|
@@ -192,4 +255,4 @@ if __name__ == "__main__":
|
|
| 192 |
if DubIndic_interface:
|
| 193 |
DubIndic_interface.launch(show_error=True, share=False, server_name="0.0.0.0", server_port=7860)
|
| 194 |
else:
|
| 195 |
-
logger.error("Failed to create the Gradio interface.")
|
|
|
|
| 37 |
if not audio_file or not target_language:
|
| 38 |
return (
|
| 39 |
"Please provide both an audio file and a target language.",
|
| 40 |
+
None, "", "", None, "",
|
| 41 |
gr.update(visible=False), gr.update(visible=False)
|
| 42 |
)
|
| 43 |
try:
|
|
|
|
| 48 |
api_name="/process_audio_pipeline_step1"
|
| 49 |
)
|
| 50 |
logger.info("Received result from step 1: %s", result)
|
| 51 |
+
|
| 52 |
+
# API returns a 7-element tuple, we map it to our UI outputs
|
| 53 |
# [status, internal_val, orig_audio, trans, transl, dubbed_audio, progress]
|
| 54 |
return (
|
| 55 |
+
result[0], result[2], result[3], result[4], result[5], result[6],
|
| 56 |
+
gr.update(visible=True), gr.update(visible=True) # Make edit and merge sections visible
|
| 57 |
)
|
| 58 |
except Exception as e:
|
| 59 |
logger.error("Error in start_processing: %s", e, exc_info=True)
|
| 60 |
return (
|
| 61 |
f"Error starting the process: {e}",
|
| 62 |
+
None, "", "", None, "",
|
| 63 |
gr.update(visible=False), gr.update(visible=False)
|
| 64 |
)
|
| 65 |
|
| 66 |
def navigate_chunk(transcription, translation, direction):
|
| 67 |
+
# The API uses different endpoints for next/previous navigation
|
| 68 |
+
# We assume /lambda is for previous and /lambda_1 is for next
|
| 69 |
api_to_call = "/lambda" if direction == "prev" else "/lambda_1"
|
| 70 |
try:
|
| 71 |
logger.info("Calling %s to navigate.", api_to_call)
|
|
|
|
| 74 |
tr=translation,
|
| 75 |
api_name=api_to_call
|
| 76 |
)
|
| 77 |
+
logger.info("Received result from navigation: %s", result)
|
| 78 |
+
# API returns a 6-element tuple
|
| 79 |
# [internal_val, orig_audio, trans, transl, dubbed_audio, progress]
|
| 80 |
+
return result[1], result[2], result[3], result[4], result[5]
|
| 81 |
except Exception as e:
|
| 82 |
logger.error("Error navigating chunks: %s", e, exc_info=True)
|
| 83 |
+
return None, f"Error navigating chunks: {e}", "", None, ""
|
| 84 |
|
| 85 |
def generate_dubbed_chunk(transcription, translation):
|
| 86 |
if not transcription and not translation:
|
|
|
|
| 92 |
translation=translation,
|
| 93 |
api_name="/generate_dubbed_chunk"
|
| 94 |
)
|
| 95 |
+
logger.info("Received dubbed chunk: %s", dubbed_path)
|
| 96 |
return dubbed_path
|
| 97 |
except Exception as e:
|
| 98 |
logger.error("Error generating dubbed chunk: %s", e, exc_info=True)
|
|
|
|
| 100 |
|
| 101 |
def finalize_current_chunk():
|
| 102 |
try:
|
| 103 |
+
logger.info("Calling /finalize_current_chunk.")
|
| 104 |
progress = client.predict(api_name="/finalize_current_chunk")
|
| 105 |
+
logger.info("Received finalization progress: %s", progress)
|
| 106 |
return progress
|
| 107 |
except Exception as e:
|
| 108 |
logger.error("Error finalizing chunk: %s", e, exc_info=True)
|
|
|
|
| 110 |
|
| 111 |
def merge_all_chunks():
|
| 112 |
try:
|
| 113 |
+
logger.info("Calling /merge_audio_files.")
|
| 114 |
final_status, final_audio = client.predict(api_name="/merge_audio_files")
|
| 115 |
+
logger.info("Received final merged audio.")
|
| 116 |
return final_status, final_audio
|
| 117 |
except Exception as e:
|
| 118 |
logger.error("Error merging audio files: %s", e, exc_info=True)
|
| 119 |
return f"Error merging audio: {e}", None
|
| 120 |
|
| 121 |
+
# Create custom theme with orange-red gradient colors
|
| 122 |
custom_theme = gr_themes.Soft(
|
| 123 |
primary_hue="orange",
|
| 124 |
secondary_hue="red",
|
|
|
|
| 127 |
button_primary_background_fill="linear-gradient(45deg, #f97316, #ef4444)",
|
| 128 |
button_primary_background_fill_hover="linear-gradient(45deg, #ea580c, #dc2626)",
|
| 129 |
button_primary_text_color="white",
|
| 130 |
+
block_background_fill="rgba(255, 255, 255, 0.05)",
|
| 131 |
+
block_border_color="rgba(249, 115, 22, 0.2)",
|
| 132 |
+
input_background_fill="rgba(255, 255, 255, 0.9)",
|
| 133 |
+
input_border_color="rgba(249, 115, 22, 0.3)",
|
| 134 |
+
input_border_color_focus="rgba(239, 68, 68, 0.6)"
|
| 135 |
)
|
| 136 |
|
| 137 |
+
# Define the Gradio Interface using Blocks for a custom layout
|
| 138 |
+
with gr.Blocks(theme=custom_theme, title="DubIndic - AI Audio Dubbing", css="""
|
| 139 |
+
.gradio-container {
|
| 140 |
+
background: linear-gradient(135deg, rgba(249, 115, 22, 0.1), rgba(239, 68, 68, 0.1));
|
| 141 |
+
}
|
| 142 |
+
.gr-button[variant="primary"] {
|
| 143 |
+
background: linear-gradient(45deg, #f97316, #ef4444) !important;
|
| 144 |
+
border: none !important;
|
| 145 |
+
color: white !important;
|
| 146 |
+
font-weight: bold !important;
|
| 147 |
+
}
|
| 148 |
+
.gr-button[variant="primary"]:hover {
|
| 149 |
+
background: linear-gradient(45deg, #ea580c, #dc2626) !important;
|
| 150 |
+
transform: translateY(-1px);
|
| 151 |
+
box-shadow: 0 4px 8px rgba(239, 68, 68, 0.3);
|
| 152 |
+
}
|
| 153 |
+
.gr-button[variant="secondary"] {
|
| 154 |
+
background: linear-gradient(45deg, rgba(249, 115, 22, 0.1), rgba(239, 68, 68, 0.1)) !important;
|
| 155 |
+
border: 2px solid #f97316 !important;
|
| 156 |
+
color: #f97316 !important;
|
| 157 |
+
font-weight: bold !important;
|
| 158 |
+
}
|
| 159 |
+
.gr-button[variant="secondary"]:hover {
|
| 160 |
+
background: linear-gradient(45deg, #f97316, #ef4444) !important;
|
| 161 |
+
color: white !important;
|
| 162 |
+
}
|
| 163 |
+
h1 {
|
| 164 |
+
background: linear-gradient(45deg, #f97316, #ef4444);
|
| 165 |
+
-webkit-background-clip: text;
|
| 166 |
+
-webkit-text-fill-color: transparent;
|
| 167 |
+
background-clip: text;
|
| 168 |
+
font-weight: bold;
|
| 169 |
+
}
|
| 170 |
+
.gr-textbox {
|
| 171 |
+
border: 2px solid rgba(249, 115, 22, 0.3) !important;
|
| 172 |
+
}
|
| 173 |
+
.gr-textbox:focus {
|
| 174 |
+
border-color: #ef4444 !important;
|
| 175 |
+
box-shadow: 0 0 0 3px rgba(239, 68, 68, 0.1) !important;
|
| 176 |
+
}
|
| 177 |
+
""") as demo:
|
| 178 |
gr.Markdown("# 🎬 DubIndic - AI Audio Dubbing Pipeline")
|
| 179 |
+
gr.Markdown("Transform your audio into another Indian language with full editing control.")
|
| 180 |
+
|
| 181 |
with gr.Row():
|
| 182 |
with gr.Column(scale=1):
|
| 183 |
gr.Markdown("### π€ Step 1: Upload & Configure")
|
|
|
|
| 187 |
label="π Target Language"
|
| 188 |
)
|
| 189 |
process_btn = gr.Button("🎯 Start Processing", variant="primary")
|
| 190 |
+
|
| 191 |
step1_output = gr.Textbox(label="π Processing Status", interactive=False)
|
| 192 |
+
|
| 193 |
with gr.Column(visible=False) as edit_section:
|
| 194 |
gr.Markdown("### ✏️ Step 2: Edit, Generate & Finalize Chunks")
|
| 195 |
with gr.Row():
|
| 196 |
prev_btn = gr.Button("◀️ Previous")
|
| 197 |
next_btn = gr.Button("Next ▶️")
|
| 198 |
original_audio = gr.Audio(label="Original Chunk Audio", type="filepath", interactive=False)
|
|
|
|
| 199 |
transcription_text = gr.Textbox(label="Transcription (edit if needed)", lines=2, interactive=True)
|
| 200 |
translation_text = gr.Textbox(label="Translation (edit if needed)", lines=2, interactive=True)
|
| 201 |
with gr.Row():
|
|
|
|
| 203 |
finalize_btn = gr.Button("✔️ Finalize Chunk", variant="secondary")
|
| 204 |
dubbed_audio = gr.Audio(label="Dubbed Chunk Audio", type="filepath", interactive=False)
|
| 205 |
progress_text = gr.Textbox(label="Progress", interactive=False)
|
| 206 |
+
|
| 207 |
with gr.Row(visible=False) as merge_section:
|
| 208 |
gr.Markdown("### π Step 3: Merge Final Audio")
|
| 209 |
merge_btn = gr.Button("π Merge All Finalized Chunks", variant="primary")
|
| 210 |
+
|
| 211 |
final_output = gr.Textbox(label="π Final Results", interactive=False)
|
| 212 |
output_audio = gr.Audio(label="π Final Dubbed Audio", type="filepath", interactive=False)
|
| 213 |
|
|
|
|
| 215 |
process_btn.click(
|
| 216 |
fn=start_processing,
|
| 217 |
inputs=[audio_input, lang_dropdown],
|
| 218 |
+
outputs=[step1_output, original_audio, transcription_text, translation_text, dubbed_audio, progress_text, edit_section, merge_section]
|
| 219 |
)
|
| 220 |
+
|
| 221 |
prev_btn.click(
|
| 222 |
fn=lambda t, tr: navigate_chunk(t, tr, "prev"),
|
| 223 |
inputs=[transcription_text, translation_text],
|
| 224 |
+
outputs=[original_audio, transcription_text, translation_text, dubbed_audio, progress_text]
|
| 225 |
)
|
| 226 |
+
|
| 227 |
next_btn.click(
|
| 228 |
fn=lambda t, tr: navigate_chunk(t, tr, "next"),
|
| 229 |
inputs=[transcription_text, translation_text],
|
| 230 |
+
outputs=[original_audio, transcription_text, translation_text, dubbed_audio, progress_text]
|
| 231 |
)
|
| 232 |
+
|
| 233 |
generate_btn.click(
|
| 234 |
fn=generate_dubbed_chunk,
|
| 235 |
inputs=[transcription_text, translation_text],
|
| 236 |
outputs=[dubbed_audio]
|
| 237 |
)
|
| 238 |
+
|
| 239 |
finalize_btn.click(
|
| 240 |
fn=finalize_current_chunk,
|
| 241 |
inputs=[],
|
| 242 |
outputs=[progress_text]
|
| 243 |
)
|
| 244 |
+
|
| 245 |
merge_btn.click(
|
| 246 |
fn=merge_all_chunks,
|
| 247 |
inputs=[],
|
| 248 |
outputs=[final_output, output_audio]
|
| 249 |
)
|
| 250 |
+
|
| 251 |
return demo
|
| 252 |
|
| 253 |
if __name__ == "__main__":
|
|
|
|
| 255 |
if DubIndic_interface:
|
| 256 |
DubIndic_interface.launch(show_error=True, share=False, server_name="0.0.0.0", server_port=7860)
|
| 257 |
else:
|
| 258 |
+
logger.error("Failed to create the Gradio interface.")
|