Spaces: Running on Zero
Commit · 581b819
Parent(s): 674db8f
gr.Error doesn't display in a function decorated by spaces.GPU - workaround by making the decorated function smaller
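The workaround described in the commit message, stated as a runnable sketch rather than the Space's actual app.py: keep input validation and the gr.Error in a plain event handler, and have it call a small @spaces.GPU-decorated function that does only the GPU work. The component names and the run_on_gpu placeholder below are hypothetical; only gradio and the Hugging Face spaces package are assumed.

import gradio as gr
import spaces


@spaces.GPU
def run_on_gpu(audio_filepath):
    # Hypothetical GPU-only work. A gr.Error raised in here would surface to the
    # user as a generic "ZeroGPU worker error" instead of its own message.
    return f"(transcript of {audio_filepath})"


def on_go_btn_click(audio_filepath):
    # Validation stays in the undecorated handler so the gr.Error message is shown.
    if audio_filepath is None:
        raise gr.Error("Please provide some input audio: either upload an audio file or use the microphone")
    return run_on_gpu(audio_filepath)


with gr.Blocks() as demo:
    audio_file = gr.Audio(type="filepath")
    go_button = gr.Button("Go")
    model_output = gr.Textbox()
    go_button.click(fn=on_go_btn_click, inputs=[audio_file], outputs=[model_output])

demo.launch()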
app.py CHANGED
@@ -63,7 +63,40 @@ def convert_audio(audio_filepath, tmpdir, utt_id):
     return out_filename, duration

 @spaces.GPU
-def transcribe(audio_filepath, src_lang, tgt_lang, pnc, gen_ts):
+def transcribe(manifest_filepath, model, model_stride_in_secs, audio_duration, duration_limit):
+    """
+    Transcribe audio using either model.transcribe or buffered inference.
+    Duration limit determines which method to use and what chunk size will
+    be used in the case of buffered inference.
+
+    Note: I have observed that if you try to throw a gr.Error inside a function
+    decorated with @spaces.GPU, the error message you specified in gr.Error will
+    not be shown; instead it shows the message "ZeroGPU worker error".
+    """
+
+    if audio_duration < duration_limit:
+
+        output = model.transcribe(manifest_filepath)
+
+    else:
+        frame_asr = FrameBatchMultiTaskAED(
+            asr_model=model,
+            frame_len=duration_limit,
+            total_buffer=duration_limit,
+            batch_size=16,
+        )
+        output = get_buffered_pred_feat_multitaskAED(
+            frame_asr,
+            model.cfg.preprocessor,
+            model_stride_in_secs,
+            model.device,
+            manifest=manifest_filepath,
+            filepaths=None,
+        )
+    return output
+
+
+def on_go_btn_click(audio_filepath, src_lang, tgt_lang, pnc, gen_ts):

     if audio_filepath is None:
         raise gr.Error("Please provide some input audio: either upload an audio file or use the microphone")
@@ -149,25 +182,7 @@ def transcribe(audio_filepath, src_lang, tgt_lang, pnc, gen_ts):


     if gen_ts == "yes": # if will generate timestamps
-
-        if duration < 10:
-            output = model.transcribe(manifest_filepath)
-        else:
-            frame_asr = FrameBatchMultiTaskAED(
-                asr_model=model,
-                frame_len=10.0,
-                total_buffer=10.0,
-                batch_size=16,
-            )
-
-            output = get_buffered_pred_feat_multitaskAED(
-                frame_asr,
-                model.cfg.preprocessor,
-                model_stride_in_secs,
-                model.device,
-                manifest=manifest_filepath,
-                filepaths=None,
-            )
+        output = transcribe(manifest_filepath, model, model_stride_in_secs, audio_duration=duration, duration_limit=10.0)

         # process output to get word and segment level timestamps
         word_level_timestamps = output[0].timestamp["word"]
@@ -186,25 +201,7 @@ def transcribe(audio_filepath, src_lang, tgt_lang, pnc, gen_ts):
         output_html += "</div>\n"

     else: # if will not generate timestamps
-
-        if duration < 40:
-            output = model.transcribe(manifest_filepath)
-
-        else: # do buffered inference
-            frame_asr = FrameBatchMultiTaskAED(
-                asr_model=model,
-                frame_len=40.0,
-                total_buffer=40.0,
-                batch_size=16,
-            )
-            output = get_buffered_pred_feat_multitaskAED(
-                frame_asr,
-                model.cfg.preprocessor,
-                model_stride_in_secs,
-                model.device,
-                manifest=manifest_filepath,
-                filepaths=None,
-            )
+        output = transcribe(manifest_filepath, model, model_stride_in_secs, audio_duration=duration, duration_limit=40.0)

     if taskname == "asr":
         output_html += "<div class='heading'>Transcript</div>\n"
@@ -403,7 +400,7 @@ with gr.Blocks(
     )

     go_button.click(
-        fn=transcribe,
+        fn=on_go_btn_click,
         inputs = [audio_file, src_lang, tgt_lang, pnc, gen_ts],
         outputs = [model_output_html]
     )