rlmoura committed
Commit a15460e · verified · 1 Parent(s): 7d7740b

Update app.py

Files changed (1)
  1. app.py +67 -67
app.py CHANGED
@@ -6,75 +6,75 @@ from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
 from transformers.utils import is_flash_attn_2_available


-# gr.load("models/openai/whisper-small").launch()
-
-pipe = None
-
-def extract_audio(video_path):
-    output_audio_path = 'audio_extraido.wav'
-    ffmpeg.input(video_path).output(output_audio_path).run()
-
-def create_pipe(model='openai/whisper-small'):
-    if torch.cuda.is_available():
-        device = "cuda:0"
-    elif platform == "darwin":
-        device = "mps"
-    else:
-        device = "cpu"
-    torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
-    model_id = model
-
-    model = AutoModelForSpeechSeq2Seq.from_pretrained(
-        model_id,
-        torch_dtype=torch_dtype,
-        low_cpu_mem_usage=True,
-        use_safetensors=True,
-        attn_implementation="flash_attention_2" if is_flash_attn_2_available() else "sdpa",
-    )
-    model.to(device)
-
-    processor = AutoProcessor.from_pretrained(model_id)
-
-    pipe = pipeline(
-        "automatic-speech-recognition",
-        model=model,
-        tokenizer=processor.tokenizer,
-        feature_extractor=processor.feature_extractor,
-        torch_dtype=torch_dtype,
-        device=device,
-    )
-    return pipe
-
-def transcribe_test(file, progress=gr.Progress()):
-
-    global pipe
-
-    progress(0, desc="Trabalhando..")
-
-    generate_kwargs = {}
-    generate_kwargs["language"] = "Portuguese"
-    generate_kwargs["task"] = "transcribe"
-
-    outputs = pipe(
-        file,
-        chunk_length_s=30,#30
-        batch_size=24,#24
-        generate_kwargs=generate_kwargs,
-        # return_timestamps=True,
-    )
+gr.load("models/openai/whisper-small").launch()
+
+# pipe = None
+
+# def extract_audio(video_path):
+#     output_audio_path = 'audio_extraido.wav'
+#     ffmpeg.input(video_path).output(output_audio_path).run()
+
+# def create_pipe(model='openai/whisper-small'):
+#     if torch.cuda.is_available():
+#         device = "cuda:0"
+#     elif platform == "darwin":
+#         device = "mps"
+#     else:
+#         device = "cpu"
+#     torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
+#     model_id = model
+
+#     model = AutoModelForSpeechSeq2Seq.from_pretrained(
+#         model_id,
+#         torch_dtype=torch_dtype,
+#         low_cpu_mem_usage=True,
+#         use_safetensors=True,
+#         attn_implementation="flash_attention_2" if is_flash_attn_2_available() else "sdpa",
+#     )
+#     model.to(device)
+
+#     processor = AutoProcessor.from_pretrained(model_id)
+
+#     pipe = pipeline(
+#         "automatic-speech-recognition",
+#         model=model,
+#         tokenizer=processor.tokenizer,
+#         feature_extractor=processor.feature_extractor,
+#         torch_dtype=torch_dtype,
+#         device=device,
+#     )
+#     return pipe
+
+# def transcribe_test(file, progress=gr.Progress()):
+
+#     global pipe
+
+#     progress(0, desc="Trabalhando..")
+
+#     generate_kwargs = {}
+#     generate_kwargs["language"] = "Portuguese"
+#     generate_kwargs["task"] = "transcribe"
+
+#     outputs = pipe(
+#         file,
+#         chunk_length_s=30,#30
+#         batch_size=24,#24
+#         generate_kwargs=generate_kwargs,
+#         # return_timestamps=True,
+#     )

-    return outputs['chunks']
+#     return outputs['chunks']

-with gr.Blocks(title="Para a Livia de Passos :)") as demo:
-    description = "Vamos tentar transcrever o texto com a voz da samara..."
+# with gr.Blocks(title="Para a Livia de Passos :)") as demo:
+#     description = "Vamos tentar transcrever o texto com a voz da samara..."

-    transcribe = gr.Interface(fn=transcribe_test,
-                              description=description,
-                              inputs=[
-                                  gr.File(label="Coloque o arquivo aquii", file_types=['.mp4','.mp3','.wav'])
-                              ],
-                              outputs=[gr.Text(label="Transcription"),])
+#     transcribe = gr.Interface(fn=transcribe_test,
+#                               description=description,
+#                               inputs=[
+#                                   gr.File(label="Coloque o arquivo aquii", file_types=['.mp4','.mp3','.wav'])
+#                               ],
+#                               outputs=[gr.Text(label="Transcription"),])


-if __name__ == "__main__":
-    demo.launch()
+# if __name__ == "__main__":
+#     demo.launch()
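
In effect, the commit swaps the hand-rolled local Whisper setup (device selection, AutoModelForSpeechSeq2Seq, chunked transcription with Portuguese generate_kwargs) for Gradio's one-line hosted-model loader, keeping the old code around as comments. A minimal sketch of what app.py reduces to after this change, assuming a Gradio release that exposes the top-level gr.load (older versions used gr.Interface.load):

import gradio as gr

# gr.load builds a ready-made demo around a Hugging Face Hub repo; with a
# "models/<owner>/<name>" path, inference is served remotely via the Hub,
# so no local torch, ffmpeg, or GPU setup is required.
demo = gr.load("models/openai/whisper-small")

if __name__ == "__main__":
    demo.launch()  # serve the auto-generated UI, as the commit's one-liner does

The trade-off recorded in the diff: the one-liner drops the forced Portuguese transcription settings (the language and task generate_kwargs) and the 30-second chunking with batch_size=24, since those were arguments to the now-commented local pipeline.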