rlmoura committed
Commit a15460e · verified · 1 Parent(s): 7d7740b

Update app.py

Files changed (1)
  1. app.py +67 -67
app.py CHANGED
@@ -6,75 +6,75 @@ from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
 from transformers.utils import is_flash_attn_2_available


-# gr.load("models/openai/whisper-small").launch()
-
-pipe = None
-
-def extract_audio(video_path):
-    output_audio_path = 'audio_extraido.wav'
-    ffmpeg.input(video_path).output(output_audio_path).run()
-
-def create_pipe(model='openai/whisper-small'):
-    if torch.cuda.is_available():
-        device = "cuda:0"
-    elif platform == "darwin":
-        device = "mps"
-    else:
-        device = "cpu"
-    torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
-    model_id = model
-
-    model = AutoModelForSpeechSeq2Seq.from_pretrained(
-        model_id,
-        torch_dtype=torch_dtype,
-        low_cpu_mem_usage=True,
-        use_safetensors=True,
-        attn_implementation="flash_attention_2" if is_flash_attn_2_available() else "sdpa",
-    )
-    model.to(device)
-
-    processor = AutoProcessor.from_pretrained(model_id)
-
-    pipe = pipeline(
-        "automatic-speech-recognition",
-        model=model,
-        tokenizer=processor.tokenizer,
-        feature_extractor=processor.feature_extractor,
-        torch_dtype=torch_dtype,
-        device=device,
-    )
-    return pipe
-
-def transcribe_test(file, progress=gr.Progress()):
-
-    global pipe
-
-    progress(0, desc="Trabalhando..")
-
-    generate_kwargs = {}
-    generate_kwargs["language"] = "Portuguese"
-    generate_kwargs["task"] = "transcribe"
-
-    outputs = pipe(
-        file,
-        chunk_length_s=30,#30
-        batch_size=24,#24
-        generate_kwargs=generate_kwargs,
-        # return_timestamps=True,
-    )
+gr.load("models/openai/whisper-small").launch()
+
+# pipe = None
+
+# def extract_audio(video_path):
+#     output_audio_path = 'audio_extraido.wav'
+#     ffmpeg.input(video_path).output(output_audio_path).run()
+
+# def create_pipe(model='openai/whisper-small'):
+#     if torch.cuda.is_available():
+#         device = "cuda:0"
+#     elif platform == "darwin":
+#         device = "mps"
+#     else:
+#         device = "cpu"
+#     torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
+#     model_id = model
+
+#     model = AutoModelForSpeechSeq2Seq.from_pretrained(
+#         model_id,
+#         torch_dtype=torch_dtype,
+#         low_cpu_mem_usage=True,
+#         use_safetensors=True,
+#         attn_implementation="flash_attention_2" if is_flash_attn_2_available() else "sdpa",
+#     )
+#     model.to(device)
+
+#     processor = AutoProcessor.from_pretrained(model_id)
+
+#     pipe = pipeline(
+#         "automatic-speech-recognition",
+#         model=model,
+#         tokenizer=processor.tokenizer,
+#         feature_extractor=processor.feature_extractor,
+#         torch_dtype=torch_dtype,
+#         device=device,
+#     )
+#     return pipe
+
+# def transcribe_test(file, progress=gr.Progress()):
+
+#     global pipe
+
+#     progress(0, desc="Trabalhando..")
+
+#     generate_kwargs = {}
+#     generate_kwargs["language"] = "Portuguese"
+#     generate_kwargs["task"] = "transcribe"
+
+#     outputs = pipe(
+#         file,
+#         chunk_length_s=30,#30
+#         batch_size=24,#24
+#         generate_kwargs=generate_kwargs,
+#         # return_timestamps=True,
+#     )

-    return outputs['chunks']
+#     return outputs['chunks']

-with gr.Blocks(title="Para a Livia de Passos :)") as demo:
-    description = "Vamos tentar transcrever o texto com a voz da samara..."
+# with gr.Blocks(title="Para a Livia de Passos :)") as demo:
+#     description = "Vamos tentar transcrever o texto com a voz da samara..."

-    transcribe = gr.Interface(fn=transcribe_test,
-                              description=description,
-                              inputs=[
-                                  gr.File(label="Coloque o arquivo aquii", file_types=['.mp4','.mp3','.wav'])
-                              ],
-                              outputs=[gr.Text(label="Transcription"),])
+#     transcribe = gr.Interface(fn=transcribe_test,
+#                               description=description,
+#                               inputs=[
+#                                   gr.File(label="Coloque o arquivo aquii", file_types=['.mp4','.mp3','.wav'])
+#                               ],
+#                               outputs=[gr.Text(label="Transcription"),])


-if __name__ == "__main__":
-    demo.launch()
+# if __name__ == "__main__":
+#     demo.launch()
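
In effect, the commit swaps the hand-rolled local Whisper setup (device selection, AutoModelForSpeechSeq2Seq, chunked transcription with Portuguese generate_kwargs) for Gradio's one-line hosted-model loader, keeping the old code around as comments. A minimal sketch of what app.py reduces to after this change, assuming a Gradio release that exposes the top-level gr.load (older versions used gr.Interface.load):

import gradio as gr

# gr.load builds a ready-made demo around a Hugging Face Hub repo; with a
# "models/<owner>/<name>" path, inference is served remotely via the Hub,
# so no local torch, ffmpeg, or GPU setup is required.
demo = gr.load("models/openai/whisper-small")

if __name__ == "__main__":
    demo.launch()  # serve the auto-generated UI, as the commit's one-liner does

The trade-off recorded in the diff: the one-liner drops the forced Portuguese transcription settings (the language and task generate_kwargs) and the 30-second chunking with batch_size=24, since those were arguments to the now-commented local pipeline.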