import gradio as gr
import ffmpeg
import torch
from sys import platform
from transformers import pipeline, AutoModelForSpeechSeq2Seq, AutoProcessor
from transformers.utils import is_flash_attn_2_available

# Load the hosted openai/whisper-small demo straight from the Hugging Face Hub
# and launch it. The local-pipeline version of the app below is kept commented out.
gr.load("models/openai/whisper-small").launch()

# pipe = None  # lazily initialized ASR pipeline (see transcribe_test)

# def extract_audio(video_path):
#     """Extract the audio track from a video into a WAV file."""
#     output_audio_path = 'audio_extraido.wav'
#     ffmpeg.input(video_path).output(output_audio_path).run()

# def create_pipe(model='openai/whisper-small'):
#     """Build a local Whisper ASR pipeline on CUDA, Apple MPS, or CPU."""
#     if torch.cuda.is_available():
#         device = "cuda:0"
#     elif platform == "darwin":
#         device = "mps"
#     else:
#         device = "cpu"
#     torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
#     model_id = model
#     model = AutoModelForSpeechSeq2Seq.from_pretrained(
#         model_id,
#         torch_dtype=torch_dtype,
#         low_cpu_mem_usage=True,
#         use_safetensors=True,
#         attn_implementation="flash_attention_2" if is_flash_attn_2_available() else "sdpa",
#     )
#     model.to(device)
#     processor = AutoProcessor.from_pretrained(model_id)
#     pipe = pipeline(
#         "automatic-speech-recognition",
#         model=model,
#         tokenizer=processor.tokenizer,
#         feature_extractor=processor.feature_extractor,
#         torch_dtype=torch_dtype,
#         device=device,
#     )
#     return pipe

# def transcribe_test(file, progress=gr.Progress()):
#     global pipe
#     if pipe is None:  # build the pipeline lazily on first use
#         pipe = create_pipe()
#     progress(0, desc="Trabalhando..")
#     generate_kwargs = {}
#     generate_kwargs["language"] = "Portuguese"
#     generate_kwargs["task"] = "transcribe"
#     outputs = pipe(
#         file,
#         chunk_length_s=30,
#         batch_size=24,
#         generate_kwargs=generate_kwargs,
#         # return_timestamps=True,
#     )
#     # outputs['chunks'] only exists when return_timestamps=True; return plain text otherwise
#     return outputs['text']

# with gr.Blocks(title="Para a Livia de Passos :)") as demo:
#     description = "Vamos tentar transcrever o texto com a voz da samara..."
#     transcribe = gr.Interface(
#         fn=transcribe_test,
#         description=description,
#         inputs=[
#             gr.File(label="Coloque o arquivo aquii", file_types=['.mp4', '.mp3', '.wav'])
#         ],
#         outputs=[gr.Text(label="Transcription")],
#     )

# if __name__ == "__main__":
#     demo.launch()
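
# --- Hypothetical usage sketch (not part of the original app) ---
# The commented-out code above defines extract_audio() and create_pipe() but never
# wires them together. The sketch below is one way they could be combined to
# transcribe a video locally, assuming the block above is uncommented.
# transcribe_video() and the example filename are illustrative, not existing code.
#
# def transcribe_video(video_path, language="Portuguese"):
#     """Extract the audio track from a video and run it through the local Whisper pipeline."""
#     audio_path = 'audio_extraido.wav'
#     ffmpeg.input(video_path).output(audio_path).run(overwrite_output=True)
#     asr = create_pipe()
#     outputs = asr(
#         audio_path,
#         chunk_length_s=30,
#         batch_size=24,
#         generate_kwargs={"language": language, "task": "transcribe"},
#     )
#     return outputs["text"]
#
# Example: transcribe_video("exemplo.mp4") would return the Portuguese transcript as a string.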