import os
import warnings

import librosa
import torch
from transformers import GenerationConfig, WhisperForConditionalGeneration, WhisperProcessor
from transformers.utils import logging

# Silence library warnings so only the script's own output is printed.
logging.set_verbosity_error()
warnings.filterwarnings("ignore")

# Run on GPU when available, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Load the fine-tuned Whisper checkpoint and its processor from a local directory.
model_path = "./whisper-small-english"
processor = WhisperProcessor.from_pretrained(model_path)
model = WhisperForConditionalGeneration.from_pretrained(model_path).to(device)

# Clear any forced decoder ids baked into the model config and build a clean
# generation config; the language/task prompt is supplied explicitly at generate time.
model.config.forced_decoder_ids = None
forced_decoder_ids = processor.get_decoder_prompt_ids(language="english", task="transcribe")
gen_config = GenerationConfig.from_model_config(model.config)
gen_config.forced_decoder_ids = None
gen_config.language = None
gen_config.task = None


def get_latest_wav_file(directory):
    """Return the most recently modified .wav file in `directory`."""
    wav_files = [os.path.join(directory, f) for f in os.listdir(directory) if f.endswith(".wav")]
    if not wav_files:
        raise FileNotFoundError("No .wav files found in the directory.")
    return max(wav_files, key=os.path.getmtime)


def transcribe(audio_path):
    """Transcribe a single audio file with the loaded Whisper model."""
    # Whisper expects 16 kHz audio.
    audio, _ = librosa.load(audio_path, sr=16000)
    input_features = processor(audio, sampling_rate=16000, return_tensors="pt").input_features.to(device)
    predicted_ids = model.generate(
        input_features,
        generation_config=gen_config,
        forced_decoder_ids=forced_decoder_ids,
    )
    return processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]


try:
    latest_audio = get_latest_wav_file("./User Input")
    print(f"Transcribing latest file: {latest_audio}")
    print("Transcription:", transcribe(latest_audio))
except FileNotFoundError as e:
    print("Error:", e)
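

# Alternative sketch, assuming a newer transformers release where passing
# forced_decoder_ids to generate() is deprecated and Whisper's generate() takes
# language/task keywords directly, and assuming the checkpoint's generation config
# still carries Whisper's language/task token maps. `transcribe_alt` is a
# hypothetical helper name, not part of the original script.
def transcribe_alt(audio_path):
    audio, _ = librosa.load(audio_path, sr=16000)
    input_features = processor(audio, sampling_rate=16000, return_tensors="pt").input_features.to(device)
    # Let Whisper's generate() build the decoder prompt from language/task itself.
    predicted_ids = model.generate(input_features, language="english", task="transcribe")
    return processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]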