"""Load the pretrained models used by the project and expose them in ``models_dict``.

Importing this module has side effects: it fetches NLTK data packages,
instantiates the Whisper ASR model/processor and a sentiment pipeline from
their hub identifiers, and reads several weight files from the project's
``models`` directory.

Every local weight file is resolved through ``models_folder`` (derived from
this file's location), so the module behaves the same regardless of the
current working directory.
"""
# NOTE(review): import order is kept exactly as in the original file in case
# the native libraries (torch / cv2 / dlib) are load-order sensitive.
import nltk
import torch
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
import os
from functions.fer import Model
import cv2
import dlib
from functions.valence_arousal import load_models

# Download necessary NLTK packages
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')

# Device setup: use the first CUDA device with half precision when CUDA is
# available, otherwise CPU with full precision.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

# <directory two levels above this file>/models
models_folder = os.path.join(
    os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
    'models',
)
fer_model_path = os.path.join(models_folder, '22.6_AffectNet_10K_part2.pt')
val_ar_feat_path = os.path.join(models_folder, 'resnet_features.pt')
valence_arousal_model = os.path.join(models_folder, 'emotion_model.pt')

# Face detector / landmark files. These used to be bare "models/..." paths
# resolved against the current working directory; anchoring them to
# models_folder keeps them consistent with the other weight files above.
face_prototxt_path = os.path.join(models_folder, 'deploy.prototxt')
face_caffemodel_path = os.path.join(
    models_folder, 'res10_300x300_ssd_iter_140000.caffemodel'
)
landmark_predictor_path = os.path.join(
    models_folder, 'shape_predictor_68_face_landmarks.dat'
)

# Load Whisper model and processor
model_id = "openai/whisper-small"
model = AutoModelForSpeechSeq2Seq.from_pretrained(
    model_id,
    torch_dtype=torch_dtype,
    low_cpu_mem_usage=True,
    use_safetensors=True,
)
model.to(device)
processor = AutoProcessor.from_pretrained(model_id)

# Text sentiment classifier
sentipipe = pipeline(
    "text-classification",
    model="cardiffnlp/twitter-roberta-base-sentiment",
    device=device,
)

# Project-local models (see functions.fer and functions.valence_arousal)
fer_model = Model(fps=30, fer_model=fer_model_path)
resnet, emotion_model = load_models(valence_arousal_model, val_ar_feat_path)

# OpenCV / dlib detectors
smile_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_smile.xml')
dnn_net = cv2.dnn.readNetFromCaffe(face_prototxt_path, face_caffemodel_path)
predictor = dlib.shape_predictor(landmark_predictor_path)

# Single lookup table handed to the rest of the application.
models_dict = {
    'asrmodel': model,
    'asrproc': processor,
    'sentipipe': sentipipe,
    'fer': fer_model,
    "valence_fer": (resnet, emotion_model),
    'smile_cascade': smile_cascade,
    'face': (dnn_net, predictor),
}