# Interview/functions/models.py
# Hosting-page residue kept for provenance: "Rahulk2197's picture",
# "Upload 18 files", "ee94b36 verified".
import nltk
import torch
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
import os
from functions.fer import Model
import cv2
import dlib
from functions.valence_arousal import load_models
# Download the NLTK resources named below. This runs every time the module is
# imported.
for _nltk_resource in ('punkt', 'averaged_perceptron_tagger'):
    nltk.download(_nltk_resource)
# Device setup: use the first CUDA device with float16 when CUDA is available,
# otherwise fall back to the CPU with float32.
device, torch_dtype = (
    ("cuda:0", torch.float16)
    if torch.cuda.is_available()
    else ("cpu", torch.float32)
)

# Model artifacts are looked up in the "models" directory located one level
# above the directory that contains this file.
models_folder = os.path.join(
    os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
    "models",
)

# Absolute paths of the individual checkpoint files inside models_folder.
fer_model_path, val_ar_feat_path, valence_arousal_model = (
    os.path.join(models_folder, filename)
    for filename in (
        "22.6_AffectNet_10K_part2.pt",
        "resnet_features.pt",
        "emotion_model.pt",
    )
)
# Load the Whisper speech-to-text model and its matching processor, then place
# the model on the selected device.
model_id = "openai/whisper-small"
model = AutoModelForSpeechSeq2Seq.from_pretrained(
    model_id,
    torch_dtype=torch_dtype,
    low_cpu_mem_usage=True,
    use_safetensors=True,
).to(device)
processor = AutoProcessor.from_pretrained(model_id)

# Text-classification pipeline backed by the Cardiff NLP Twitter RoBERTa
# sentiment checkpoint, running on the same device as the ASR model.
sentipipe = pipeline(
    task="text-classification",
    model="cardiffnlp/twitter-roberta-base-sentiment",
    device=device,
)
# Facial-expression model built from the FER checkpoint, configured with fps=30.
fer_model = Model(fps=30, fer_model=fer_model_path)

# Pair of models returned by load_models for the valence/arousal checkpoint and
# the ResNet feature checkpoint.
resnet, emotion_model = load_models(valence_arousal_model, val_ar_feat_path)

# Smile Haar cascade taken from the cascade directory bundled with OpenCV.
smile_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_smile.xml')

# The Caffe face-detector files and the dlib landmark file are resolved against
# models_folder (an absolute path derived from this file's location) instead of
# the bare relative "models/..." paths, so loading no longer depends on the
# process's current working directory. When the process is started from the
# project root both forms point at the same files.
dnn_net = cv2.dnn.readNetFromCaffe(
    os.path.join(models_folder, "deploy.prototxt"),
    os.path.join(models_folder, "res10_300x300_ssd_iter_140000.caffemodel"),
)
predictor = dlib.shape_predictor(
    os.path.join(models_folder, "shape_predictor_68_face_landmarks.dat")
)
# Registry of every model loaded by this module, keyed by a short name.
# 'valence_fer' and 'face' each hold a 2-tuple of related objects.
models_dict = dict(
    asrmodel=model,
    asrproc=processor,
    sentipipe=sentipipe,
    fer=fer_model,
    valence_fer=(resnet, emotion_model),
    smile_cascade=smile_cascade,
    face=(dnn_net, predictor),
)