import cv2
import torch
import torchvision.transforms as transforms
from PIL import Image
import numpy as np
import timm
from tqdm import tqdm
import torch.nn as nn
import os
import matplotlib.pyplot as plt
import torch.nn.functional as F
import dlib
import pandas as pd

# dnn_net = cv2.dnn.readNetFromCaffe(
#     "models/deploy.prototxt",
#     "models/res10_300x300_ssd_iter_140000.caffemodel",
# )
# # Initialize dlib's facial landmark predictor
# predictor = dlib.shape_predictor("models/shape_predictor_68_face_landmarks.dat")


def extract_face(image, net, predictor):
    """Detect the first confident face with the OpenCV DNN detector, refine
    its box with dlib's 68-point landmarks, and return a 224x224 crop
    (or None if no face is found)."""
    # Prepare the image for DNN face detection
    (img_h, img_w) = image.shape[:2]
    blob = cv2.dnn.blobFromImage(
        cv2.resize(image, (300, 300)), 1.0, (300, 300), (104.0, 177.0, 123.0)
    )
    net.setInput(blob)
    detections = net.forward()

    # Loop over the detections
    for i in range(detections.shape[2]):
        confidence = detections[0, 0, i, 2]

        # Filter out weak detections
        if confidence > 0.5:
            box = detections[0, 0, i, 3:7] * np.array([img_w, img_h, img_w, img_h])
            (startX, startY, endX, endY) = box.astype("int")

            # Convert bounding box to dlib rectangle format
            dlib_rect = dlib.rectangle(int(startX), int(startY), int(endX), int(endY))
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            landmarks = predictor(gray, dlib_rect)
            landmarks_np = np.array([[p.x, p.y] for p in landmarks.parts()])

            # Pad the landmark bounding box by 25 px on every side,
            # clamped to the image borders
            x, y, w, h = cv2.boundingRect(landmarks_np)
            x = max(0, x - 25)
            y = max(0, y - 25)
            w = min(w + 50, img_w - x)
            h = min(h + 50, img_h - y)

            # Crop and resize the face; fall back to the full frame if the
            # crop is degenerate
            face_crop = image[y : y + h, x : x + w]
            try:
                face_crop = cv2.resize(face_crop, (224, 224))
            except cv2.error:
                face_crop = cv2.resize(image, (224, 224))
            return face_crop

    return None


class Model:
    def __init__(self, fps, fer_model):
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.transform = transforms.Compose(
            [
                transforms.Resize((224, 224)),
                transforms.ToTensor(),
                transforms.Normalize(
                    mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
                ),
            ]
        )

        # Build the EfficientNet-B0 backbone with a 7-way emotion head;
        # the checkpoint stores the full serialized model, so torch.load
        # then replaces this scaffold wholesale.
        self.fermodel = timm.create_model("tf_efficientnet_b0_ns", pretrained=False)
        self.fermodel.classifier = nn.Sequential(
            nn.Linear(in_features=1280, out_features=7)
        )
        self.fermodel = torch.load(fer_model, map_location=self.device)
        self.fermodel.to(self.device)
        self.fermodel.eval()

        self.class_labels = [
            "angry", "disgust", "fear", "happy", "neutral", "sad", "surprised",
        ]

        # Maps the model's raw output index to the alternative numeric
        # ordering used for the per-frame score stream
        self.emotion_reorder = {0: 6, 1: 5, 2: 4, 3: 1, 4: 0, 5: 2, 6: 3}

        self.label_dict = {
            0: "angry",
            1: "disgust",
            2: "fear",
            3: "happy",
            4: "neutral",
            5: "sad",
            6: "surprised",
        }

        self.class_wise_frame_count = None
        self.emotion_count = [0] * 7
        self.frame_count = 0
        self.fps = fps
        self.df = None
        self.faces_ = 0

    def predict(self, frames):
        """Classify the emotion in each frame; returns the per-frame label
        list and the per-frame reordered scores (1-element float tensors,
        or the string 'frame error' for None frames)."""
        emotion_list = []
        emt = []
        for frame in tqdm(frames):
            if frame is not None:
                frame = np.copy(frame)
                face_pil = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
                face_tensor = self.transform(face_pil).unsqueeze(0).to(self.device)

                with torch.no_grad():
                    output = self.fermodel(face_tensor)
                _, predicted = torch.max(output, 1)

                # Reordered numeric score for this frame, kept as a tensor
                emotion = self.emotion_reorder[predicted.item()]
                emotion = torch.tensor([emotion], dtype=torch.float32).to(self.device)
                emt.append(emotion)

                self.emotion_count[predicted.item()] += 1
                emotion_list.append(self.label_dict[predicted.item()])
            else:
                emt.append("frame error")
                emotion_list.append("frame error")
        return emotion_list, emt

    def get_data(self, emotion_list, emt):
        self.class_wise_frame_count = dict(zip(self.class_labels, self.emotion_count))
        return emotion_list, self.class_wise_frame_count, emt
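# A hedged usage sketch for the pieces above, assuming the detector and
# landmark files named in the commented-out loading code at the top of this
# module, plus a hypothetical serialized checkpoint "models/fer_model.pt":
#
#   net = cv2.dnn.readNetFromCaffe(
#       "models/deploy.prototxt",
#       "models/res10_300x300_ssd_iter_140000.caffemodel",
#   )
#   predictor = dlib.shape_predictor("models/shape_predictor_68_face_landmarks.dat")
#   model = Model(fps=30, fer_model="models/fer_model.pt")
#   faces = [extract_face(f, net, predictor) for f in frames]  # None when no face
#   labels, scores = model.predict(faces)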
def fer_predict(video_frames, fps, model):
    # fps is accepted for API symmetry with the caller but is unused here
    emotion_list, emt = model.predict(video_frames)
    return model.get_data(emotion_list, emt)


def filter(list1, list2):
    """Drop entries of both lists at positions where list2 holds the
    'fnf' (face-not-found) marker."""
    filtered_list1 = [x for i, x in enumerate(list1) if list2[i] != "fnf"]
    filtered_list2 = [x for x in list2 if x != "fnf"]
    return [filtered_list1, filtered_list2]


def plot_graph(x, y, var, path):
    """Plot the per-frame values in y (non-numeric entries become NaN gaps)
    and save the figure to path."""
    y = [value if isinstance(value, (int, float)) else np.nan for value in y]
    plt.plot(range(len(x)), y, linestyle="-")
    plt.xlabel("Frame")
    plt.ylabel(var)
    plt.title(f"{var} Values vs Frame")
    plt.savefig(path)
    plt.clf()


# def save_frames(frames, folder_path):
#     for i in tqdm(range(len(frames))):
#         frame_filename = os.path.join(folder_path, f"frame_{i+1:04d}.jpg")
#         # Save the frame as a .jpg file
#         frame = cv2.cvtColor(frames[i], cv2.COLOR_BGR2RGB)
#         cv2.imwrite(frame_filename, frame)
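# A minimal end-to-end sketch under stated assumptions: "input.mp4" and
# "models/fer_model.pt" are hypothetical paths, substitute your own. It reads
# all frames from the video, classifies the emotion in each one, prints the
# class-wise frame counts, and plots the numeric emotion stream.
if __name__ == "__main__":
    cap = cv2.VideoCapture("input.mp4")        # hypothetical input video
    fps = cap.get(cv2.CAP_PROP_FPS)
    frames = []
    ok, frame = cap.read()
    while ok:
        frames.append(frame)
        ok, frame = cap.read()
    cap.release()

    model = Model(fps, "models/fer_model.pt")  # hypothetical checkpoint path
    labels, class_counts, scores = fer_predict(frames, fps, model)
    print(class_counts)

    # Non-tensor entries ("frame error") become NaN gaps in the plot
    numeric = [s.item() if torch.is_tensor(s) else float("nan") for s in scores]
    plot_graph(frames, numeric, "emotion", "emotion_vs_frame.png")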