FaceXFormer-Demo / selfie_filter.py
thng292's picture
Upload selfie_filter.py
34f7d44 verified
import os
import numpy as np
import cv2
import landmark_detection
import gradio as gr
from mtcnn_facedetection import detect_faces
def apply_sunglasses(image, landmarks, sunglasses_img):
# If image loading fails or no landmarks, return original image
if sunglasses_img is None or not landmarks:
return image
# Create a copy of the image to overlay on
result = image.copy()
# Process each face
for face_landmarks in landmarks:
# We need at least the eye landmarks
if len(face_landmarks) < 5:
continue
# Get eye landmarks
left_eye_center = np.mean(face_landmarks[36:42], axis=0).astype(int)
right_eye_center = np.mean(face_landmarks[42:48], axis=0).astype(int)
# Calculate eye distance and angle
eye_distance = np.linalg.norm(right_eye_center - left_eye_center)
# Negate the angle to correct rotation direction
angle = -np.degrees(
np.arctan2(
right_eye_center[1] - left_eye_center[1],
right_eye_center[0] - left_eye_center[0],
)
)
# Size for sunglasses based on eye distance
width = int(eye_distance * 2.5)
height = int(width * sunglasses_img.shape[0] / sunglasses_img.shape[1])
# Resize sunglasses
sunglasses_resized = cv2.resize(sunglasses_img, (width, height))
# Rotate the sunglasses image
center = (width // 2, height // 2)
rotation_matrix = cv2.getRotationMatrix2D(center, angle, 1.0)
# Calculate new dimensions after rotation
cos = np.abs(rotation_matrix[0, 0])
sin = np.abs(rotation_matrix[0, 1])
new_width = int((height * sin) + (width * cos))
new_height = int((height * cos) + (width * sin))
# Adjust rotation matrix
rotation_matrix[0, 2] += (new_width / 2) - center[0]
rotation_matrix[1, 2] += (new_height / 2) - center[1]
# Perform the rotation
rotated_glasses = cv2.warpAffine(
sunglasses_resized,
rotation_matrix,
(new_width, new_height),
flags=cv2.INTER_LINEAR,
borderMode=cv2.BORDER_CONSTANT,
borderValue=(0, 0, 0, 0),
)
# Position the sunglasses
eye_center = ((left_eye_center + right_eye_center) // 2).astype(int)
x = eye_center[0] - new_width // 2
y = eye_center[1] - new_height // 2
# Create ROI for overlay
y1, y2 = max(0, y), min(result.shape[0], y + new_height)
x1, x2 = max(0, x), min(result.shape[1], x + new_width)
# ROI in the glasses image
g_y1, g_y2 = max(0, -y), max(0, -y) + (y2 - y1)
g_x1, g_x2 = max(0, -x), max(0, -x) + (x2 - x1)
# Check if we have valid regions
if g_y2 <= rotated_glasses.shape[0] and g_x2 <= rotated_glasses.shape[1]:
roi = result[y1:y2, x1:x2]
glasses_roi = rotated_glasses[g_y1:g_y2, g_x1:g_x2]
# Apply alpha blending
if glasses_roi.shape[2] == 4 and roi.shape[:2] == glasses_roi.shape[:2]:
alpha = glasses_roi[:, :, 3] / 255.0
for c in range(3):
roi[:, :, c] = (
glasses_roi[:, :, c] * alpha + roi[:, :, c] * (1 - alpha)
).astype(np.uint8)
result[y1:y2, x1:x2] = roi
return result
def do_facial_landmark_recognition(
image: np.ndarray, face_boxes: list[landmark_detection.BoundingBox]
):
faces = landmark_detection.get_faces(image, face_boxes)
landmarks_batch = landmark_detection.get_landmarks(faces)
return landmarks_batch
def do_facial_landmark_recognition_with_mtcnn(image: np.ndarray, sunglasses_img):
face_boxes = detect_faces(image)
landmarks_batch = do_facial_landmark_recognition(image, face_boxes)
return apply_sunglasses(image, landmarks_batch, sunglasses_img)
def process_video(input_path, sunglasses_img):
output_path = os.path.join(
os.path.dirname(input_path), "output_" + os.path.basename(input_path)
)
# Open the input video
cap = cv2.VideoCapture(input_path)
if not cap.isOpened():
gr.Error(f"Error opening input video file: {input_path}")
return
# Get video properties
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS)
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
# Create VideoWriter object
fourcc = cv2.VideoWriter_fourcc(*"mp4v")
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
frame_count = 0
# Process each frame
while cap.isOpened():
ret, frame = cap.read()
if not ret:
break
# Process the frame
processed_frame = do_facial_landmark_recognition_with_mtcnn(
frame, sunglasses_img
)
# Write the frame
out.write(processed_frame)
# Release resources
cap.release()
out.release()
gr.Info(f"Video processing complete. Output saved to: {output_path}")
return output_path