import os
from dataclasses import dataclass

import huggingface_hub
import numpy as np
import torch
import torchvision
from torchvision.transforms import InterpolationMode

from network.models.facexformer import FaceXFormer

device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.float32

weights_path = "ckpts/model.pt"
# weights_path = "ckpts/pytorch_model.bin"

# Optional: MediaPipe BlazeFace detector for producing the input bounding boxes.
# face_model_path = "ckpts/blaze_face_short_range.tflite"
# import mediapipe as mp
# BaseOptions = mp.tasks.BaseOptions
# FaceDetector = mp.tasks.vision.FaceDetector
# FaceDetectorOptions = mp.tasks.vision.FaceDetectorOptions
# FaceDetectorResult = mp.tasks.vision.FaceDetectorResult
# VisionRunningMode = mp.tasks.vision.RunningMode
# options = FaceDetectorOptions(
#     base_options=BaseOptions(model_asset_path=face_model_path),
#     running_mode=VisionRunningMode.LIVE_STREAM,
# )
# face_detector = FaceDetector.create_from_options(options)

# ImageNet-style preprocessing expected by the FaceXFormer backbone.
transforms_image = torchvision.transforms.Compose(
    [
        torchvision.transforms.ToPILImage(),
        torchvision.transforms.Resize(
            size=(224, 224), interpolation=InterpolationMode.BICUBIC
        ),
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize(
            mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
        ),
    ]
)
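
# Quick shape check (illustrative, not part of the original file): any
# HxWx3 uint8 array becomes a normalized 3x224x224 float tensor, e.g.
#   transforms_image(np.zeros((480, 640, 3), dtype=np.uint8)).shape
#   -> torch.Size([3, 224, 224])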


def load_model(weights_path):
    model = FaceXFormer().to(device)
    if not os.path.exists(weights_path):
        # Fetch the checkpoint from the Hugging Face Hub on first run;
        # with local_dir=".", the file lands at ./ckpts/model.pt,
        # which matches weights_path above.
        huggingface_hub.hf_hub_download(
            "kartiknarayan/facexformer",
            "ckpts/model.pt",
            repo_type="model",
            local_dir=".",
        )
    checkpoint = torch.load(weights_path, map_location=device)
    # model.load_state_dict(checkpoint)
    model.load_state_dict(checkpoint["state_dict_backbone"])
    model = model.eval()
    model = model.to(dtype=dtype)
    # model = torch.compile(model, mode="reduce-overhead")
    return model


model = load_model(weights_path)


def adjust_bbox(
    x_min, y_min, x_max, y_max, image_width, image_height, margin_percentage=50
):
    # Expand the box by margin_percentage of its size, split evenly between
    # the two opposite sides, then clamp to the image bounds.
    width = x_max - x_min
    height = y_max - y_min
    increase_width = width * (margin_percentage / 100.0) / 2
    increase_height = height * (margin_percentage / 100.0) / 2
    x_min_adjusted = int(max(0, x_min - increase_width))
    y_min_adjusted = int(max(0, y_min - increase_height))
    x_max_adjusted = int(min(image_width, x_max + increase_width))
    y_max_adjusted = int(min(image_height, y_max + increase_height))
    return x_min_adjusted, y_min_adjusted, x_max_adjusted, y_max_adjusted
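
# Worked example (values are illustrative, not from the original file):
# a 100x100 box at (100, 100) in a 640x480 image, with the default 50%
# margin, grows by 25 px on each side:
#   adjust_bbox(100, 100, 200, 200, 640, 480) == (75, 75, 225, 225)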


def denorm_points(points, h, w, align_corners=False):
    # Map landmark coordinates from the normalized [-1, 1] range back to
    # pixel coordinates, mirroring grid_sample's align_corners conventions.
    if align_corners:
        denorm_points = (
            (points + 1) / 2 * torch.tensor([w - 1, h - 1]).to(points).view(1, 1, 2)
        )
    else:
        denorm_points = (
            (points + 1) * torch.tensor([w, h]).to(points).view(1, 1, 2) - 1
        ) / 2
    return denorm_points
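
# Sanity check (illustrative values, not from the original file): with
# align_corners=False and a 224x224 crop, the corners of the normalized
# grid map half a pixel outside the pixel centers:
#   (-1, -1) -> (-0.5, -0.5)
#   ( 1,  1) -> (223.5, 223.5)
# With align_corners=True they land exactly on (0, 0) and (223, 223).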


@dataclass
class BoundingBox:
    x_min: int
    y_min: int
    x_max: int
    y_max: int


@dataclass
class FaceImg:
    # Cropped face pixels plus the crop's top-left corner in the full image.
    image: np.ndarray
    x_min: int
    y_min: int


def get_faces_img(img: np.ndarray, boxes: list[BoundingBox]):
    if boxes is None or len(boxes) == 0:
        return []
    results: list[FaceImg] = []
    for box in boxes:
        x_min, y_min, x_max, y_max = box.x_min, box.y_min, box.x_max, box.y_max
        # Pad the detector box so the whole face (hairline, chin) is in the crop.
        x_min, y_min, x_max, y_max = adjust_bbox(
            x_min, y_min, x_max, y_max, img.shape[1], img.shape[0]
        )
        image = img[y_min:y_max, x_min:x_max]
        results.append(FaceImg(image, int(x_min), int(y_min)))
    return results


@dataclass
class Face:
    # Normalized 224x224 tensor plus enough metadata to map landmarks back
    # into full-image coordinates.
    image: torch.Tensor
    x_min: int
    y_min: int
    original_w: int
    original_h: int


def get_faces(img: np.ndarray, boxes: list[BoundingBox]):
    images = get_faces_img(img, boxes)
    images = [
        Face(
            transforms_image(face_image.image),
            face_image.x_min,
            face_image.y_min,
            face_image.image.shape[1],  # crop width before the 224x224 resize
            face_image.image.shape[0],  # crop height before the 224x224 resize
        )
        for face_image in images
    ]
    return images


def get_landmarks(faces: list[Face]):
    if len(faces) == 0:
        return []
    images = torch.stack([face.image for face in faces]).to(device=device, dtype=dtype)
    # Task id 1 selects FaceXFormer's landmark task; kept as an integer tensor
    # since task ids are indices, not features.
    tasks = torch.tensor([1] * len(faces), device=device, dtype=torch.long)
    with torch.inference_mode():
        # with torch.amp.autocast("cuda"):
        (
            batch_landmarks,
            headposes,
            attributes,
            visibilities,
            ages,
            genders,
            races,
            segs,
        ) = model.predict(images, None, tasks)
    batch_denormed = [
        denorm_points(landmarks, face.original_h, face.original_w)[0]
        for landmarks, face in zip(batch_landmarks.view(-1, 68, 2), faces)
    ]
    results = []
    for landmarks, face in zip(batch_denormed, faces):
        # Shift each of the 68 points from crop coordinates back into the
        # full-image frame using the crop's top-left corner.
        results.append(
            [(int(x + face.x_min), int(y + face.y_min)) for x, y in landmarks]
        )
    return results
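

# Usage sketch (not part of the original file): run the pipeline on one image
# with a hand-specified box. "example.jpg" and the box coordinates are
# placeholders; in practice the boxes would come from a face detector such as
# the MediaPipe setup commented out above.
if __name__ == "__main__":
    from PIL import Image

    img = np.array(Image.open("example.jpg").convert("RGB"))
    boxes = [BoundingBox(x_min=100, y_min=100, x_max=300, y_max=300)]
    faces = get_faces(img, boxes)
    for landmarks in get_landmarks(faces):
        print(len(landmarks), "landmarks, first point:", landmarks[0])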