MatthewTL committed on
Commit f9f18ae · 1 Parent(s): c433a92

Add ViTPose inference API

Files changed (2)
  1. inference.py +56 -0
  2. requirements.txt +3 -0
inference.py ADDED
@@ -0,0 +1,56 @@
+ from transformers import (
+     VitPoseForPoseEstimation,
+     AutoProcessor,
+     RTDetrForObjectDetection,
+ )
+ from PIL import Image
+ import torch
+
+ # load both models once at import time
+ det_proc = AutoProcessor.from_pretrained("PekingU/rtdetr_r50vd_coco_o365")
+ det_model = RTDetrForObjectDetection.from_pretrained("PekingU/rtdetr_r50vd_coco_o365").eval()
+
+ pose_proc = AutoProcessor.from_pretrained("usyd-community/vitpose-base-simple")
+ pose_model = VitPoseForPoseEstimation.from_pretrained("usyd-community/vitpose-base-simple").eval()
+
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ det_model.to(device)
+ pose_model.to(device)
+
+ # Hugging Face will call this function
+ def predict(inputs: dict) -> dict:
+     """
+     inputs: {"image": PIL.Image}
+     returns: {"poses": list of keypoint results, one dict per detected person}
+     """
+     image = inputs["image"]
+
+     # detect people
+     det_inputs = det_proc(images=image, return_tensors="pt").to(device)
+     with torch.no_grad():
+         det_outputs = det_model(**det_inputs)
+     results = det_proc.post_process_object_detection(
+         det_outputs,
+         threshold=0.5,
+         target_sizes=[(image.height, image.width)],
+     )
+     # keep only the "person" class (label 0 in this checkpoint)
+     person_boxes = results[0]["boxes"][results[0]["labels"] == 0]
+
+     # bail out early if nobody was detected
+     if len(person_boxes) == 0:
+         return {"poses": []}
+
+     # VitPose expects boxes in COCO (x, y, w, h) format on the CPU;
+     # the detector returns (x1, y1, x2, y2) corners
+     person_boxes = person_boxes.cpu().numpy()
+     person_boxes[:, 2] = person_boxes[:, 2] - person_boxes[:, 0]
+     person_boxes[:, 3] = person_boxes[:, 3] - person_boxes[:, 1]
+
+     # run pose estimation on the detected boxes
+     pose_inputs = pose_proc(image, boxes=[person_boxes], return_tensors="pt").to(device)
+     with torch.no_grad():
+         pose_outputs = pose_model(**pose_inputs)
+     poses = pose_proc.post_process_pose_estimation(pose_outputs, boxes=[person_boxes])
+
+     return {"poses": poses[0]}
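
A quick way to sanity-check the handler is to call predict directly; a minimal sketch, assuming a local test image saved as person.jpg (the filename is hypothetical):

    from PIL import Image
    from inference import predict

    image = Image.open("person.jpg").convert("RGB")  # hypothetical test image
    out = predict({"image": image})
    for pose in out["poses"]:  # one dict per detected person
        print(pose["keypoints"].shape, pose["scores"].shape)

Each entry in poses carries the keypoints and scores tensors produced by post_process_pose_estimation.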
requirements.txt ADDED
@@ -0,0 +1,3 @@
+ torch
+ transformers>=4.43.0
+ Pillow