# sample_inference.py
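"""Run person segmentation on a single image with a YOLO12l-seg model.

Example usage (file paths are illustrative):

    python sample_inference.py --image photo.jpg --conf 0.4 \
        --output annotated.jpg --json people.json
"""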
import argparse
import json

import cv2
import numpy as np
import torch
from PIL import Image
from ultralytics import YOLO


def main():
    parser = argparse.ArgumentParser(description='Run person segmentation with YOLO12l-seg model')
    parser.add_argument('--model', type=str, default='yolo12l-person-seg.pt', help='Model path')
    parser.add_argument('--image', type=str, required=True, help='Image path for inference')
    parser.add_argument('--output', type=str, default='output.jpg', help='Output visualization image path')
    parser.add_argument('--json', type=str, default='detections.json', help='JSON output file for detection data')
    parser.add_argument('--conf', type=float, default=0.5, help='Confidence threshold')
    args = parser.parse_args()
    # Load the model
    model = YOLO(args.model)

    # Move to the best available device
    if torch.cuda.is_available():
        print(f"Using CUDA device: {torch.cuda.get_device_name(0)}")
        model.to('cuda')
        device = 'cuda'
        use_half = True
    elif hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
        print("Using Apple Silicon MPS")
        model.to('mps')
        device = 'mps'
        use_half = False
    else:
        print("Using CPU")
        device = None
        use_half = False
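    # Half precision (FP16) is requested only on CUDA above: it roughly halves
    # memory use and is typically faster on NVIDIA GPUs, while FP16 inference
    # is not reliably supported on the MPS or CPU backends, so they run in FP32.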
    # Load and check the input image
    try:
        img = Image.open(args.image)
        img_width, img_height = img.size
        print(f"Image dimensions: {img_width}x{img_height}")
    except Exception as e:
        print(f"Error opening image: {e}")
        return
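    # PIL is used above only to read the image dimensions; the same file is
    # re-read with OpenCV below, since the drawing calls expect a BGR ndarray.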
    # Run inference (classes=0 restricts results to the COCO 'person' class)
    if device == 'cuda':
        results = model(args.image, classes=0, conf=args.conf, device=device, half=use_half)
    elif device == 'mps':
        results = model(args.image, classes=0, conf=args.conf, device=device)
    else:
        results = model(args.image, classes=0, conf=args.conf)
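    # model(...) returns a list of Ultralytics Results objects, one per input
    # image; for a single image path the list contains exactly one entry.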
    # Process results
    detections = []
    visualization_img = cv2.imread(args.image)
    if visualization_img is None:
        print(f"Error: OpenCV could not read {args.image}")
        return

    for result in results:
        masks = result.masks
        boxes = result.boxes

        if boxes is None or len(boxes) == 0:
            print("No people detected in the image")
            return

        person_count = len(boxes)
        print(f"Detected {person_count} people")
        # Visualize and extract data
        if masks is not None:
            for i, (mask, box) in enumerate(zip(masks.xy, boxes)):
                confidence = float(box.conf[0])
                x1, y1, x2, y2 = map(int, box.xyxy[0])

                # Extract mask polygon points
                polygon_points = mask.tolist()
                if not polygon_points:
                    # Skip degenerate masks with no polygon vertices
                    continue

                # Mask extent as a fraction (0-1) of the image dimensions
                x_coords = [point[0] for point in polygon_points]
                y_coords = [point[1] for point in polygon_points]
                min_x, max_x = min(x_coords), max(x_coords)
                min_y, max_y = min(y_coords), max(y_coords)
                width_pct = (max_x - min_x) / img_width
                height_pct = (max_y - min_y) / img_height
                # Create detection record
                detection = {
                    "id": i,
                    "confidence": confidence,
                    "box": [x1, y1, x2, y2],
                    "points": polygon_points,
                    "width_pct": width_pct,
                    "height_pct": height_pct,
                }
                detections.append(detection)

                # Draw bounding box and confidence label
                cv2.rectangle(visualization_img, (x1, y1), (x2, y2), (0, 255, 0), 2)
                cv2.putText(visualization_img, f'Person: {confidence:.2f}', (x1, y1 - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

                # Draw segmentation mask
                color_mask = np.zeros_like(visualization_img, dtype=np.uint8)
                mask_points = np.array(polygon_points, dtype=np.int32)
                cv2.fillPoly(color_mask, [mask_points], (0, 0, 255))

                # Blend the mask with the original image
                visualization_img = cv2.addWeighted(visualization_img, 1.0, color_mask, 0.5, 0)
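                # addWeighted computes 1.0*image + 0.5*color_mask (gamma 0), so
                # masked pixels are pushed toward red rather than alpha-composited,
                # and overlapping masks accumulate across loop iterations.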
    # Save visualization
    cv2.imwrite(args.output, visualization_img)
    print(f"Visualization saved to {args.output}")

    # Save detection data to JSON
    with open(args.json, 'w') as f:
        json.dump({
            "person_count": person_count,
            "detections": detections
        }, f, indent=4)
    print(f"Detection data saved to {args.json}")


if __name__ == "__main__":
    main()