import cv2
import numpy as np
import supervision as sv
import torch
import torchvision
from torchvision.transforms import ToTensor

from groundingdino.util.inference import Model

DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# GroundingDINO config and checkpoint
GROUNDING_DINO_CONFIG_PATH = "GroundingDINO/groundingdino/config/GroundingDINO_SwinT_OGC.py"
GROUNDING_DINO_CHECKPOINT_PATH = "./groundingdino_swint_ogc.pth"

# Building GroundingDINO inference model
grounding_dino_model = Model(model_config_path=GROUNDING_DINO_CONFIG_PATH, model_checkpoint_path=GROUNDING_DINO_CHECKPOINT_PATH)
# Building EfficientSAM predictor (TorchScript checkpoint)
EFFICIENT_SAM_CHECKPOINT_PATH = "./EfficientSAM/efficientsam_s_gpu.jit"
efficientsam = torch.jit.load(EFFICIENT_SAM_CHECKPOINT_PATH)
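# NOTE: efficientsam_s_gpu.jit was traced on a GPU and expects CUDA tensors;
# judging by the checkpoint naming, a CPU-traced counterpart
# (efficientsam_s_cpu.jit) would be needed on CPU-only machines (assumption,
# not confirmed here).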
# Detection classes and hyper-parameters for GroundingDINO
SOURCE_IMAGE_PATH = "./EfficientSAM/LightHQSAM/example_light_hqsam.png"
CLASSES = ["bench"]
BOX_THRESHOLD = 0.25
TEXT_THRESHOLD = 0.25
NMS_THRESHOLD = 0.8

# load image
image = cv2.imread(SOURCE_IMAGE_PATH)
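# cv2.imread returns None (rather than raising) on a bad path; this added guard
# fails early instead of crashing later inside the detector
if image is None:
    raise FileNotFoundError(f"Could not read image at {SOURCE_IMAGE_PATH}")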
# detect objects
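# predict_with_classes returns an sv.Detections object carrying xyxy boxes,
# confidence scores, and class_id indices into CLASSES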
detections = grounding_dino_model.predict_with_classes(
    image=image,
    classes=CLASSES,
    box_threshold=BOX_THRESHOLD,
    text_threshold=TEXT_THRESHOLD
)
# annotate image with detections
box_annotator = sv.BoxAnnotator()
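# NOTE: the five-field unpacking below matches older supervision releases, where
# iterating Detections yields (xyxy, mask, confidence, class_id, tracker_id);
# recent versions add a sixth `data` field and move label drawing to
# sv.LabelAnnotator, so pin supervision accordingly (version assumption)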
labels = [
    f"{CLASSES[class_id]} {confidence:0.2f}"
    for _, _, confidence, class_id, _
    in detections]
annotated_frame = box_annotator.annotate(scene=image.copy(), detections=detections, labels=labels)

# save the annotated grounding dino image
cv2.imwrite("EfficientSAM/LightHQSAM/groundingdino_annotated_image.jpg", annotated_frame)
# NMS post process
print(f"Before NMS: {len(detections.xyxy)} boxes")
nms_idx = torchvision.ops.nms(
    torch.from_numpy(detections.xyxy),
    torch.from_numpy(detections.confidence),
    NMS_THRESHOLD
).numpy().tolist()

detections.xyxy = detections.xyxy[nms_idx]
detections.confidence = detections.confidence[nms_idx]
detections.class_id = detections.class_id[nms_idx]

print(f"After NMS: {len(detections.xyxy)} boxes")
def efficient_sam_box_prompt_segment(image, pts_sampled, model):
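    # the xyxy box is encoded as two labeled prompt points: following the SAM
    # prompt convention, label 2 marks the top-left corner and label 3 the
    # bottom-right corner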
    bbox = torch.reshape(torch.tensor(pts_sampled), [1, 1, 2, 2])
    bbox_labels = torch.reshape(torch.tensor([2, 3]), [1, 1, 2])
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    img_tensor = ToTensor()(image)

    predicted_logits, predicted_iou = model(
        img_tensor[None, ...].cuda(),  # the GPU-traced TorchScript model expects CUDA inputs
        bbox.cuda(),
        bbox_labels.cuda(),
    )
    predicted_logits = predicted_logits.cpu()
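    # predicted_logits[0, 0] holds the candidate masks for the single box prompt
    # (EfficientSAM emits several per prompt); sigmoid(x) >= 0.5 is equivalent to
    # thresholding the raw logits at 0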
    all_masks = torch.ge(torch.sigmoid(predicted_logits[0, 0, :, :, :]), 0.5).numpy()
    predicted_iou = predicted_iou[0, 0, ...].cpu().detach().numpy()

    # keep the candidate mask with the highest predicted IoU
    max_predicted_iou = -1
    selected_mask_using_predicted_iou = None
    for m in range(all_masks.shape[0]):
        curr_predicted_iou = predicted_iou[m]
        if (
            curr_predicted_iou > max_predicted_iou
            or selected_mask_using_predicted_iou is None
        ):
            max_predicted_iou = curr_predicted_iou
            selected_mask_using_predicted_iou = all_masks[m]
    return selected_mask_using_predicted_iou
# collect segment results from EfficientSAM
result_masks = []
for box in detections.xyxy:
    mask = efficient_sam_box_prompt_segment(image, box, efficientsam)
    result_masks.append(mask)
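# supervision expects detections.mask as an (N, H, W) boolean array aligned with
# the xyxy boxes, which is what stacking the per-box masks produces
assert len(result_masks) == len(detections.xyxy)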
detections.mask = np.array(result_masks)
# annotate image with detections
box_annotator = sv.BoxAnnotator()
mask_annotator = sv.MaskAnnotator()
labels = [
    f"{CLASSES[class_id]} {confidence:0.2f}"
    for _, _, confidence, class_id, _
    in detections]
annotated_image = mask_annotator.annotate(scene=image.copy(), detections=detections)
annotated_image = box_annotator.annotate(scene=annotated_image, detections=detections, labels=labels)

# save the annotated grounded-sam image
cv2.imwrite("EfficientSAM/grounded_efficient_sam_annotated_image.jpg", annotated_image)