# Built from megadetector section from https://huggingface.co/spaces/hlydecker/MegaDetector_v5
# Built from https://huggingface.co/spaces/sofmi/MegaDetector_DLClive/
# Built from https://huggingface.co/spaces/Neslihan/megadetector_dlcmodels/

# %%
import json
import math
import os
from statistics import mean

import gradio as gr
import matplotlib
import numpy as np
import torch
import yaml
from dlclive import DLCLive, Processor
from dlcmodel.models import DownloadModel
from PIL import Image, ImageColor, ImageDraw, ImageFont

from save_results import save_results

#########################################
# %%
# Input params
FONTS = {'amiko': "font/Amiko-Regular.ttf",
         'nature': "font/LoveNature.otf",
         'painter': "font/PainterDecorator.otf",
         'animals': "font/UncialAnimals.ttf",
         'zen': "font/ZEN.TTF"}

DLCFOLDERS = {'full_cat': "dlcmodel/DLC_Cat/",
              'full_dog': "dlcmodel/DLC_Dog/",
              'full_cheetah': "dlcmodel/DLC_Cheetah/",
              'full_human': "dlcmodel/DLC_human_dancing/",
              'full_macaque': "dlcmodel/models/DLC_monkey/",
              'quadruped': "dlcmodel/DLC_ma_superquadruped_resnet_50_iteration-0_shuffle-1/"}

DLCMODELS = {'full_cat': "dlcmodel/DLC_Cat_resnet_50_iteration-0_shuffle-0",
             'full_dog': "dlcmodel/DLC_Dog_resnet_50_iteration-0_shuffle-0",
             'full_cheetah': "dlcmodel/DLC_Cheetah_resnet_152_iteration-27_shuffle-1",
             'full_human': "dlcmodel/DLC_human_dancing_resnet_101_iteration-0_shuffle-1",
             'full_macaque': "dlcmodel/models/DLC_monkey_resnet_50_iteration-0_shuffle-1",
             'quadruped': "dlcmodel/DLC_ma_superquadruped_resnet_50_iteration-0_shuffle-1"}

Megadet_Models = {'md_v5a': "megadet_model/md_v5a.0.0.pt",
                  'md_v5b': "megadet_model/md_v5b.0.0.pt"}

DLC_models_list = ['full_cat', 'full_cheetah', 'full_dog',
                   'full_human', 'full_macaque', 'quadruped']
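# --- Optional startup sanity check (not in the original Space; a minimal
# sketch assuming the font and model paths above are relative to the repo
# root). Calling it before building the UI surfaces missing assets early.
def check_asset_paths():
    missing = [p for p in list(FONTS.values()) + list(Megadet_Models.values())
               if not os.path.exists(p)]
    if missing:
        print('Warning: missing asset files:', missing)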
#############################################
# %%
# User interface: inputs

# Input image
gr_image_input = gr.inputs.Image(type="pil", label="Input Image")

# Models
gr_dlc_model_input = gr.inputs.Dropdown(choices=DLC_models_list,
                                        default='full_cat',
                                        type='value',  # 'value' returns the selected string, 'index' its position
                                        label='Select DeepLabCut model')
gr_mega_model_input = gr.inputs.Dropdown(choices=list(Megadet_Models.keys()),
                                         default='md_v5a',
                                         type='value',
                                         label='Select MegaDetector model')

# Other inputs
gr_dlc_only_checkbox = gr.inputs.Checkbox(False,
                                          label='Run DLClive only, directly on input image')
gr_str_labels_checkbox = gr.inputs.Checkbox(True,
                                            label='Show bodypart labels')
gr_slider_conf_bboxes = gr.inputs.Slider(0, 1, .02, 0.8,
                                         label='Set confidence threshold for animal detections')
gr_slider_conf_keypoints = gr.inputs.Slider(0, 1, .05, 0,
                                            label='Set confidence threshold for keypoints')

# Data viz
gr_keypt_color = gr.ColorPicker(value="#ff0000",
                                label="Choose color for keypoint label")
gr_labels_font_style = gr.inputs.Dropdown(choices=['amiko', 'animals', 'nature', 'painter', 'zen'],
                                          default='amiko',
                                          type='value',
                                          label='Select keypoint label font')
gr_slider_font_size = gr.inputs.Slider(5, 30, 1, 8,
                                       label='Set font size (pt)')
gr_slider_marker_size = gr.inputs.Slider(1, 5, 0.2, 2,
                                         label='Set marker size (pixel)')
gr_mega_bb_color = gr.ColorPicker(value="#ff0000",
                                  label="Choose color for MegaDetector bounding box")
gr_mega_bb_width = gr.inputs.Slider(1, 20, 1, 5,
                                    label='Set width of MegaDetector bounding box')

# list of inputs
inputs = [gr_image_input,
          gr_mega_model_input,
          gr_dlc_model_input,
          gr_dlc_only_checkbox,
          gr_str_labels_checkbox,
          gr_slider_conf_bboxes,
          gr_slider_conf_keypoints,
          gr_labels_font_style,
          gr_slider_font_size,
          gr_keypt_color,
          gr_slider_marker_size,
          gr_mega_bb_color,
          gr_mega_bb_width,
          ]

#########################################
# %%
# Draw keypoints on image
def draw_keypoints_on_image(image,
                            keypoints,
                            map_label_id_to_str,
                            flag_show_str_labels,
                            use_normalized_coordinates=True,
                            font_style='amiko',
                            font_size=8,
                            keypt_color="#ff0000",
                            marker_size=2,
                            ):
    """Draws keypoints on an image.

    Modified from:
    https://www.programcreek.com/python/?code=fjchange%2Fobject_centric_VAD%2Fobject_centric_VAD-master%2Fobject_detection%2Futils%2Fvisualization_utils.py

    Args:
      image: a PIL.Image object.
      keypoints: a numpy array with shape [num_keypoints, 3], one
        (x, y, likelihood) row per keypoint.
      map_label_id_to_str: dict with keys = label id, values = label string.
      flag_show_str_labels: whether to show string labels next to keypoints.
      use_normalized_coordinates: if True (default), treat keypoint values as
        relative to the image; otherwise treat them as absolute pixel coords.
      font_style, font_size: font used for the string labels.
      keypt_color: color of the string labels. Default is red.
      marker_size: keypoint marker radius in pixels. Default is 2.
    """
    # get a drawing context
    draw = ImageDraw.Draw(image)

    im_width, im_height = image.size
    keypoints_x = [k[0] for k in keypoints]
    keypoints_y = [k[1] for k in keypoints]
    alpha = [k[2] for k in keypoints]

    # adjust keypoints coords if required
    if use_normalized_coordinates:
        keypoints_x = tuple([im_width * x for x in keypoints_x])
        keypoints_y = tuple([im_height * y for y in keypoints_y])

    cmap = matplotlib.cm.get_cmap('hsv')
    cmap2 = matplotlib.cm.get_cmap('Greys')

    # draw ellipses around keypoints
    for i, (keypoint_x, keypoint_y) in enumerate(zip(keypoints_x, keypoints_y)):
        # fill color cycles through the 'hsv' colormap per keypoint;
        # outline grey level encodes the keypoint likelihood
        round_fill = [round(num * 255) for num in list(cmap(i * 10))[:3]]
        round_outline = [round(num * 255) for num in list(cmap2(alpha[i]))[:3]]
        draw.ellipse([(keypoint_x - marker_size, keypoint_y - marker_size),
                      (keypoint_x + marker_size, keypoint_y + marker_size)],
                     fill=tuple(round_fill),
                     outline=tuple(round_outline),
                     width=2)  # fill and outline: [0, 255]

        # add string labels around keypoints
        if flag_show_str_labels:
            font = ImageFont.truetype(FONTS[font_style], font_size)
            draw.text((keypoint_x + marker_size, keypoint_y + marker_size),
                      map_label_id_to_str[i],
                      ImageColor.getcolor(keypt_color, "RGB"),
                      font=font)
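# --- Optional smoke test (not part of the original app): a minimal sketch of
# how draw_keypoints_on_image is called. The keypoint array and label map are
# made up for illustration; coordinates are absolute pixels, likelihood in [0, 1].
def _demo_draw_keypoints():
    img = Image.new("RGB", (200, 200), "white")
    kpts = np.array([[50., 50., 0.9],
                     [150., 120., 0.4]])  # one (x, y, likelihood) row per keypoint
    draw_keypoints_on_image(img,
                            kpts,
                            map_label_id_to_str={0: 'nose', 1: 'tail'},
                            flag_show_str_labels=True,
                            use_normalized_coordinates=False)
    img.save("demo_keypoints.png")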
############################################
# %%
# Predict detections with MegaDetector v5 model
def predict_md(im, mega_model_input, size=640):
    # resize image so that its longest side equals `size`
    g = (size / max(im.size))  # multiplicative scaling factor
    im = im.resize(tuple(int(x * g) for x in im.size),
                   Image.LANCZOS)  # LANCZOS replaces the deprecated ANTIALIAS alias

    MD_model = torch.hub.load('ultralytics/yolov5', 'custom',
                              Megadet_Models[mega_model_input])

    ## detect objects
    results = MD_model(im)  # inference
    # vars(results).keys() = dict_keys(['ims', 'pred', 'names', 'files',
    #   'times', 'xyxy', 'xywh', 'xyxyn', 'xywhn', 'n', 't', 's'])

    return results

##########################################
# %%
def crop_animal_detections(img_in,
                           yolo_results,
                           likelihood_th):
    ## Extract animal crops
    list_labels_as_str = list(yolo_results.names.values())  # ['animal', 'person', 'vehicle']
    list_np_animal_crops = []

    # scale the input image to the size MegaDetector ran on
    img_in = img_in.resize((yolo_results.ims[0].shape[1],
                            yolo_results.ims[0].shape[0]))
    # for every detection in the img
    for det_array in yolo_results.xyxy:
        # for every detection
        for j in range(det_array.shape[0]):
            # compute coords around bbox rounded to the nearest integer (for pasting later)
            xmin_rd = int(math.floor(det_array[j, 0]))
            ymin_rd = int(math.floor(det_array[j, 1]))
            xmax_rd = int(math.ceil(det_array[j, 2]))
            ymax_rd = int(math.ceil(det_array[j, 3]))

            pred_llk = det_array[j, 4]
            pred_label = det_array[j, 5]

            # keep animal crops above threshold
            if (pred_label == list_labels_as_str.index('animal')) and \
               (pred_llk >= likelihood_th):
                area = (xmin_rd, ymin_rd, xmax_rd, ymax_rd)
                crop = img_in.crop(area)
                crop_np = np.asarray(crop)

                # add to list
                list_np_animal_crops.append(crop_np)

    return list_np_animal_crops
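# --- Example composition (a sketch, not called by the app): run MegaDetector
# on a local image and collect the animal crops above a 0.8 confidence
# threshold. 'example.jpg' is a placeholder path.
def _demo_md_crops():
    img = Image.open("example.jpg")
    results = predict_md(img, 'md_v5a', size=640)
    crops = crop_animal_detections(img, results, likelihood_th=0.8)
    print(f"{len(crops)} animal crop(s) found")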
#########################################
# %%
def draw_rectangle_text(img,
                        results,
                        font_style='amiko',
                        font_size=8,
                        keypt_color="white",
                        mega_bb_color='red',
                        mega_bb_width=5):
    # `results` holds one detection as [xmin, ymin, xmax, ymax, confidence, class]
    bbxyxy = results
    xmin, ymin, xmax, ymax = bbxyxy[0], bbxyxy[1], bbxyxy[2], bbxyxy[3]
    shape = [(xmin, ymin), (xmax, ymax)]

    imgR = ImageDraw.Draw(img)
    imgR.rectangle(shape, outline=mega_bb_color, width=mega_bb_width)  # bbox around the animal

    confidence = bbxyxy[4]
    string_bb = 'animal ' + str(round(confidence, 2))
    font = ImageFont.truetype(FONTS[font_style], font_size)

    # text height via getbbox (font.getsize is deprecated in recent Pillow)
    _, top, _, bottom = font.getbbox(string_bb)
    text_h = bottom - top

    # filled background behind the label, then the label text itself
    position = (xmin, ymin - text_h - 2)
    left, top, right, bottom = imgR.textbbox(position, string_bb, font=font)
    imgR.rectangle((left, top - mega_bb_width, right + mega_bb_width, bottom + mega_bb_width),
                   fill=mega_bb_color)
    imgR.text((xmin + 3, ymin - text_h - 2),
              string_bb, font=font, fill=keypt_color)
    return imgR

##########################################
# %%
def predict_dlc(list_np_crops,
                kpts_likelihood_th,
                DLCmodel,
                dlc_proc):
    # run dlc through the list of crops
    dlc_live = DLCLive(DLCmodel, processor=dlc_proc)
    dlc_live.init_inference(list_np_crops[0])

    list_kpts_per_crop = []
    for crop in list_np_crops:
        # scale crop here?
        keypts_xyp = dlc_live.get_pose(crop)  # third column is the likelihood
        # set keypoints below the threshold to nan
        keypts_xyp[keypts_xyp[:, -1] < kpts_likelihood_th, :] = np.nan
        # add kpts of this crop to list
        list_kpts_per_crop.append(keypts_xyp)

    return list_kpts_per_crop
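# --- Usage sketch (not called by the app): pose-estimate a single crop with
# the quadruped model. The crop must be an RGB numpy array; 0.05 is an
# illustrative keypoint likelihood threshold.
def _demo_predict_dlc(crop_np):
    proc = Processor()
    kpts = predict_dlc([crop_np], 0.05,
                       DLCFOLDERS['quadruped'], proc)[0]
    print(kpts.shape)  # (num_keypoints, 3): x, y, likelihood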
####################################################
def social(output_file):
    ###############################
    # extract information from the json file
    with open(output_file) as f:
        json_data = json.load(f)
    print(json_data)

    # see how many bounding boxes there are
    print("number_of_bb:", json_data["number_of_bb"])

    # build one key per bounding box
    bb_list = []
    for num in range(json_data["number_of_bb"]):
        bb_list.append("bb_" + str(num))
    print("bb_list:", bb_list)

    # for each bounding box, print its DLC predictions
    for bb_num in bb_list:
        print(json_data[bb_num]["dlc_pred"])

    ################################
    # reference length using the nose-to-eye distance;
    # use the first bb that has nose and eyes
    eye_names = ['L_Eye', 'R_Eye', 'r_eye', 'l_eye', 'forehead', 'left_eye', 'right_eye']  # covers the bodypart names of all supported models
    eye_list = []
    nose_names = ['Nose', 'nose', 'chin']  # covers the bodypart names of all supported models
    nose_list = []

    # if eyes are in the dlc pred for the first bb, use them, together with
    # the nose of the same animal (second inner loop)
    for bb_num in bb_list:
        bb_body = json_data[bb_num]["dlc_pred"]
        for bodypart in bb_body:
            if bodypart in eye_names:
                eye_list.append(bb_body[bodypart])
        for bodypart in bb_body:
            if bodypart in nose_names:
                nose_list.append(bb_body[bodypart])
        break
    else:
        print("no eyes & nose present")
    print("eye_list:", eye_list)
    print("nose_list:", nose_list)

    # keep only x & y from each eye pose (drop the likelihood)
    eye_list_coord = []
    for pose in eye_list:
        fl_pose = [float(i) for i in pose]  # cast all entries to float
        if fl_pose[2] < 1.0:  # pose-likelihood threshold; should be 0.06, kept loose for now for demonstration
            eye_list_coord.append(fl_pose[:2])
    print("eye_list_coord:", eye_list_coord)

    # keep only x & y from each nose pose (drop the likelihood)
    nose_list_coord = []
    for pose in nose_list:
        fl_pose = [float(i) for i in pose]  # cast all entries to float
        if fl_pose[2] < 1.0:  # pose-likelihood threshold; should be 0.06, kept loose for now for demonstration
            nose_list_coord.append(fl_pose[:2])
    print("nose_list_coord:", nose_list_coord)

    # obtain the distance between the nose and each eye
    nose_coord = max(nose_list_coord)  # pick one nose coordinate
    nose2eye_dist = []
    for eye in eye_list_coord:
        nose2eye_dist.append(math.dist(eye, nose_coord))
    print("nose2eye_dist:", nose2eye_dist)

    ref = mean(nose2eye_dist)
    print("ref:", ref)

    ################################
    # if there are two or more bounding boxes with animals present, check
    # whether any keypoint of one animal lies within the reference distance
    # of a keypoint of another

    # save the DLC predictions of each bounding box
    bb_dlc = {}
    for bb_num in bb_list:
        bb_dlc[bb_num] = json_data[bb_num]["dlc_pred"]
    print("bb_dlc:", bb_dlc)

    # keep only the (x, y) coordinates; bodypart names are not required
    bb_dlc_xy = {}
    for bb_num in bb_dlc:
        bb_body = bb_dlc[bb_num]
        bb_dlc_xy[bb_num] = []
        for body in bb_body:
            bodyval = bb_body[body]
            bodyval_xy = list(np.delete(bodyval, obj=2))  # drop the likelihood column
            bb_dlc_xy[bb_num].append(bodyval_xy)
    print("bb_dlc_xy:", bb_dlc_xy)

    ################################
    # compute the euclidean distance between all pose pairs of two bounding
    # boxes; currently presumes exactly 2 boxes (see the generalised sketch
    # after this function)
    int_matrx = []
    for x, y in bb_dlc_xy['bb_0']:
        bb0 = (x, y)
        for a, b in bb_dlc_xy['bb_1']:
            bb1 = (a, b)
            int_matrx.append(math.dist(bb0, bb1))
    print(int_matrx)

    # if any of the distances is below the reference length
    if any(i < ref for i in int_matrx):
        print("physically interacting")
        phys_int = "physically interacting"
    else:
        print("not physically interacting")
        phys_int = "not physically interacting"

    return phys_int
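# --- Generalisation sketch (not wired into the app): the pairwise check above
# hardcodes 'bb_0' and 'bb_1'; this variant compares every pair of bounding
# boxes, assuming the same bb_dlc_xy dict of {bb_key: [[x, y], ...]}.
def any_pair_interacting(bb_dlc_xy, ref):
    from itertools import combinations
    for key_a, key_b in combinations(bb_dlc_xy.keys(), 2):
        for pt_a in bb_dlc_xy[key_a]:
            for pt_b in bb_dlc_xy[key_b]:
                if math.dist(pt_a, pt_b) < ref:
                    return True  # at least one keypoint pair is closer than the reference
    return False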
#####################################################
#####################################################
# %%
def predict_pipeline(img_input,
                     mega_model_input,
                     dlc_model_input_str,
                     flag_dlc_only,
                     flag_show_str_labels,
                     bbox_likelihood_th,
                     kpts_likelihood_th,
                     font_style,
                     font_size,
                     keypt_color,
                     marker_size,
                     mega_bb_color,
                     mega_bb_width,
                     ):
    ############################################################
    ## Get DLC model and labels as strings
    # TODO: make a dict as for megadetector
    path_to_DLCmodel = DLCFOLDERS[dlc_model_input_str]  # DownloadModel(dlc_model_input_str, DLCFOLDERS[dlc_model_input_str])
    pose_cfg_path = DLCFOLDERS[dlc_model_input_str] + 'pose_cfg.yaml'

    # extract map of label ids to strings;
    # pose_cfg_dict['all_joints'] is a list of one-element lists
    with open(pose_cfg_path, "r") as stream:
        pose_cfg_dict = yaml.safe_load(stream)
    map_label_id_to_str = dict(zip([el[0] for el in pose_cfg_dict['all_joints']],
                                   pose_cfg_dict['all_joints_names']))

    ############################################################
    ## Run MegaDetector
    md_results = predict_md(img_input, mega_model_input, size=640)

    ################################################################
    # Obtain animal crops for bboxes with confidence above threshold
    list_crops = crop_animal_detections(img_input,
                                        md_results,
                                        bbox_likelihood_th)

    ##############################################################
    # Run DLC
    dlc_proc = Processor()

    # if required: ignore MD crops and run DLC on full image [mostly for testing]
    if flag_dlc_only:
        # compute kpts on input img
        list_kpts_per_crop = predict_dlc([np.asarray(img_input)],
                                         kpts_likelihood_th,
                                         path_to_DLCmodel,
                                         dlc_proc)
        # draw kpts on input img
        draw_keypoints_on_image(img_input,
                                list_kpts_per_crop[0],  # a numpy array with shape [num_keypoints, 3]
                                map_label_id_to_str,
                                flag_show_str_labels,
                                use_normalized_coordinates=False,
                                font_style=font_style,
                                font_size=font_size,
                                keypt_color=keypt_color,
                                marker_size=marker_size)
        # no JSON file or interaction label is produced in this branch
        return img_input, None, None

    else:
        # Compute kpts for each crop
        list_kpts_per_crop = predict_dlc(list_crops,
                                         kpts_likelihood_th,
                                         path_to_DLCmodel,
                                         dlc_proc)

        img_background = img_input.resize((md_results.ims[0].shape[1],
                                           md_results.ims[0].shape[0]))
        print(f'I have {len(list_crops)} bounding box(es)')
        for ic, (np_crop, kpts_crop) in enumerate(zip(list_crops,
                                                      list_kpts_per_crop)):
            ## Draw keypts on crop
            img_crop = Image.fromarray(np_crop)
            draw_keypoints_on_image(img_crop,
                                    kpts_crop,  # a numpy array with shape [num_keypoints, 3]
                                    map_label_id_to_str,
                                    flag_show_str_labels,
                                    use_normalized_coordinates=False,  # if True, use md_results.xyxyn for list_kpts_crop
                                    font_style=font_style,
                                    font_size=font_size,
                                    keypt_color=keypt_color,
                                    marker_size=marker_size)

            ## Paste crop into original image
            # https://pillow.readthedocs.io/en/stable/reference/Image.html#PIL.Image.Image.paste
            img_background.paste(img_crop,
                                 box=tuple([int(t) for t in md_results.xyxy[0][ic, :2]]))

            # draw the MegaDetector bounding box if above threshold
            bb_per_animal = md_results.xyxy[0].tolist()[ic]
            pred = bb_per_animal[4]
            if bbox_likelihood_th < pred:
                draw_rectangle_text(img_background, bb_per_animal,
                                    font_style=font_style,
                                    font_size=font_size,
                                    keypt_color=keypt_color,
                                    mega_bb_color=mega_bb_color,
                                    mega_bb_width=mega_bb_width)

        download_file = save_results(md_results,
                                     list_kpts_per_crop,
                                     map_label_id_to_str,
                                     bbox_likelihood_th)
        phys_int = social(download_file)

        return img_background, download_file, phys_int
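# --- Headless usage sketch (not part of the Space): run the full pipeline on
# a local file without the Gradio UI. 'example.jpg' and the argument values
# are placeholders for illustration.
def _demo_pipeline():
    img = Image.open("example.jpg")
    out_img, json_path, behaviour = predict_pipeline(
        img, 'md_v5a', 'full_cat',
        flag_dlc_only=False,
        flag_show_str_labels=True,
        bbox_likelihood_th=0.8,
        kpts_likelihood_th=0.05,
        font_style='amiko',
        font_size=8,
        keypt_color="#ff0000",
        marker_size=2,
        mega_bb_color="#ff0000",
        mega_bb_width=5)
    out_img.save("pipeline_output.png")
    print(json_path, behaviour)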
####################################################
# %%
# User interface: outputs
gr_image_output = gr.outputs.Image(type="pil", label="Output Image")
out_smpl_npy_download = gr.File(label="Download JSON file")
gr_behaviomics_output = gr.Textbox(label='What behaviour is happening?')
outputs = [gr_image_output, out_smpl_npy_download, gr_behaviomics_output]

##############################################
# %%
# User interface: description
gr_title = "megadetdlc TRIALLING"
gr_description = "Contributed by Sofia Minano, Neslihan Wittek, Nirel Kadzo, VicShaoChih Chiang -- DLC AI Residents 2022. \
This app detects animals in camera trap images with MegaDetector v5 and estimates their pose with DeepLabCut-live. \
We host models from the DeepLabCut ModelZoo Project and two MegaDetector models. \
Please carefully check their licensing information if you use this project. \
The app additionally builds on work from hlydecker/MegaDetector_v5, \
sofmi/MegaDetector_DLClive and Neslihan/megadetector_dlcmodels."

###########################################
# %%
demo = gr.Interface(predict_pipeline,
                    inputs=inputs,
                    outputs=outputs,
                    title=gr_title,
                    description=gr_description,
                    theme="huggingface",
                    # live=True
                    )

demo.launch(enable_queue=True,
            # share=True
            )

# %%