@add_end_docstrings(PIPELINE_INIT_ARGS)
class ObjectDetectionPipeline(Pipeline):
    """
    Object detection pipeline using any :obj:`AutoModelForObjectDetection`. This pipeline predicts bounding boxes of
    objects and their classes.

    This object detection pipeline can currently be loaded from :func:`~transformers.pipeline` using the following task
    identifier: :obj:`"object-detection"`.

    See the list of available models on `huggingface.co/models
    <https://huggingface.co/models?filter=object-detection>`__.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        # Reject TensorFlow explicitly before doing any further setup.
        if self.framework == "tf":
            raise ValueError(f"The {self.__class__} is only available in PyTorch.")

        requires_backends(self, "vision")
        self.check_model_type(MODEL_FOR_OBJECT_DETECTION_MAPPING)

    @staticmethod
    def load_image(image: Union[str, "Image.Image"]):
        """
        Load an image from a URL, a local path or an existing PIL image and convert it to RGB.

        Args:
            image (:obj:`str` or :obj:`PIL.Image.Image`):
                Either a string starting with ``http://`` or ``https://``, a path to an existing local file, or an
                already loaded PIL image.

        Returns:
            The image converted with ``convert("RGB")``.

        Raises:
            ValueError: If :obj:`image` is a string that is neither an HTTP(S) URL nor an existing file, or if it is
                neither a string nor a PIL image.
        """
        if isinstance(image, str):
            if image.startswith(("http://", "https://")):
                # We need to actually check for a real protocol, otherwise it's impossible to use a local file
                # like http_huggingface_co.png
                image = Image.open(requests.get(image, stream=True).raw)
            elif os.path.isfile(image):
                image = Image.open(image)
            else:
                raise ValueError(
                    f"Incorrect path or url, URLs must start with `http://` or `https://`, and {image} is not a valid path"
                )
        elif not isinstance(image, Image.Image):
            raise ValueError(
                "Incorrect format used for image. Should be a URL linking to an image, a local path, or a PIL image."
            )

        return image.convert("RGB")

    def _sanitize_parameters(self, **kwargs):
        """
        Split call-time keyword arguments into the three dictionaries returned to the base class.

        Only ``threshold`` is recognised here and it is placed in the third dictionary; the first two are always
        empty. NOTE(review): presumably the three dictionaries are the preprocess, forward and postprocess
        parameters expected by the base ``Pipeline`` -- confirm against the base class.
        """
        postprocess_kwargs = {}
        if "threshold" in kwargs:
            postprocess_kwargs["threshold"] = kwargs["threshold"]
        return {}, {}, postprocess_kwargs

    def __call__(self, *args, **kwargs) -> Union[Predictions, List[Prediction]]:
        """
        Detect objects (bounding boxes & classes) in the image(s) passed as inputs.

        Args:
            images (:obj:`str`, :obj:`List[str]`, :obj:`PIL.Image` or :obj:`List[PIL.Image]`):
                The pipeline handles three types of images:

                - A string containing an HTTP(S) link pointing to an image
                - A string containing a local path to an image
                - An image loaded in PIL directly

                The pipeline accepts either a single image or a batch of images. Images in a batch must all be in the
                same format: all as HTTP(S) links, all as local paths, or all as PIL images.
            threshold (:obj:`float`, `optional`, defaults to 0.9):
                The probability necessary to make a prediction.

        Return:
            A list of dictionaries or a list of list of dictionaries containing the result. If the input is a single
            image, will return a list of dictionaries, if the input is a list of several images, will return a list of
            list of dictionaries corresponding to each image.

            The dictionaries contain the following keys:

            - **label** (:obj:`str`) -- The class label identified by the model.
            - **score** (:obj:`float`) -- The score attributed by the model for that label.
            - **box** (:obj:`List[Dict[str, int]]`) -- The bounding box of detected object in image's original size.
        """
        return super().__call__(*args, **kwargs)

    def _get_bounding_box(self, box: "torch.Tensor") -> Dict[str, int]:
        """
        Turns list [xmin, ymin, xmax, ymax] into dict { "xmin": xmin, ... }

        Args:
            box (torch.Tensor): Tensor containing the coordinates in corners format.

        Returns:
            bbox (Dict[str, int]): Dict containing the coordinates in corners format.

        Raises:
            ValueError: If the pipeline framework is not ``"pt"``.
        """
        if self.framework != "pt":
            raise ValueError("The ObjectDetectionPipeline is only available in PyTorch.")

        # Coordinates are truncated to integers before being unpacked in corner order.
        xmin, ymin, xmax, ymax = box.int().tolist()
        return {
            "xmin": xmin,
            "ymin": ymin,
            "xmax": xmax,
            "ymax": ymax,
        }