
    qi                          d dl mZmZmZmZ ddlmZmZmZm	Z	m
Z
 ddlmZmZ  e       rddlmZ  e       rd dlZddlmZmZ erd d	lmZ  e	j,                  e      Z e ed
             G d de             Zy)    )TYPE_CHECKINGAnyUnionoverload   )add_end_docstringsis_torch_availableis_vision_availableloggingrequires_backends   )Pipelinebuild_pipeline_init_args)
load_imageN)(MODEL_FOR_OBJECT_DETECTION_MAPPING_NAMES,MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING_NAMES)ImageT)has_image_processorc                   *    e Zd ZdZdZdZdZdZ fdZd Z	e
deedf   d	ed
edeeeef      fd       Ze
dee   ed   z  d	ed
edeeeeef         fd       Zdeeeef      eeeeef         z  f fdZddZd ZddZdddeeef   fdZ xZS )ObjectDetectionPipelinea  
    Object detection pipeline using any `AutoModelForObjectDetection`. This pipeline predicts bounding boxes of objects
    and their classes.

    Example:

    ```python
    >>> from transformers import pipeline

    >>> detector = pipeline(model="facebook/detr-resnet-50")
    >>> detector("https://huggingface.co/datasets/Narsil/image_dummy/raw/main/parrots.png")
    [{'score': 0.997, 'label': 'bird', 'box': {'xmin': 69, 'ymin': 171, 'xmax': 396, 'ymax': 507}}, {'score': 0.999, 'label': 'bird', 'box': {'xmin': 398, 'ymin': 105, 'xmax': 767, 'ymax': 507}}]

    >>> # x, y  are expressed relative to the top left hand corner.
    ```

    Learn more about the basics of using a pipeline in the [pipeline tutorial](../pipeline_tutorial)

    This object detection pipeline can currently be loaded from [`pipeline`] using the following task identifier:
    `"object-detection"`.

    See the list of available models on [huggingface.co/models](https://huggingface.co/models?filter=object-detection).
    FTNc                     t        |   |i | t        | d       t        j                         }|j                  t               | j                  |       y )Nvision)super__init__r   r   copyupdater   check_model_type)selfargskwargsmapping	__class__s       Y/opt/pipecat/venv/lib/python3.12/site-packages/transformers/pipelines/object_detection.pyr   z ObjectDetectionPipeline.__init__8   sH    $)&)$):??ACDg&    c                 D    i }d|v r|d   |d<   i }d|v r|d   |d<   |i |fS )Ntimeout	threshold )r   r    preprocess_paramspostprocess_kwargss       r#   _sanitize_parametersz,ObjectDetectionPipeline._sanitize_parameters@   sL    +1)+<i(& .4[.A{+ "&888r$   imagezImage.Imager   r    returnc                      y Nr(   r   r,   r   r    s       r#   __call__z ObjectDetectionPipeline.__call__I   s    mpr$   c                      y r/   r(   r0   s       r#   r1   z ObjectDetectionPipeline.__call__L   s     &)r$   c                 Z    d|v rd|vr|j                  d      |d<   t        |   |i |S )ai  
        Detect objects (bounding boxes & classes) in the image(s) passed as inputs.

        Args:
            inputs (`str`, `list[str]`, `PIL.Image` or `list[PIL.Image]`):
                The pipeline handles three types of images:

                - A string containing an HTTP(S) link pointing to an image
                - A string containing a local path to an image
                - An image loaded in PIL directly

                The pipeline accepts either a single image or a batch of images. Images in a batch must all be in the
                same format: all as HTTP(S) links, all as local paths, or all as PIL images.
            threshold (`float`, *optional*, defaults to 0.5):
                The probability necessary to make a prediction.
            timeout (`float`, *optional*, defaults to None):
                The maximum time in seconds to wait for fetching images from the web. If None, no timeout is set and
                the call may block forever.

        Return:
            A list of dictionaries or a list of list of dictionaries containing the result. If the input is a single
            image, will return a list of dictionaries, if the input is a list of several images, will return a list of
            list of dictionaries corresponding to each image.

            The dictionaries contain the following keys:

            - **label** (`str`) -- The class label identified by the model.
            - **score** (`float`) -- The score attributed by the model for that label.
            - **box** (`list[dict[str, int]]`) -- The bounding box of detected object in image's original size.
        imagesinputs)popr   r1   )r   r   r    r"   s      r#   r1   z ObjectDetectionPipeline.__call__Q   s=    @ v(&"8%zz(3F8w000r$   c                 ,   t        ||      }t        j                  |j                  |j                  gg      }| j                  |gd      }|j                  | j                        }| j                  | j                  |d   |d   d      }||d<   |S )N)r&   pt)r4   return_tensorswordsboxes)textr;   r9   target_size)	r   torch	IntTensorheightwidthimage_processortodtype	tokenizer)r   r,   r&   r=   r5   s        r#   
preprocessz"ObjectDetectionPipeline.preprocessu   s    5'2ooekk'B&CD%%eWT%J4::&>>%^^w`d^eF +}r$   c                     |j                  d      } | j                  di |}|j                  d|i|      }| j                  |d   |d<   |S )Nr=   bboxr(   )r6   modelr"   rE   )r   model_inputsr=   outputsmodel_outputss        r#   _forwardz ObjectDetectionPipeline._forward   s^    "&&}5$**,|,))=+*Q*QR>>%$0$8M&!r$   c           	      6    |d   } j                   |d   j                         \   fd}|d   j                  d      j                  d      j	                  d      \  }}|j                         D cg c]%  } j
                  j                  j                  |   ' }}|d   j                  d      D 	cg c]
  }	 ||	       }
}	g d}t        |j                         ||
      D cg c]   }|d   |kD  st        t        ||            " }}|S  j                  j                  |||      }|d   }|d	   }|d
   }|d   }
|j                         |d	<   |D cg c]3  } j
                  j                  j                  |j                            5 c}|d
<   |
D cg c]  } j                  |       c}|d<   g d}t        |d	   |d
   |d         D cg c]  }t        t        ||             }}|S c c}w c c}	w c c}w c c}w c c}w c c}w )Nr=   r   c           
          j                  t        j                  | d   z  dz  | d   z  dz  | d   z  dz  | d   z  dz  g            S )Nr   i  r   r      )_get_bounding_boxr>   Tensor)rH   r@   r   rA   s    r#   unnormalizez8ObjectDetectionPipeline.postprocess.<locals>.unnormalize   si    --LL"T!W_t3#d1g-4"T!W_t3#d1g-4		 	r$   logits)dimrH   )scorelabelboxscoreslabelsr;   )rE   tolistsqueezesoftmaxmaxrI   configid2labelzipdictrB   post_process_object_detectionitemrQ   )r   rL   r'   r=   rS   rZ   classes
predictionr[   rH   r;   keysvals
annotationraw_annotationsraw_annotationrX   rY   r@   rA   s   `                 @@r#   postprocessz#ObjectDetectionPipeline.postprocess   s$   #M2>>% (N113MFE
 ,H5==a@HHRHPTTY[T\OFGOV~~O_`djj''00<`F`3@3H3P3PQR3ST4[&TET,D<?QWY^<_wDcghicjmvcv$s4/wJw( # #22PPQ^`ikvwO,Q/N#H-F#H-F"7+E'-}}N8$^d'eUZ

(9(9(B(B5::<(P'eN8$NS&Tst'='=c'B&TN7# -D  x 8.:RTbcjTkl St_%J 
 / aTw (f&Ts*   <*G=>H0H>H!8H#HHrY   ztorch.Tensorc                 Z    |j                         j                         \  }}}}||||d}|S )a%  
        Turns list [xmin, xmax, ymin, ymax] into dict { "xmin": xmin, ... }

        Args:
            box (`torch.Tensor`): Tensor containing the coordinates in corners format.

        Returns:
            bbox (`dict[str, int]`): Dict containing the coordinates in corners format.
        )xminyminxmaxymax)intr\   )r   rY   ro   rp   rq   rr   rH   s          r#   rQ   z)ObjectDetectionPipeline._get_bounding_box   s;     "%!1!1!3dD$	
 r$   r/   )g      ?)__name__
__module____qualname____doc___load_processor_load_image_processor_load_feature_extractor_load_tokenizerr   r+   r   r   strr   listrc   r1   rF   rM   rm   rs   rQ   __classcell__)r"   s   @r#   r   r      s   0 O #O'9 peC$67ppspW[\`adfiai\jWkp p)#Ym!44)=@)LO)	d4S>"	#) )"14S#X+?$tDQTVYQYNG[B\+\ "1H+Z^ S#X r$   r   )typingr   r   r   r   utilsr   r	   r
   r   r   baser   r   image_utilsr   r>   models.auto.modeling_autor   r   PILr   
get_loggerrt   loggerr   r(   r$   r#   <module>r      ss    6 6 k k 4 ( 
 			H	% ,FGkh k Hkr$   