
    qi&                         d dl mZmZmZ d dlZddlmZmZm	Z	m
Z
mZ ddlmZmZ  e	       rd dlmZ ddlmZ  e       rdd	lmZmZmZmZ  e
j0                  e      Z e ed
             G d de             Zy)    )AnyUnionoverloadN   )add_end_docstringsis_torch_availableis_vision_availableloggingrequires_backends   )Pipelinebuild_pipeline_init_args)Image)
load_image)*MODEL_FOR_IMAGE_SEGMENTATION_MAPPING_NAMES-MODEL_FOR_INSTANCE_SEGMENTATION_MAPPING_NAMES-MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING_NAMES.MODEL_FOR_UNIVERSAL_SEGMENTATION_MAPPING_NAMEST)has_image_processorc                   ,    e Zd ZdZdZdZdZdZ fdZd Z	e
deedf   d	ed
eeeef      fd       Ze
dee   ed   z  d	ed
eeeeef         fd       Zdeedee   ed   f   d	ed
eeeef      eeeeef         z  f fdZddZd Z	 ddZ xZS )ImageSegmentationPipelinea  
    Image segmentation pipeline using any `AutoModelForXXXSegmentation`. This pipeline predicts masks of objects and
    their classes.

    Example:

    ```python
    >>> from transformers import pipeline

    >>> segmenter = pipeline(model="facebook/detr-resnet-50-panoptic")
    >>> segments = segmenter("https://huggingface.co/datasets/Narsil/image_dummy/raw/main/parrots.png")
    >>> len(segments)
    2

    >>> segments[0]["label"]
    'bird'

    >>> segments[1]["label"]
    'bird'

    >>> type(segments[0]["mask"])  # This is a black and white mask showing where is the bird on the original image.
    <class 'PIL.Image.Image'>

    >>> segments[0]["mask"].size
    (768, 512)
    ```


    This image segmentation pipeline can currently be loaded from [`pipeline`] using the following task identifier:
    `"image-segmentation"`.

    See the list of available models on
    [huggingface.co/models](https://huggingface.co/models?filter=image-segmentation).
    FTNc                    t        |   |i | t        | d       t        j                         }|j                  t               |j                  t               |j                  t               | j                  |       y )Nvision)
super__init__r   r   copyupdater   r   r   check_model_type)selfargskwargsmapping	__class__s       [/opt/pipecat/venv/lib/python3.12/site-packages/transformers/pipelines/image_segmentation.pyr   z"ImageSegmentationPipeline.__init__D   sb    $)&)$)<AACDEDEEFg&    c                     i }i }d|v r|d   |d<   |d   |d<   d|v r|d   |d<   d|v r|d   |d<   d|v r|d   |d<   d|v r|d   |d<   |i |fS )Nsubtask	thresholdmask_thresholdoverlap_mask_area_thresholdtimeout )r   r!   preprocess_kwargspostprocess_kwargss       r$   _sanitize_parametersz.ImageSegmentationPipeline._sanitize_parametersN   s    ,29,=y)+1)+<i(& .4[.A{+v%39:J3K/0(F2@FGd@e<=+1)+<i( "&888r%   inputszImage.Imager!   returnc                      y Nr,   r   r0   r!   s      r$   __call__z"ImageSegmentationPipeline.__call___   s    ber%   c                      y r3   r,   r4   s      r$   r5   z"ImageSegmentationPipeline.__call__b   s    nqr%   c                 h    d|v r|j                  d      }|t        d      t        |   |fi |S )a	  
        Perform segmentation (detect masks & classes) in the image(s) passed as inputs.

        Args:
            inputs (`str`, `list[str]`, `PIL.Image` or `list[PIL.Image]`):
                The pipeline handles three types of images:

                - A string containing an HTTP(S) link pointing to an image
                - A string containing a local path to an image
                - An image loaded in PIL directly

                The pipeline accepts either a single image or a batch of images. Images in a batch must all be in the
                same format: all as HTTP(S) links, all as local paths, or all as PIL images.
            subtask (`str`, *optional*):
                Segmentation task to be performed, choose [`semantic`, `instance` and `panoptic`] depending on model
                capabilities. If not set, the pipeline will attempt tp resolve in the following order:
                  `panoptic`, `instance`, `semantic`.
            threshold (`float`, *optional*, defaults to 0.9):
                Probability threshold to filter out predicted masks.
            mask_threshold (`float`, *optional*, defaults to 0.5):
                Threshold to use when turning the predicted masks into binary values.
            overlap_mask_area_threshold (`float`, *optional*, defaults to 0.5):
                Mask overlap threshold to eliminate small, disconnected segments.
            timeout (`float`, *optional*, defaults to None):
                The maximum time in seconds to wait for fetching images from the web. If None, no timeout is set and
                the call may block forever.

        Return:
            If the input is a single image, will return a list of dictionaries, if the input is a list of several images,
            will return a list of list of dictionaries corresponding to each image.

            The dictionaries contain the mask, label and score (where applicable) of each detected object and contains
            the following keys:

            - **label** (`str`) -- The class label identified by the model.
            - **mask** (`PIL.Image`) -- A binary mask of the detected object as a Pil Image of shape (width, height) of
              the original image. Returns a mask filled with zeros if no object is found.
            - **score** (*optional* `float`) -- Optionally, when the model is capable of estimating a confidence of the
              "object" described by the label and the mask.
        imageszICannot call the image-classification pipeline without an inputs argument!)pop
ValueErrorr   r5   )r   r0   r!   r#   s      r$   r5   z"ImageSegmentationPipeline.__call__e   sB    X vZZ)F>hiiw1&11r%   c                    t        ||      }|j                  |j                  fg}| j                  j                  j
                  j                  dk(  rx|i }nd|gi} | j                  d
|gdd|}|j                  | j                        }| j                  |d   d| j                  j                  j                  d      d   |d<   n/| j                  |gd      }|j                  | j                        }||d	<   |S )N)r+   OneFormerConfigtask_inputspt)r8   return_tensors
max_length)paddingr@   r?   	input_idstarget_sizer,   )r   heightwidthmodelconfigr#   __name__image_processortodtype	tokenizertask_seq_len)r   imager'   r+   rC   r!   r0   s          r$   
preprocessz$ImageSegmentationPipeline.preprocess   s   5'2ekk23::&&//3DD''3)T))X%XQWXFYYtzz*F$(NN}%$::,,99#	 %3 %
 %F=! ))%)NFYYtzz*F +}r%   c                 V    |j                  d      } | j                  di |}||d<   |S )NrC   r,   )r9   rF   )r   model_inputsrC   model_outputss       r$   _forwardz"ImageSegmentationPipeline._forward   s5    "&&}5"

2\2'2m$r%   c                 <   d }|dv r-t        | j                  d      r| j                  j                  }n0|dv r,t        | j                  d      r| j                  j                  }| ||||||d         d   }g }|d   }	|d	   D ]  }
|	|
d
   k(  dz  }t	        j
                  |j                         j                  t        j                        d      }| j                  j                  j                  |
d      }|
d   }|j                  |||d        |S |dv rt        | j                  d      r| j                  j                  ||d         d   }g }|j                         }	t        j                  |	      }|D ]v  }|	|k(  dz  }t	        j
                  |j                  t        j                        d      }| j                  j                  j                  |   }|j                  d ||d       x |S t!        d| dt#        | j                               )N>   Npanoptic"post_process_panoptic_segmentation>   Ninstance"post_process_instance_segmentationrC   )r(   r)   r*   target_sizesr   segmentationsegments_infoid   L)modelabel_idscore)ra   labelmask>   Nsemantic"post_process_semantic_segmentation)rY   zSubtask z is not supported for model )hasattrrI   rV   rX   r   	fromarraynumpyastypenpuint8rF   rG   id2labelappendre   uniquer:   type)r   rR   r'   r(   r)   r*   fnoutputs
annotationrZ   segmentrc   rb   ra   labelss                  r$   postprocessz%ImageSegmentationPipeline.postprocess   s4    ((WT5I5IKo-p%%HHB**wt7K7KMq/r%%HHB>#-,G*=9 G J">2L"?3 R$5<tzz|':':288'D3O

))227:3FG(!!EE4"PQR. ! **wt7K7KMq/r**MMM-,H N G J"==?LYY|,F Q$-4t{{288'<3G

))2259!!D5$"OP	Q  xy0LTRVR\R\M]L^_``r%   )NN)Ng?      ?rv   )rH   
__module____qualname____doc___load_processor_load_image_processor_load_feature_extractor_load_tokenizerr   r/   r   r   strr   listdictr5   rO   rS   ru   __classcell__)r#   s   @r$   r   r      s   !F O #O'9" euS-%78eCeDQUVY[^V^Q_L`e eqtCy4+>>q#qRVW[\`adfiai\jWkRlq q02CS	4;NNO02[^02	d38n	T$sCx.%9 :	:02d, kn,r%   r   )typingr   r   r   rh   rj   utilsr   r   r	   r
   r   baser   r   PILr   image_utilsr   models.auto.modeling_autor   r   r   r   
get_loggerrH   loggerr   r,   r%   r$   <module>r      ss    ' '  k k 4 (  
		H	% ,FGD D HDr%   