
    qi,&                         d Z ddlmZ ddlmZ ddlmZmZ ddlm	Z	m
Z
 ddlmZmZmZmZmZmZ ddlmZ dd	lmZmZ g d
Zg dZ G d ded      Zd Z G d de      ZdgZy)z"Image processor class for Idefics.    )Callable)Image   )BaseImageProcessorBatchFeature)resizeto_channel_dimension_format)ChannelDimension
ImageInputPILImageResamplingmake_flat_list_of_imagesto_numpy_arrayvalid_images)ImagesKwargs)
TensorTypeis_torch_available)g3<4'?gwgM?gy{ ?)gB91?gwt.?g	U?c                   6    e Zd ZU dZedz  ed<   eeef   ed<   y)IdeficsImageProcessorKwargsa  
    transform (`Callable`, *optional*):
        A custom transform function that accepts a single image can be passed for training. For example,
        `torchvision.Compose` can be used to compose multiple transforms. If `None` - an inference mode is
        assumed - and then a preset of inference-specific transforms will be applied to the images
    image_size (`dict[str, int]`, *optional*):
        Resize to image size
    N	transform
image_size)	__name__
__module____qualname____doc__r   __annotations__dictstrint     f/opt/pipecat/venv/lib/python3.12/site-packages/transformers/models/idefics/image_processing_idefics.pyr   r   &   s!     $S#Xr    r   F)totalc                     | j                   dk(  r| S | j                  d      }t        j                  d|j                  d      }t        j
                  ||      }|j                  d      }|S )NRGBRGBA)   r&   r&   )modeconvertr   newsizealpha_composite)image
image_rgba
backgroundr+   s       r!   convert_to_rgbr/   4   sa     zzUv&J6:??ODJ++J
CO%--e4Or    c                   6    e Zd ZdZdgZeZ	 	 	 	 	 	 ddedee	e   z  dz  dee	e   z  dz  dedz  d	e
d
eez  ddf fdZdddddddej                  fdededz  deeef   dz  dee	e   z  dz  dee	e   z  dz  dedz  d	e
dz  d
edz  deez  dz  defdZ xZS )IdeficsImageProcessora  
    Constructs a Idefics image processor.

    Args:
        image_size (`int`, *optional*, defaults to 224):
            Resize to image size
        image_mean (`float` or `list[float]`, *optional*, defaults to `IDEFICS_STANDARD_MEAN`):
            Mean to use if normalizing the image. This is a float or list of floats the length of the number of
            channels in the image. Can be overridden by the `image_mean` parameter in the `preprocess` method. Can be
            overridden by the `image_mean` parameter in the `preprocess` method.
        image_std (`float` or `list[float]`, *optional*, defaults to `IDEFICS_STANDARD_STD`):
            Standard deviation to use if normalizing the image. This is a float or list of floats the length of the
            number of channels in the image. Can be overridden by the `image_std` parameter in the `preprocess` method.
            Can be overridden by the `image_std` parameter in the `preprocess` method.
        image_num_channels (`int`, *optional*, defaults to 3):
            Number of image channels.
        do_rescale (`bool`, *optional*, defaults to `True`):
            Whether to rescale the image by the specified scale `rescale_factor`. Can be overridden by `do_rescale` in
            the `preprocess` method.
        rescale_factor (`int` or `float`, *optional*, defaults to `1/255`):
            Scale factor to use if rescaling the image. Can be overridden by `rescale_factor` in the `preprocess`
            method.
    pixel_valuesNr   r   
image_mean	image_stdimage_num_channels
do_rescalerescale_factorreturnc                     t        |   di | || _        || _        ||nt        | _        ||nt        | _        || _        || _	        y )Nr   )
super__init__r   r5   IDEFICS_STANDARD_MEANr3   IDEFICS_STANDARD_STDr4   r6   r7   )	selfr   r3   r4   r5   r6   r7   kwargs	__class__s	           r!   r;   zIdeficsImageProcessor.__init__]   sQ     	"6"$"4(2(>*DY&/&;AU$,r    imagesr   return_tensorsc
                    ||n| j                   }||n| j                  }||n| j                  }||n| j                  }||n| j                  }||n| j
                  }||f}t        |t              rt        |      dk(  rg S | j                  |      }t        |      }t        |      st        d      |?t               st        d      ddl}|D cg c]
  } ||       }}|j!                  |      S |D cg c]  }t#        |       }}|D cg c]  }t%        |       }}|D cg c]  }t'        ||t(        j*                          }}|D cg c]  }| j-                  ||       }}|D cg c]  }| j/                  |||       }}|D cg c]  }t1        |t2        j4                         }}t7        d|i|		      d   }|S c c}w c c}w c c}w c c}w c c}w c c}w c c}w )
a|  
        Preprocess a batch of images.

        Args:
            images (`ImageInput`):
                A list of images to preprocess.
            image_size (`int`, *optional*, defaults to `self.image_size`):
                Resize to image size
            image_num_channels (`int`, *optional*, defaults to `self.image_num_channels`):
                Number of image channels.
            image_mean (`float` or `list[float]`, *optional*, defaults to `IDEFICS_STANDARD_MEAN`):
                Mean to use if normalizing the image. This is a float or list of floats the length of the number of
                channels in the image. Can be overridden by the `image_mean` parameter in the `preprocess` method. Can
                be overridden by the `image_mean` parameter in the `preprocess` method.
            image_std (`float` or `list[float]`, *optional*, defaults to `IDEFICS_STANDARD_STD`):
                Standard deviation to use if normalizing the image. This is a float or list of floats the length of the
                number of channels in the image. Can be overridden by the `image_std` parameter in the `preprocess`
                method. Can be overridden by the `image_std` parameter in the `preprocess` method.
            transform (`Callable`, *optional*, defaults to `None`):
                A custom transform function that accepts a single image can be passed for training. For example,
                `torchvision.Compose` can be used to compose multiple transforms. If `None` - an inference mode is
                assumed - and then a preset of inference-specific transforms will be applied to the images
            do_rescale (`bool`, *optional*, defaults to `True`):
                Whether to rescale the image by the specified scale `rescale_factor`. Can be overridden by `do_rescale` in
                the `preprocess` method.
            rescale_factor (`int` or `float`, *optional*, defaults to `1/255`):
                Scale factor to use if rescaling the image. Can be overridden by `rescale_factor` in the `preprocess`
                method.

        Returns:
            a PyTorch tensor of the processed images

        Nr   zSInvalid image type. Must be of type PIL.Image.Image, numpy.ndarray, or torch.Tensorz.To pass in `transform` torch must be installed)resample)r,   scale)meanstdr2   )datatensor_type)r   r5   r3   r4   r6   r7   
isinstancelistlenfetch_imagesr   r   
ValueErrorr   ImportErrortorchstackr/   r   r   r   BICUBICrescale	normalizer	   r
   FIRSTr   )r>   rA   r5   r   r3   r4   r   r6   r7   rB   r?   r*   rP   xr,   s                  r!   
preprocessz IdeficsImageProcessor.preprocessp   s   \ $.#9Zt
3E3Q/W[WnWn#-#9Zt
!*!6IDNN	#-#9Zt
+9+E4K^K^J'fd#Fq(8I""6*)&1F#rss  %'!"RSS,23qil3F3;;v&& .44.#44-34.#44PVW1&D+=+E+EFWWOUVe$,,U.,AVVMST$...CTTRXYQ-a1A1G1GHYYNF#;XYgh 4 54WVTYs*   G<GG,#GG!5G&!G+)   NNr   Tgp?)r   r   r   r   model_input_namesr   valid_kwargsr   floatrK   boolr;   r   PYTORCHr   r   r   r   rW   __classcell__)r@   s   @r!   r1   r1   A   sn   0 ((.L 1504)*&--- DK'$.- 4;&-	-
  $J- - e- 
-, *+,01504%)"&'+2<2D2DYY  $JY cNT)	Y
 DK'$.Y 4;&-Y d?Y 4KY Y j(4/Y 
Yr    r1   N)r   collections.abcr   PILr   image_processing_utilsr   r   image_transformsr   r	   image_utilsr
   r   r   r   r   r   processing_utilsr   utilsr   r   r<   r=   r   r/   r1   __all__r   r    r!   <module>rg      sc    ) $  F C  - 3 < ; ,e 
H. HV #
#r    