
    qi                        U d Z ddlZddlZddlmZ ddlmZ ddlmZ ddl	Z
ddlmZmZmZ ddlmZmZ dd	lmZmZmZmZmZmZmZmZmZmZmZ dd
lmZ ddl m!Z!m"Z"m#Z#m$Z$ ddl%m&Z&  e#       rddl'Z' e$jP                  e)      Z*eZ+eZ,g dZ-g dZ.dZ/e0e1d<    G d ded      Z2 G d d      Z3 e&d       G d de             Z4dgZ5y)z Image processor class for Flava.    N)Iterable)	lru_cache)Any   )BaseImageProcessorBatchFeatureget_size_dict)resizeto_channel_dimension_format)OPENAI_CLIP_MEANOPENAI_CLIP_STDChannelDimension
ImageInputPILImageResamplinginfer_channel_dimension_formatis_scaled_imagemake_flat_list_of_imagesto_numpy_arrayvalid_imagesvalidate_preprocess_arguments)ImagesKwargs)
TensorTypefilter_out_non_signature_kwargsis_vision_availablelogging)requires)        r   r   )      ?r   r   g?LOGIT_LAPLACE_EPSc                       e Zd ZU dZeed<   eed<   eed<   eed<   eed<   eed<   eed<   eed	<   eed
<   eed<   eed<   eed<   eed<   eed<   eez  ed<   eed<   eed<   eee   z  ed<   eee   z  ed<   y)FlavaImageProcessorKwargsaI  
    return_image_mask (`bool`, *optional*, defaults to `False`):
        Whether to return the image mask. Can be overridden by the `return_image_mask` parameter in `preprocess`.
    input_size_patches (`int`, *optional*, defaults to 14):
        Number of patches in the image in height and width direction. 14x14 = 196 total patches. Can be overridden
        by the `input_size_patches` parameter in `preprocess`.
    total_mask_patches (`int`, *optional*, defaults to 75):
        Total number of patches that should be masked. Can be overridden by the `total_mask_patches` parameter in
        `preprocess`.
    mask_group_min_patches (`int`, *optional*, defaults to 16):
        Minimum number of patches that should be masked. Can be overridden by the `mask_group_min_patches`
        parameter in `preprocess`.
    mask_group_max_patches (`int`, *optional*):
        Maximum number of patches that should be masked. Can be overridden by the `mask_group_max_patches`
        parameter in `preprocess`.
    mask_group_min_aspect_ratio (`float`, *optional*, defaults to 0.3):
        Minimum aspect ratio of the mask window. Can be overridden by the `mask_group_min_aspect_ratio` parameter
        in `preprocess`.
    mask_group_max_aspect_ratio (`float`, *optional*):
        Maximum aspect ratio of the mask window. Can be overridden by the `mask_group_max_aspect_ratio` parameter
        in `preprocess`.
    return_codebook_pixels (`bool`, *optional*, defaults to `False`):
        Whether to return the codebook pixel values.
    codebook_do_resize (`bool`, *optional*, defaults to `True`):
        Whether to resize the input for codebook to a certain. Can be overridden by the `codebook_do_resize`
        parameter in `preprocess`. `codebook_size`.
    codebook_size (`dict[str, int]`, *optional*, defaults to `{"height": 224, "width": 224}`):
        Resize the input for codebook to the given size. Can be overridden by the `codebook_size` parameter in
        `preprocess`.
    codebook_resample (`PILImageResampling`, *optional*, defaults to `PILImageResampling.LANCZOS`):
        Resampling filter to use if resizing the codebook image. Can be overridden by the `codebook_resample`
        parameter in `preprocess`.
    codebook_do_center_crop (`bool`, *optional*, defaults to `True`):
        Whether to crop the input for codebook at the center. If the input size is smaller than
        `codebook_crop_size` along any edge, the image is padded with 0's and then center cropped. Can be
        overridden by the `codebook_do_center_crop` parameter in `preprocess`.
    codebook_crop_size (`dict[str, int]`, *optional*, defaults to `{"height": 224, "width": 224}`):
        Desired output size for codebook input when applying center-cropping. Can be overridden by the
        `codebook_crop_size` parameter in `preprocess`.
    codebook_do_rescale (`bool`, *optional*, defaults to `True`):
        Whether to rescale the input for codebook by the specified scale `codebook_rescale_factor`. Can be
        overridden by the `codebook_do_rescale` parameter in `preprocess`.
    codebook_rescale_factor (`int` or `float`, *optional*, defaults to `1/255`):
        Defines the scale factor to use if rescaling the codebook image. Can be overridden by the
        `codebook_rescale_factor` parameter in `preprocess`.
    codebook_do_map_pixels (`bool`, *optional*, defaults to `True`):
        Whether to map the pixel values of the codebook input to (1 - 2e)x + e. Can be overridden by the
        `codebook_do_map_pixels` parameter in `preprocess`.
    codebook_do_normalize (`bool`, *optional*, defaults to `True`):
        Whether or not to normalize the input for codebook with `codebook_image_mean` and `codebook_image_std`. Can
        be overridden by the `codebook_do_normalize` parameter in `preprocess`.
    codebook_image_mean (`Optional[Union[float, Iterable[float]]]`, *optional*, defaults to `[0, 0, 0]`):
        The sequence of means for each channel, to be used when normalizing images for codebook. Can be overridden
        by the `codebook_image_mean` parameter in `preprocess`.
    codebook_image_std (`Optional[Union[float, Iterable[float]]]`, *optional*, defaults to `[0.5, 0.5, 0.5]`):
        The sequence of standard deviations for each channel, to be used when normalizing images for codebook. Can
        be overridden by the `codebook_image_std` parameter in `preprocess`.
    return_image_maskinput_size_patchestotal_mask_patchesmask_group_min_patchesmask_group_max_patchesmask_group_min_aspect_ratiomask_group_max_aspect_ratioreturn_codebook_pixelscodebook_do_resizecodebook_sizecodebook_resamplecodebook_do_center_cropcodebook_crop_sizecodebook_do_rescalecodebook_rescale_factorcodebook_do_map_pixelscodebook_do_normalizecodebook_image_meancodebook_image_stdN)	__name__
__module____qualname____doc__bool__annotations__intfloatr        b/opt/pipecat/venv/lib/python3.12/site-packages/transformers/models/flava/image_processing_flava.pyr!   r!   ;   s    9x !&&!&&  !! 5[(  %00//r>   r!   F)totalc                   t    e Zd Z	 	 	 	 	 	 ddeeeef   z  dededz  dededz  dedz  fdZd	 Zd
 Zd Z	d Z
y)FlavaMaskingGeneratorN
input_sizer$   r&   r%   r'   r(   c                 ,   t        |t              s|fdz  }|\  | _        | _        | j                  | j                  z  | _        || _        || _        ||n|| _        |xs d|z  }t        j                  |      t        j                  |      f| _
        y )N      )
isinstancetupleheightwidthnum_patchesr$   r%   r&   mathloglog_aspect_ratio)selfrC   r$   r&   r%   r'   r(   s          r?   __init__zFlavaMaskingGenerator.__init__   s     *e,$*J",TZ;;3"4&<#<R<Z&8`v#&A&dQIdEd#!%*E!FQlHm nr>   c           	          d| j                   | j                  | j                  | j                  | j                  | j
                  d   | j
                  d   fz  }|S )Nz<MaskingGenerator(%d, %d -> [%d ~ %d], max = %d, %.3f ~ %.3f)r   rF   )rI   rJ   r%   r&   r$   rN   )rO   repr_strs     r?   __repr__zFlavaMaskingGenerator.__repr__   s^    QKKJJ''''##!!!$!!!$U
 
 r>   c                 2    | j                   | j                  fS )NrI   rJ   )rO   s    r?   	get_shapezFlavaMaskingGenerator.get_shape   s    {{DJJ&&r>   c           	      4   d}t        d      D ]  }t        j                  | j                  |      }t	        j
                  t        j                  | j                         }t        t        t	        j                  ||z                    }t        t        t	        j                  ||z                    }|| j                  k  s|| j                  k  st        j                  d| j                  |z
        }	t        j                  d| j                  |z
        }
||	|	|z   |
|
|z   f   j                         }d||z  |z
  cxk  r|k  rBn n?t        |	|	|z         D ]-  }t        |
|
|z         D ]  }|||f   dk(  sd|||f<   |dz  } / |dkD  s |S  |S )Nr   
   rF   )rangerandomuniformr%   rL   exprN   r;   roundsqrtrJ   rI   randintsum)rO   maskmax_mask_patchesdelta_attempttarget_areaaspect_ratiorI   rJ   topleft
num_maskedijs                 r?   _maskzFlavaMaskingGenerator._mask   s   b	 	H ..)D)DFVWK88FNND4I4I$JKLtyy|)CDEFFdiil(BCDEEtzz!ft{{&:nnQf(<=~~ae);<!#f"4dTE\6I"IJNNP
v~
2F6FF"3f5 +!&tTE\!: +A#AqDzQ-.QT
 %
++ 19)	( r>   c                 "   t        j                  | j                         t              }d}|| j                  k  rT| j                  |z
  }t        || j                        }| j                  ||      }|dk(  r	 |S ||z  }|| j                  k  rT|S )N)shapedtyper   )npzerosrV   r;   r$   minr&   rl   )rO   ra   
mask_countrb   rc   s        r?   __call__zFlavaMaskingGenerator.__call__   s    xxdnn.c:
4222#66C"#3T5P5PQJJt%56Ez  e#
 4222 r>   )   K   N   333333?N)r5   r6   r7   r;   rH   r<   rP   rS   rV   rl   rt   r=   r>   r?   rB   rB      s     -/"$-1&(4748o%S/)o  o !$d
	o
 !$o &+T\o &+T\o,
'0r>   rB   )vision)backendsc            F           e Zd ZdZdgZeZddej                  ddddddddddd	dd
ddddej                  ddddddddfde
deeef   dz  dede
deeef   dz  de
deez  de
deee   z  dz  deee   z  dz  de
dededededz  dededz  de
de
de
dz  ded e
d!edz  d"e
d#eez  d$e
d%e
d&eee   z  dz  d'eee   z  dz  d(df< fd)Zed*eeef   f fd+       Zed(efd,       Zej                  ddfd-ej0                  deeef   ded.eez  dz  d/eez  dz  d(ej0                  fd0Zd-ej0                  d(ej0                  fd1Zdddddddddddej8                  dfd-ede
dz  deeef   dz  dedz  de
dz  deeef   dz  de
dz  dedz  de
dz  deee   z  dz  deee   z  dz  d2e
dz  d.edz  d/edz  d(ej0                  fd3Z e        ddddddddddddddddddddddddddddddej8                  df d4ede
dz  deeef   dz  dedz  de
dz  deeef   dz  de
dz  dedz  de
dz  deee   z  dz  deee   z  dz  de
dz  dedz  dedz  dedz  dedz  dedz  dedz  de
dz  de
dz  deeef   dz  dedz  d e
dz  d!eeef   dz  d"e
dz  d#edz  d$e
dz  d%e
dz  d&ee   dz  d'ee   dz  d5ee!z  dz  d.ed/eez  dz  d(e"jF                  jF                  fDd6       Z$ xZ%S )7FlavaImageProcessora  
    Constructs a Flava image processor.

    Args:
        do_resize (`bool`, *optional*, defaults to `True`):
            Whether to resize the image's (height, width) dimensions to the specified `size`. Can be overridden by the
            `do_resize` parameter in `preprocess`.
        size (`dict[str, int]` *optional*, defaults to `{"height": 224, "width": 224}`):
            Size of the image after resizing. Can be overridden by the `size` parameter in `preprocess`.
        resample (`PILImageResampling`, *optional*, defaults to `PILImageResampling.BICUBIC`):
            Resampling filter to use if resizing the image. Can be overridden by the `resample` parameter in
            `preprocess`.
        do_center_crop (`bool`, *optional*, defaults to `True`):
            Whether to center crop the images. Can be overridden by the `do_center_crop` parameter in `preprocess`.
        crop_size (`dict[str, int]` *optional*, defaults to `{"height": 224, "width": 224}`):
            Size of image after the center crop `(crop_size["height"], crop_size["width"])`. Can be overridden by the
            `crop_size` parameter in `preprocess`.
        do_rescale (`bool`, *optional*, defaults to `True`):
            Whether to rescale the image by the specified scale `rescale_factor`. Can be overridden by the `do_rescale`
            parameter in `preprocess`.
        rescale_factor (`int` or `float`, *optional*, defaults to `1/255`):
            Scale factor to use if rescaling the image. Can be overridden by the `rescale_factor` parameter in
            `preprocess`.
        do_normalize (`bool`, *optional*, defaults to `True`):
            Whether to normalize the image. Can be overridden by the `do_normalize` parameter in `preprocess`.
        image_mean (`float` or `list[float]`, *optional*, defaults to `IMAGENET_STANDARD_MEAN`):
            Mean to use if normalizing the image. This is a float or list of floats the length of the number of
            channels in the image. Can be overridden by the `image_mean` parameter in the `preprocess` method.
        image_std (`float` or `list[float]`, *optional*, defaults to `IMAGENET_STANDARD_STD`):
            Standard deviation to use if normalizing the image. This is a float or list of floats the length of the
            number of channels in the image. Can be overridden by the `image_std` parameter in the `preprocess` method.
        return_image_mask (`bool`, *optional*, defaults to `False`):
            Whether to return the image mask. Can be overridden by the `return_image_mask` parameter in `preprocess`.
        input_size_patches (`int`, *optional*, defaults to 14):
            Number of patches in the image in height and width direction. 14x14 = 196 total patches. Can be overridden
            by the `input_size_patches` parameter in `preprocess`.
        total_mask_patches (`int`, *optional*, defaults to 75):
            Total number of patches that should be masked. Can be overridden by the `total_mask_patches` parameter in
            `preprocess`.
        mask_group_min_patches (`int`, *optional*, defaults to 16):
            Minimum number of patches that should be masked. Can be overridden by the `mask_group_min_patches`
            parameter in `preprocess`.
        mask_group_max_patches (`int`, *optional*):
            Maximum number of patches that should be masked. Can be overridden by the `mask_group_max_patches`
            parameter in `preprocess`.
        mask_group_min_aspect_ratio (`float`, *optional*, defaults to 0.3):
            Minimum aspect ratio of the mask window. Can be overridden by the `mask_group_min_aspect_ratio` parameter
            in `preprocess`.
        mask_group_max_aspect_ratio (`float`, *optional*):
            Maximum aspect ratio of the mask window. Can be overridden by the `mask_group_max_aspect_ratio` parameter
            in `preprocess`.
        codebook_do_resize (`bool`, *optional*, defaults to `True`):
            Whether to resize the input for codebook to a certain. Can be overridden by the `codebook_do_resize`
            parameter in `preprocess`. `codebook_size`.
        codebook_size (`dict[str, int]`, *optional*, defaults to `{"height": 224, "width": 224}`):
            Resize the input for codebook to the given size. Can be overridden by the `codebook_size` parameter in
            `preprocess`.
        codebook_resample (`PILImageResampling`, *optional*, defaults to `PILImageResampling.LANCZOS`):
            Resampling filter to use if resizing the codebook image. Can be overridden by the `codebook_resample`
            parameter in `preprocess`.
        codebook_do_center_crop (`bool`, *optional*, defaults to `True`):
            Whether to crop the input for codebook at the center. If the input size is smaller than
            `codebook_crop_size` along any edge, the image is padded with 0's and then center cropped. Can be
            overridden by the `codebook_do_center_crop` parameter in `preprocess`.
        codebook_crop_size (`dict[str, int]`, *optional*, defaults to `{"height": 224, "width": 224}`):
            Desired output size for codebook input when applying center-cropping. Can be overridden by the
            `codebook_crop_size` parameter in `preprocess`.
        codebook_do_rescale (`bool`, *optional*, defaults to `True`):
            Whether to rescale the input for codebook by the specified scale `codebook_rescale_factor`. Can be
            overridden by the `codebook_do_rescale` parameter in `preprocess`.
        codebook_rescale_factor (`int` or `float`, *optional*, defaults to `1/255`):
            Defines the scale factor to use if rescaling the codebook image. Can be overridden by the
            `codebook_rescale_factor` parameter in `preprocess`.
        codebook_do_map_pixels (`bool`, *optional*, defaults to `True`):
            Whether to map the pixel values of the codebook input to (1 - 2e)x + e. Can be overridden by the
            `codebook_do_map_pixels` parameter in `preprocess`.
        codebook_do_normalize (`bool`, *optional*, defaults to `True`):
            Whether or not to normalize the input for codebook with `codebook_image_mean` and `codebook_image_std`. Can
            be overridden by the `codebook_do_normalize` parameter in `preprocess`.
        codebook_image_mean (`Optional[Union[float, Iterable[float]]]`, *optional*, defaults to `[0, 0, 0]`):
            The sequence of means for each channel, to be used when normalizing images for codebook. Can be overridden
            by the `codebook_image_mean` parameter in `preprocess`.
        codebook_image_std (`Optional[Union[float, Iterable[float]]]`, *optional*, defaults to `[0.5, 0.5, 0.5]`):
            The sequence of standard deviations for each channel, to be used when normalizing images for codebook. Can
            be overridden by the `codebook_image_std` parameter in `preprocess`.
    pixel_valuesTNgp?Fru   rv   rw   rx   	do_resizesizeresampledo_center_crop	crop_size
do_rescalerescale_factordo_normalize
image_mean	image_stdr"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   returnc                    t        |   di | ||nddd}t        |      }||nddd}t        |d      }||nddd}t        |d      }||nddd}t        |d      }|| _        || _        || _        || _        || _        || _        || _	        || _
        |	|	nt        | _        |
|
nt        | _        || _        || _        || _        || _        || _        || _        || _        || _        || _        || _        || _        || _        || _        || _        || _        || _        || _        || _         ||ntB        | _         ||| _#        y tD        | _#        y )	N   rU   r   
param_namep   r+   r.   r=   )$superrP   r	   r~   r   r   r   r   r   r   r   FLAVA_IMAGE_MEANr   FLAVA_IMAGE_STDr   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   FLAVA_CODEBOOK_MEANFLAVA_CODEBOOK_STDr4   ) rO   r~   r   r   r   r   r   r   r   r   r   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   kwargs	__class__s                                   r?   rP   zFlavaImageProcessor.__init__9  s   F 	"6"'tc-JT"!*!6IsUX<Y	!)D	)6)BSVadHe%mP3E3Q/bepsWt*+=J^_"	 $,,"((2(>*DT&/&;!2"4"4&<#&<#+F(+F(&<#"4*!2'>$"4#6 '>$&<#%:"#6 :M:Y#6_r 8J8V"4\nr>   image_processor_dictc                     |j                         }d|v r|j                  d      |d<   d|v r|j                  d      |d<   t        |   |fi |S )z
        Overrides the `from_dict` method from the base class to make sure parameters are updated if image processor is
        created using from_dict and kwargs e.g. `FlavaImageProcessor.from_pretrained(checkpoint, codebook_size=600)`
        r+   r.   )copypopr   	from_dict)clsr   r   r   s      r?   r   zFlavaImageProcessor.from_dict  sd      488:f$4:JJ4O 16)9?DX9Y !56w !5@@@r>   c                 $    t        ||||||      S )N)rC   r$   r%   r&   r'   r(   )rB   )rO   r#   r$   r%   r&   r'   r(   s          r?   masking_generatorz%FlavaImageProcessor.masking_generator  s#     %)1#9#9(C(C
 	
r>   imagedata_formatinput_data_formatc                     t        |      }d|vsd|vrt        d|j                                |d   |d   f}t        |f||||d|S )a  
        Resize an image to `(size["height"], size["width"])`.

        Args:
            image (`np.ndarray`):
                Image to resize.
            size (`dict[str, int]`):
                Dictionary in the format `{"height": int, "width": int}` specifying the size of the output image.
            resample (`PILImageResampling`, *optional*, defaults to `PILImageResampling.BICUBIC`):
                `PILImageResampling` filter to use when resizing the image e.g. `PILImageResampling.BICUBIC`.
            data_format (`ChannelDimension` or `str`, *optional*):
                The channel dimension format for the output image. If unset, the channel dimension format of the input
                image is used. Can be one of:
                - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
                - `"none"` or `ChannelDimension.NONE`: image in (height, width) format.
            input_data_format (`ChannelDimension` or `str`, *optional*):
                The channel dimension format for the input image. If unset, the channel dimension format is inferred
                from the input image. Can be one of:
                - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
                - `"none"` or `ChannelDimension.NONE`: image in (height, width) format.

        Returns:
            `np.ndarray`: The resized image.
        rI   rJ   zFThe `size` dictionary must contain the keys `height` and `width`. Got )r   r   r   r   )r	   
ValueErrorkeysr
   )rO   r   r   r   r   r   r   output_sizes           r?   r
   zFlavaImageProcessor.resize  sy    F T"47$#6efjfofofqersttH~tG}5
#/
 
 	
r>   c                 .    ddt         z  z
  |z  t         z   S )NrF   rE   )r   )rO   r   s     r?   
map_pixelszFlavaImageProcessor.map_pixels  s    A)))U25FFFr>   do_map_pixelsc                    t        |||	|
||||||
       t        |      }|r t        |      rt        j	                  d       |t        |      }|r| j                  ||||      }|r| j                  |||      }|r| j                  |||      }|	r| j                  ||
||      }|r| j                  |      }|t        |||      }|S )zPreprocesses a single image.)
r   r   r   r   r   r   r   r~   r   r   zIt looks like you are trying to rescale already rescaled images. If the input images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again.)r   r   r   r   )r   r   r   )r   scaler   )r   meanstdr   )input_channel_dim)r   r   r   loggerwarning_oncer   r
   center_croprescale	normalizer   r   )rO   r   r~   r   r   r   r   r   r   r   r   r   r   r   r   s                  r?   _preprocess_imagez%FlavaImageProcessor._preprocess_image  s    & 	&!)%!)	
 u%/%0s
 $ >u EKKe$]nKoE$$5yTe$fELLuNVgLhENNZYbsNtEOOE*E"/{VghEr>   imagesreturn_tensorsc"                    ||n| j                   }||n| j                  }t        |      }||n| j                  }||n| j                  }||n| j
                  }t        |d      }||n| j                  }||n| j                  }|	|	n| j                  }	|
|
n| j                  }
||n| j                  }||n| j                  }||n| j                  }||n| j                  }||n| j                  }||n| j                  }||n| j                   }||n| j"                  }||n| j$                  }||n| j&                  }||n| j(                  }t        |d      }||n| j*                  }||n| j,                  }||n| j.                  }||n| j0                  }||n| j2                  }t        |d      }||n| j4                  }||n| j6                  }||n| j8                  }||n| j:                  }t=        |      }t?        |      stA        d      |D "cg c]!  }"| jC                  |"||||||||	|
|d| |!      # }#}"d|#i}$|r1|D "cg c]!  }"| jC                  |"|||||||||||| |!      # }%}"|%|$d	<   |r0| jE                  ||||||
      }&|D 'cg c]	  }' |&        }(}'|(|$d<   tG        |$|      S c c}"w c c}"w c c}'w )a  
        Preprocess an image or batch of images.

        Args:
            images (`ImageInput`):
                Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If
                passing in images with pixel values between 0 and 1, set `do_rescale=False`.
            do_resize (`bool`, *optional*, defaults to `self.do_resize`):
                Whether to resize the image.
            size (`dict[str, int]`, *optional*, defaults to `self.size`):
                Size of the image.
            resample (`int`, *optional*, defaults to `self.resample`):
                Resampling filter to use if resizing the image. This can be one of the enum `PILImageResampling`, Only
                has an effect if `do_resize` is set to `True`.
            do_center_crop (`bool`, *optional*, defaults to `self.do_center_crop`):
                Whether to center crop the image.
            crop_size (`dict[str, int]`, *optional*, defaults to `self.crop_size`):
                Size of the center crop. Only has an effect if `do_center_crop` is set to `True`.
            do_rescale (`bool`, *optional*, defaults to `self.do_rescale`):
                Whether to rescale the image values between [0 - 1].
            rescale_factor (`float`, *optional*, defaults to `self.rescale_factor`):
                Rescale factor to rescale the image by if `do_rescale` is set to `True`.
            do_normalize (`bool`, *optional*, defaults to `self.do_normalize`):
                Whether to normalize the image.
            image_mean (`float` or `list[float]`, *optional*, defaults to `self.image_mean`):
                Image mean.
            image_std (`float` or `list[float]`, *optional*, defaults to `self.image_std`):
                Image standard deviation.
            return_image_mask (`bool`, *optional*, defaults to `self.return_image_mask`):
                Whether to return the image mask.
            input_size_patches (`int`, *optional*, defaults to `self.input_size_patches`):
                Size of the patches to extract from the image.
            total_mask_patches (`int`, *optional*, defaults to `self.total_mask_patches`):
                Total number of patches to extract from the image.
            mask_group_min_patches (`int`, *optional*, defaults to `self.mask_group_min_patches`):
                Minimum number of patches to extract from the image.
            mask_group_max_patches (`int`, *optional*, defaults to `self.mask_group_max_patches`):
                Maximum number of patches to extract from the image.
            mask_group_min_aspect_ratio (`float`, *optional*, defaults to `self.mask_group_min_aspect_ratio`):
                Minimum aspect ratio of the patches to extract from the image.
            mask_group_max_aspect_ratio (`float`, *optional*, defaults to `self.mask_group_max_aspect_ratio`):
                Maximum aspect ratio of the patches to extract from the image.
            return_codebook_pixels (`bool`, *optional*, defaults to `self.return_codebook_pixels`):
                Whether to return the codebook pixels.
            codebook_do_resize (`bool`, *optional*, defaults to `self.codebook_do_resize`):
                Whether to resize the codebook pixels.
            codebook_size (`dict[str, int]`, *optional*, defaults to `self.codebook_size`):
                Size of the codebook pixels.
            codebook_resample (`int`, *optional*, defaults to `self.codebook_resample`):
                Resampling filter to use if resizing the codebook pixels. This can be one of the enum
                `PILImageResampling`, Only has an effect if `codebook_do_resize` is set to `True`.
            codebook_do_center_crop (`bool`, *optional*, defaults to `self.codebook_do_center_crop`):
                Whether to center crop the codebook pixels.
            codebook_crop_size (`dict[str, int]`, *optional*, defaults to `self.codebook_crop_size`):
                Size of the center crop of the codebook pixels. Only has an effect if `codebook_do_center_crop` is set
                to `True`.
            codebook_do_rescale (`bool`, *optional*, defaults to `self.codebook_do_rescale`):
                Whether to rescale the codebook pixels values between [0 - 1].
            codebook_rescale_factor (`float`, *optional*, defaults to `self.codebook_rescale_factor`):
                Rescale factor to rescale the codebook pixels by if `codebook_do_rescale` is set to `True`.
            codebook_do_map_pixels (`bool`, *optional*, defaults to `self.codebook_do_map_pixels`):
                Whether to map the codebook pixels values.
            codebook_do_normalize (`bool`, *optional*, defaults to `self.codebook_do_normalize`):
                Whether to normalize the codebook pixels.
            codebook_image_mean (`float` or `list[float]`, *optional*, defaults to `self.codebook_image_mean`):
                Codebook pixels mean to normalize the codebook pixels by if `codebook_do_normalize` is set to `True`.
            codebook_image_std (`float` or `list[float]`, *optional*, defaults to `self.codebook_image_std`):
                Codebook pixels standard deviation to normalize the codebook pixels by if `codebook_do_normalize` is
                set to `True`.
            return_tensors (`str` or `TensorType`, *optional*):
                The type of tensors to return. Can be one of:
                    - Unset: Return a list of `np.ndarray`.
                    - `TensorType.PYTORCH` or `'pt'`: Return a batch of type `torch.Tensor`.
                    - `TensorType.NUMPY` or `'np'`: Return a batch of type `np.ndarray`.
            data_format (`ChannelDimension` or `str`, *optional*, defaults to `ChannelDimension.FIRST`):
                The channel dimension format for the output image. Can be one of:
                    - `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                    - `ChannelDimension.LAST`: image in (height, width, num_channels) format.
            input_data_format (`ChannelDimension` or `str`, *optional*):
                The channel dimension format for the input image. If unset, the channel dimension format is inferred
                from the input image. Can be one of:
                - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
                - `"none"` or `ChannelDimension.NONE`: image in (height, width) format.
        r   r   r+   r.   zSInvalid image type. Must be of type PIL.Image.Image, numpy.ndarray, or torch.TensorF)r   r~   r   r   r   r   r   r   r   r   r   r   r   r   r}   codebook_pixel_values)r#   r$   r%   r&   r'   r(   bool_masked_pos)datatensor_type)$r~   r   r	   r   r   r   r   r   r   r   r   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r/   r0   r-   r.   r1   r2   r3   r4   r   r   r   r   r   r   ))rO   r   r~   r   r   r   r   r   r   r   r   r   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r   r   r   imgprocessed_imagesr   codebook_imagesmask_generator_maskss)                                            r?   
preprocesszFlavaImageProcessor.preprocess  s   x "+!6IDNN	'tTYYT"'38+9+E4K^K^!*!6IDNN	!)D	#-#9Zt
+9+E4K^K^'3'?|TEVEV#-#9Zt
!*!6IDNN	1B1N-TXTjTj3E3Q/W[WnWn3E3Q/W[WnWn&<&H"dNiNi 	 '=&H"dNiNi 	
 +6 (11 	$ +6 (11 	$ '=&H"dNiNi 	 4F3Q/W[WnWn)6)BHZHZ%mP1B1N-TXTjTj5H5T1Z^ZrZr'>'J#PTPlPl 	  (?'J#PTPlPl 	  4F3Q/W[WnWn*+=J^_&<&H"dNiNi 	 &;%F!DLfLf 	 6I5T1Z^ZrZr3E3Q/W[WnWn)&1F#rss& #
" ! ""#!-#%-)%##'"3 # 
 
&  01!$ "#" ! &&0&.#:02#:!620"8 +&7 ' O & -<D()!33#5#5'='=,G,G 4 N 066!^%6E6&+D"#>BBo
,< 7s   (&K&K$K)&r5   r6   r7   r8   model_input_namesr!   valid_kwargsr   BICUBICLANCZOSr9   dictstrr;   r<   r   rP   classmethodr   r   r   rB   r   rp   ndarrayr   r
   r   FIRSTr   listr   r   r   PILImager   __classcell__)r   s   @r?   r|   r|      sY   Un ((,L &*'9'A'A#+/&-!5948"'"$"$&(-1-048',#'%)!3!;!;(,)-$(/6'+&*>B=AAMoMo 38nt#Mo %	Mo
 Mo S>D(Mo Mo eMo Mo HUO+d2Mo 8E?*T1Mo  Mo  Mo  Mo  !$!Mo" !$d
#Mo$ &+%Mo& &+T\'Mo* !%+Mo, !-Mo. d{/Mo0 1Mo2 "&3Mo4  $J5Mo6 "7Mo8 "%u9Mo: !%;Mo<  $=Mo> #Xe_4t;?Mo@ "HUO3d:AMoD 
EMo^ 
AT#s(^ 
A 
A 
 

 
. (:'A'A59;?.
zz.
 38n.
 %	.

 ++d2.
 !11D8.
 
.
`G

 Grzz G "&&*.2&*+/"&'+$(1504%)/?/E/E59>> $;> 38nt#	>
 %t+> t> S>D(> 4K> > Tk> DK'$.> 4;&-> d{> &,> ,d2>  
!>@ %& "&&*.2&*+/"&'+$(1504)-)-)--1-14848.2*./3(,/348+/04.2-16:5926(8(>(>;?IqCqC $;qC 38nt#	qC
 %t+qC tqC S>D(qC 4KqC qC TkqC DK'$.qC 4;&-qC  $;qC  $JqC   $J!qC" !$d
#qC$ !$d
%qC& &+T\'qC( &+T\)qC, !%t-qC. !4K/qC0 CH~,1qC2 :3qC4 "&5qC6 !cNT17qC8 "D[9qC: "';qC< !%t=qC>  $d{?qC@ &e_t3AqCB %UOd2CqCD j(4/EqCF &GqCH !11D8IqCJ 
KqC 'qCr>   r|   )6r8   rL   rZ   collections.abcr   	functoolsr   typingr   numpyrp   image_processing_utilsr   r   r	   image_transformsr
   r   image_utilsr   r   r   r   r   r   r   r   r   r   r   processing_utilsr   utilsr   r   r   r   utils.import_utilsr   r   
get_loggerr5   r   r   r   r   r   r   r<   r:   r!   rB   r|   __all__r=   r>   r?   <module>r      s    '   $    U U C    - ^ ^ *  
		H	% $ !% $  5 P0E P0hK K\ 
;pC, pC  pCf !
!r>   