
    qi3                     @   d Z ddlmZmZ ddlmZ erddlmZ ddlZ	ddl
ZddlmZmZ ddlmZmZ dd	lmZmZmZmZmZmZmZmZmZmZ dd
lmZ ddlm Z m!Z!m"Z"m#Z#  e       rddl$Z$ e"jJ                  e&      Z' G d ded      Z( ed       G d de             Z)dgZ*y)zImage processor class for GLPN.    )TYPE_CHECKINGUnion   )requires)DepthEstimatorOutputN)BaseImageProcessorBatchFeature)resizeto_channel_dimension_format)
ChannelDimensionPILImageResamplingget_image_sizeinfer_channel_dimension_formatis_scaled_imageis_torch_availablemake_flat_list_of_imagesto_numpy_arrayvalid_imagesvalidate_preprocess_arguments)ImagesKwargs)
TensorTypefilter_out_non_signature_kwargsloggingrequires_backendsc                   &    e Zd ZU dZeed<   eed<   y)GLPNImageProcessorKwargsz
    size_divisor (`int`, *optional*, defaults to 32):
        When `do_resize` is `True`, images are resized so their height and width are rounded down to the closest
        multiple of `size_divisor`.
    size_divisorresampleN)__name__
__module____qualname____doc__int__annotations__r        `/opt/pipecat/venv/lib/python3.12/site-packages/transformers/models/glpn/image_processing_glpn.pyr   r   4   s       r&   r   F)total)vision)backendsc                       e Zd ZdZdgZeZddej                  ddfde	de
de	d	ed
z  dd
f
 fdZej                  d
d
fdej                  de
deded
z  deez  d
z  dej                  fdZ e       d
d
d
d
d
d
ej&                  d
fdedeed   ee   f   de	d
z  de
d
z  de	d
z  d	ed
z  deez  d
z  dedeez  d
z  defd       Z	 ddddeeee
e
f      z  d
z  d
z  deeeef      fdZ xZS )GLPNImageProcessora  
    Constructs a GLPN image processor.

    Args:
        do_resize (`bool`, *optional*, defaults to `True`):
            Whether to resize the image's (height, width) dimensions, rounding them down to the closest multiple of
            `size_divisor`. Can be overridden by `do_resize` in `preprocess`.
        size_divisor (`int`, *optional*, defaults to 32):
            When `do_resize` is `True`, images are resized so their height and width are rounded down to the closest
            multiple of `size_divisor`. Can be overridden by `size_divisor` in `preprocess`.
        resample (`PIL.Image` resampling filter, *optional*, defaults to `Resampling.BILINEAR`):
            Resampling filter to use if resizing the image. Can be overridden by `resample` in `preprocess`.
        do_rescale (`bool`, *optional*, defaults to `True`):
            Whether or not to apply the scaling factor (to make pixel values floats between 0. and 1.). Can be
            overridden by `do_rescale` in `preprocess`.
        rescale_factor (`float`, *optional*, defaults to `1 / 255`):
            The scaling factor to apply to the pixel values. Can be overridden by `rescale_factor` in `preprocess`.
    pixel_valuesT    gp?	do_resizer   
do_rescalerescale_factorNreturnc                 j    || _         || _        || _        || _        || _        t        |   di | y )Nr%   )r/   r0   r   r   r1   super__init__)selfr/   r   r   r0   r1   kwargs	__class__s          r'   r5   zGLPNImageProcessor.__init__W   s:     #$( ,"6"r&   imager   data_formatinput_data_formatc                 l    t        ||      \  }}||z  |z  }	||z  |z  }
t        ||	|
ff|||d|}|S )a*  
        Resize the image, rounding the (height, width) dimensions down to the closest multiple of size_divisor.

        If the image is of dimension (3, 260, 170) and size_divisor is 32, the image will be resized to (3, 256, 160).

        Args:
            image (`np.ndarray`):
                The image to resize.
            size_divisor (`int`):
                The image is resized so its height and width are rounded down to the closest multiple of
                `size_divisor`.
            resample:
                `PIL.Image` resampling filter to use when resizing the image e.g. `PILImageResampling.BILINEAR`.
            data_format (`ChannelDimension` or `str`, *optional*):
                The channel dimension format for the output image. If `None`, the channel dimension format of the input
                image is used. Can be one of:
                - `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                - `ChannelDimension.LAST`: image in (height, width, num_channels) format.
            input_data_format (`ChannelDimension` or `str`, *optional*):
                The channel dimension format of the input image. If not set, the channel dimension format is inferred
                from the input image. Can be one of:
                - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.

        Returns:
            `np.ndarray`: The resized image.
        )channel_dim)r   r:   r;   )r   r
   )r6   r9   r   r   r:   r;   r7   heightwidthnew_hnew_ws              r'   r
   zGLPNImageProcessor.resizeg   si    H 'u:KL,&5%4EN
 #/
 
 r&   imageszPIL.Image.Imagereturn_tensorsc
           	         ||n| j                   }||n| j                  }||n| j                  }||n| j                  }||n| j                  }t        |      }t        |      st        d      t        |||       |D 
cg c]  }
t        |
       }}
|r#t        |d         rt        j                  d       |	t        |d         }	|r"|D cg c]  }| j                  ||||	       }}|r!|D cg c]  }| j                  |||	       }}|D cg c]  }t!        |||	       }}d|i}t#        ||	      S c c}
w c c}w c c}w c c}w )
a	
  
        Preprocess the given images.

        Args:
            images (`PIL.Image.Image` or `TensorType` or `list[np.ndarray]` or `list[TensorType]`):
                Images to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If
                passing in images with pixel values between 0 and 1, set `do_normalize=False`.
            do_resize (`bool`, *optional*, defaults to `self.do_resize`):
                Whether to resize the input such that the (height, width) dimensions are a multiple of `size_divisor`.
            size_divisor (`int`, *optional*, defaults to `self.size_divisor`):
                When `do_resize` is `True`, images are resized so their height and width are rounded down to the
                closest multiple of `size_divisor`.
            resample (`PIL.Image` resampling filter, *optional*, defaults to `self.resample`):
                `PIL.Image` resampling filter to use if resizing the image e.g. `PILImageResampling.BILINEAR`. Only has
                an effect if `do_resize` is set to `True`.
            do_rescale (`bool`, *optional*, defaults to `self.do_rescale`):
                Whether or not to apply the scaling factor (to make pixel values floats between 0. and 1.).
            return_tensors (`str` or `TensorType`, *optional*):
                The type of tensors to return. Can be one of:
                    - `None`: Return a list of `np.ndarray`.
                    - `TensorType.PYTORCH` or `'pt'`: Return a batch of type `torch.Tensor`.
                    - `TensorType.NUMPY` or `'np'`: Return a batch of type `np.ndarray`.
            data_format (`ChannelDimension` or `str`, *optional*, defaults to `ChannelDimension.FIRST`):
                The channel dimension format for the output image. Can be one of:
                    - `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                    - `ChannelDimension.LAST`: image in (height, width, num_channels) format.
            input_data_format (`ChannelDimension` or `str`, *optional*):
                The channel dimension format for the input image. If unset, the channel dimension format is inferred
                from the input image. Can be one of:
                - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
                - `"none"` or `ChannelDimension.NONE`: image in (height, width) format.
        zSInvalid image type. Must be of type PIL.Image.Image, numpy.ndarray, or torch.Tensor)r/   sizer   r   zIt looks like you are trying to rescale already rescaled images. If the input images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again.)r   r   r;   )scaler;   )input_channel_dimr-   )datatensor_type)r/   r0   r1   r   r   r   r   
ValueErrorr   r   r   loggerwarning_oncer   r
   rescaler   r	   )r6   rB   r/   r   r   r0   r1   rC   r:   r;   imgr9   rH   s                r'   
preprocesszGLPNImageProcessor.preprocess   s   \ "+!6IDNN	#-#9Zt
+9+E4K^K^'3'?|TEVEV'38)&1F#rss 	&	
 288#.%88/&)4s
 $ >vay I $ ExctuF 
 lrchU.L]^F 
 ou
ej'{N_`
 
 '>BB9 9
s   E E7E
Eoutputsr   target_sizesc                 ~   t        | d       |j                  }|"t        |      t        |      k7  rt        d      g }|dgt        |      z  n|}t	        ||      D ]\  \  }}|B|d   }t
        j                  j                  j                  ||dd      }|j                         }|j                  d|i       ^ |S )	a  
        Converts the raw output of [`DepthEstimatorOutput`] into final depth predictions and depth PIL images.
        Only supports PyTorch.

        Args:
            outputs ([`DepthEstimatorOutput`]):
                Raw outputs of the model.
            target_sizes (`TensorType` or `list[tuple[int, int]]`, *optional*):
                Tensor of shape `(batch_size, 2)` or list of tuples (`tuple[int, int]`) containing the target size
                (height, width) of each image in the batch. If left to None, predictions will not be resized.

        Returns:
            `list[dict[str, TensorType]]`: A list of dictionaries of tensors representing the processed depth
            predictions.
        torchNz]Make sure that you pass in as many target sizes as the batch dimension of the predicted depth)NN.bicubicF)rE   modealign_cornerspredicted_depth)r   rW   lenrJ   ziprS   nn
functionalinterpolatesqueezeappend)r6   rP   rQ   rW   resultsdepthtarget_sizes          r'   post_process_depth_estimationz0GLPNImageProcessor.post_process_depth_estimation   s    ( 	$(!11$3+?3|CT+To  8D8LvO 44R^"%o|"D 	7E;&o.++77KV_ot7uNN-u56	7 r&   )N)r   r    r!   r"   model_input_namesr   valid_kwargsr   BILINEARboolr#   floatr5   npndarrayr   strr
   r   FIRSTr   r   listr	   rO   tupledictrb   __classcell__)r8   s   @r'   r,   r,   ?   s	   & ((+L #,,'.## #
 # # 
#( (:'B'B/3;?0zz0 0 %	0
 &,0 !11D80 
0d %& "&#'"&'+26(8(>(>;?]C'T:K5LdS]N^^_]C $;]C Dj	]C 4K]C ]C #S(4/]C &]C !11D8]C 
]C ']CD JN''' !4c3h#884?$F' 
d3
?#	$	'r&   r,   )+r"   typingr   r   utils.import_utilsr   modeling_outputsr   numpyrh   	PIL.ImagePILimage_processing_utilsr   r	   image_transformsr
   r   image_utilsr   r   r   r   r   r   r   r   r   r   processing_utilsr   utilsr   r   r   r   rS   
get_loggerr   rK   r   r,   __all__r%   r&   r'   <module>r}      s    & ' * 8   F C   - \ \  
		H	%!|5 ! 
;`+ `  `F  
 r&   