
    qi(              
          d dl Z d dlmZ d dlmZmZ d dlZddlmZ ddl	m
Z
 ddlmZmZ ddlmZmZmZmZ dd	lmZmZmZ d
dlmZ ddlmZ erddlmZ d dlmc mc m Z! ddde"ee"   z  de#de"def
dZ$e G d de             Z%dgZ&y)    N)Iterable)TYPE_CHECKINGOptional   )BatchFeature)BaseImageProcessorFast)group_images_by_shapereorder_images)IMAGENET_STANDARD_MEANIMAGENET_STANDARD_STDPILImageResamplingSizeDict)
TensorTypeauto_docstringrequires_backends   )BeitImageProcessorFast   )DPTImageProcessorKwargs)DepthEstimatorOutputinput_imagetorch.Tensoroutput_sizekeep_aspect_ratiomultiplereturnc                     dd}| j                   dd  \  }}|\  }}||z  }	||z  }
|r"t        d|
z
        t        d|	z
        k  r|
}	n|	}
 ||	|z  |      } ||
|z  |      }t        ||      S )Nc                     t        | |z        |z  }| ||kD  rt        j                  | |z        |z  }||k  rt        j                  | |z        |z  }|S N)roundmathfloorceil)valr   min_valmax_valxs        U/opt/pipecat/venv/lib/python3.12/site-packages/transformers/models/dpt/modular_dpt.pyconstrain_to_multiple_ofz>get_resize_output_image_size.<locals>.constrain_to_multiple_of4   s[    #.!H,1w;

3>*X5Aw;		#.)H4A    r   )r   heightwidth)r   N)shapeabsr   )r   r   r   r   r)   input_heightinput_widthoutput_heightoutput_widthscale_heightscale_width
new_height	new_widths                r(   get_resize_output_image_sizer9   .   s    	 !, 1 1"# 6L+"-M< !</L,Kq;#a,&6"77&L 'K),*EPXYJ({)BXVI:Y77r*   c            &          e Zd Zej                  ZeZeZ	dddZ
dZdZdZdZdZdZdZdZdZdZeZ	 	 	 	 d&dd	d
eded   dededz  dedd	fdZ	 d'dd	dedd	fdZded	   deded
eded   dedededededeee   z  dz  deee   z  dz  dededz  dededz  dedz  d ee z  dz  de!f&d!Z"	 d(d"d#d$e ee#eef      z  dz  dz  dee$ee f      fd%Z%y))DPTImageProcessorFasti  r,   TFgp?r   Nimager   sizeinterpolationztvF.InterpolationMode	antialiasensure_multiple_ofr   r   c                     |j                   r|j                  st        d|j                                t	        ||j                   |j                  f||      }t        j                  | ||||      S )a<  
        Resize an image to `(size["height"], size["width"])`.

        Args:
            image (`torch.Tensor`):
                Image to resize.
            size (`SizeDict`):
                Dictionary in the format `{"height": int, "width": int}` specifying the size of the output image.
            interpolation (`InterpolationMode`, *optional*, defaults to `InterpolationMode.BILINEAR`):
                `InterpolationMode` filter to use when resizing the image e.g. `InterpolationMode.BICUBIC`.
            antialias (`bool`, *optional*, defaults to `True`):
                Whether to use antialiasing when resizing the image
            ensure_multiple_of (`int`, *optional*):
                If `do_resize` is `True`, the image is resized to a size that is a multiple of this value
            keep_aspect_ratio (`bool`, *optional*, defaults to `False`):
                If `True`, and `do_resize` is `True`, the image is resized to the largest possible size such that the aspect ratio is preserved.

        Returns:
            `torch.Tensor`: The resized image.
        zDThe size dictionary must contain the keys 'height' and 'width'. Got )r   r   r   )r>   r?   )r-   r.   
ValueErrorkeysr9   r   resize)selfr<   r=   r>   r?   r@   r   r   s           r(   rD   zDPTImageProcessorFast.resizeh   sr    : {{$**cdhdmdmdocpqrr2djj1/'	
 &,,%MY
 	
r*   size_divisorc                     |j                   dd \  }}d } |||      \  }} |||      \  }}	|||	|f}
t        j                  ||
      S )a  
        Center pad a batch of images to be a multiple of `size_divisor`.

        Args:
            image (`torch.Tensor`):
                Image to pad.  Can be a batch of images of dimensions (N, C, H, W) or a single image of dimensions (C, H, W).
            size_divisor (`int`):
                The width and height of the image will be padded to a multiple of this number.
        r+   Nc                 ^    t        j                  | |z        |z  }|| z
  }|dz  }||z
  }||fS )Nr   )r!   r#   )r=   rF   new_sizepad_sizepad_size_leftpad_size_rights         r(   _get_padz1DPTImageProcessorFast.pad_image.<locals>._get_pad   sA    yy!45DH$H$MM%5N .00r*   )r/   tvFpad)rE   r<   rF   r-   r.   rM   pad_top
pad_bottompad_left	pad_rightpaddings              r(   	pad_imagezDPTImageProcessorFast.pad_image   s`     BC(	1 'v|<&ul;)Wi<wwug&&r*   imagesdo_reduce_labels	do_resizedo_center_crop	crop_size
do_rescalerescale_factordo_normalize
image_mean	image_stddo_paddisable_groupingreturn_tensorsc           	         |r| j                  |      }t        ||      \  }}i }|j                         D ]"  \  }}|r| j                  |||||      }|||<   $ t	        ||      }t        ||      \  }}i }|j                         D ]H  \  }}|r| j                  ||      }|r| j                  ||      }| j                  |||	|
||      }|||<   J t	        ||      }t        d|i|      S )N)ra   )r<   r=   r>   r@   r   pixel_values)datatensor_type)	reduce_labelr	   itemsrD   r
   center_croprU   rescale_and_normalizer   )rE   rV   rW   rX   r=   r>   rY   rZ   r[   r\   r]   r^   r_   r   r@   r`   rF   ra   rb   kwargsgrouped_imagesgrouped_images_indexresized_images_groupedr/   stacked_imagesresized_imagesprocessed_images_groupedprocessed_imagess                               r(   _preprocessz!DPTImageProcessorFast._preprocess   sC   , &&v.F 0EV^n/o,,!#%3%9%9%; 		;!E>!%("/'9&7 "- " -;"5)		; ((>@TU 0E^fv/w,,#% %3%9%9%; 		=!E>!%!1!1.)!L!%!M!77
NL*V_N /=$U+		= **BDXY.2B!CQ_``r*   outputsr   target_sizesc                    t        | d       |j                  }|"t        |      t        |      k7  rt        d      g }|dgt        |      z  n|}t	        ||      D ]s  \  }}|Yt
        j                  j                  j                  |j                  d      j                  d      |dd      j                         }|j                  d	|i       u |S )
a  
        Converts the raw output of [`DepthEstimatorOutput`] into final depth predictions and depth PIL images.
        Only supports PyTorch.

        Args:
            outputs ([`DepthEstimatorOutput`]):
                Raw outputs of the model.
            target_sizes (`TensorType` or `List[Tuple[int, int]]`, *optional*):
                Tensor of shape `(batch_size, 2)` or list of tuples (`Tuple[int, int]`) containing the target size
                (height, width) of each image in the batch. If left to None, predictions will not be resized.

        Returns:
            `List[Dict[str, TensorType]]`: A list of dictionaries of tensors representing the processed depth
            predictions.
        torchNz]Make sure that you pass in as many target sizes as the batch dimension of the predicted depthr   r   bicubicF)r=   modealign_cornerspredicted_depth)r   r{   lenrB   ziprw   nn
functionalinterpolate	unsqueezesqueezeappend)rE   rt   ru   r{   resultsdepthtarget_sizes          r(   post_process_depth_estimationz3DPTImageProcessorFast.post_process_depth_estimation   s    ( 	$(!11$3+?3|CT+To  8D8LvO 44R^"%o|"D 	7E;&++77OOA&003+Iej 8 ')  NN-u56	7 r*   )NTr   F)r   r   )&__name__
__module____qualname__r   BICUBICresampler   r^   r   r_   r=   rX   r[   r]   r`   r\   r@   r   rZ   rY   rW   r   valid_kwargsr   r   boolintrD   rU   listfloatstrr   r   rs   tupledictr    r*   r(   r;   r;   U   sI   !))H'J%IC(DIJLFNIN*L <@)*"'(
(
 (
   78	(

 (
  $J(
  (
 
(
Z '' ' 
	'88a^$8a 8a 	8a
 8a   788a 8a 8a 8a 8a 8a DK'$.8a 4;&-8a  8a  $J8a  !8a" Dj#8a$ +%8a& j(4/'8a* 
+8az JN''' !4c3h#884?$F' 
d3
?#	$	'r*   r;   )'r!   collections.abcr   typingr   r   rw   image_processing_baser   image_processing_utils_fastr   image_transformsr	   r
   image_utilsr   r   r   r   utilsr   r   r   beit.image_processing_beit_fastr   image_processing_dptr   modeling_outputsr   $torchvision.transforms.v2.functional
transformsv2r   rN   r   r   r9   r;   __all__r   r*   r(   <module>r      s      $ *  1 A E  
 E 9 8 2 2$8$8x}$$8 $8 	$8
 $8N y2 y yx #
#r*   