
    qiU                        d Z ddlmZ ddlZddlmZmZmZ ddl	m
Z
mZ ddlmZmZmZmZmZmZmZmZmZmZmZmZmZmZ ddlmZ dd	lmZmZm Z  dd
l!m"Z"  e       rddl#Z#erddl$m%Z%  e       rddl&Z&ddl&m'Z'm(Z(  ejR                  e*      Z+	 ddejX                  de-ez  dz  fdZ.	 ddede-ez  dz  defdZ/defdZ0 G d ded      Z1 e"d       G d de             Z2dgZ3y)z%Image processor class for SuperPoint.    )TYPE_CHECKINGN   )BaseImageProcessorBatchFeatureget_size_dict)resizeto_channel_dimension_format)ChannelDimension
ImageInput	ImageTypePILImageResamplingget_image_typeinfer_channel_dimension_formatis_pil_imageis_scaled_imageis_torch_availableis_valid_imageis_vision_availableto_numpy_arrayvalid_imagesvalidate_preprocess_arguments)ImagesKwargs)
TensorTypeloggingrequires_backends)requires   )SuperGlueKeypointMatchingOutput)Image	ImageDrawimageinput_data_formatc                    |t         j                  k(  rQ| j                  d   dk(  ryt        j                  | d   | d   k(        xr t        j                  | d   | d   k(        S |t         j
                  k(  rQ| j                  d   dk(  ryt        j                  | d   | d	   k(        xr t        j                  | d	   | d
   k(        S y )Nr   r   Tr   .r   .   ..r   .r   .r'   )r
   FIRSTshapenpallLAST)r!   r"   s     j/opt/pipecat/venv/lib/python3.12/site-packages/transformers/models/superglue/image_processing_superglue.pyis_grayscaler2   9   s     ,222;;q>QvveFmuV}45`"&&vRWX^R_A_:``	.33	3;;r?avveFmuV}45`"&&vRWX^R_A_:`` 
4    returnc                    t        t        dg       t        | t        j                        rt        | |      r| S |t        j                  k(  r7| d   dz  | d   dz  z   | d   dz  z   }t        j                  |gd	z  d
      }|S |t        j                  k(  r5| d   dz  | d   dz  z   | d   dz  z   }t        j                  |gd	z  d      }S t        | t        j                  j                        s| S | j                  d      } | S )a4  
    Converts an image to grayscale format using the NTSC formula. Only support numpy and PIL Image.

    This function is supposed to return a 1-channel image, but it returns a 3-channel image with the same value in each
    channel, because of an issue that is discussed in :
    https://github.com/huggingface/transformers/pull/25786#issuecomment-1730176446

    Args:
        image (Image):
            The image to convert.
        input_data_format (`ChannelDimension` or `str`, *optional*):
            The channel dimension format for the input image.
    visionr"   r$   gŏ1w-!?r%   gbX9?r&   gv/?r   r   )axisr)   r*   r+   r(   L)r   convert_to_grayscale
isinstancer.   ndarrayr2   r
   r,   stackr0   PILr   convert)r!   r"   
gray_images      r1   r:   r:   H   s   " *XJ7%$1BCL 0 6 66v/%-&2HH5QW=[aKaaJ:,"2;J  "2"7"77v/%-&2HH5QW=[aKaaJ:,"2<JeSYY__-MM#ELr3   imagesc                     d}d t        | t              rQt        |       dk(  rt        fd| D              r| S t        fd| D              r| D cg c]  }|D ]  }|  c}}S t	        |      c c}}w )N)z-Input images must be a one of the following :z - A pair of PIL images.z - A pair of 3D arrays.z! - A list of pairs of PIL images.z  - A list of pairs of 3D arrays.c                     t        |       xsC t        |       xr6 t        |       t        j                  k7  xr t        | j                        dk(  S )z$images is a PIL Image or a 3D array.r   )r   r   r   r   r>   lenr-   )r!   s    r1   _is_valid_imagez8validate_and_format_image_pairs.<locals>._is_valid_imagev   sG    E" 
5!fnU&;y}}&LfQTUZU`U`QaefQf	
r3   r'   c              3   .   K   | ]  } |        y wN .0r!   rE   s     r1   	<genexpr>z2validate_and_format_image_pairs.<locals>.<genexpr>}   s     #Q_U%;#Q   c              3      K   | ]:  }t        |t              xr$ t        |      d k(  xr t        fd|D               < yw)r'   c              3   .   K   | ]  } |        y wrG   rH   rI   s     r1   rK   z<validate_and_format_image_pairs.<locals>.<genexpr>.<genexpr>   s     CuOE*CrL   N)r;   listrD   r/   )rJ   
image_pairrE   s     r1   rK   z2validate_and_format_image_pairs.<locals>.<genexpr>   sN      
  z4( DJ1$DC
CCD
s   A A)r;   rO   rD   r/   
ValueError)rA   error_messagerP   r!   rE   s       @r1   validate_and_format_image_pairsrS   m   s    M
 &$v;!#Q&#Q QM 
 %	
 
 -3Kj
KuEKEKK
]
## Ls   A3c                       e Zd ZU dZeed<   y)SuperGlueImageProcessorKwargsz
    do_grayscale (`bool`, *optional*, defaults to `True`):
        Whether to convert the image to grayscale. Can be overridden by `do_grayscale` in the `preprocess` method.
    do_grayscaleN)__name__
__module____qualname____doc__bool__annotations__rH   r3   r1   rU   rU      s    
 r3   rU   F)total)torch)backendsc                       e Zd ZdZdgZddej                  dddfdedee	e
f   dz  ded	ed
ededdf fdZ	 	 ddej                  dee	e
f   de	ez  dz  de	ez  dz  fdZdddddddej"                  df	dedz  dee	e
f   dz  dedz  d	edz  d
edz  dedz  de	ez  dz  dede	ez  dz  defdZ	 ddddeee   z  dedeee	ej0                  f      fdZdedeee	ej0                  f      ded   fdZd Z xZS ) SuperGlueImageProcessorap  
    Constructs a SuperGlue image processor.

    Args:
        do_resize (`bool`, *optional*, defaults to `True`):
            Controls whether to resize the image's (height, width) dimensions to the specified `size`. Can be overridden
            by `do_resize` in the `preprocess` method.
        size (`dict[str, int]` *optional*, defaults to `{"height": 480, "width": 640}`):
            Resolution of the output image after `resize` is applied. Only has an effect if `do_resize` is set to
            `True`. Can be overridden by `size` in the `preprocess` method.
        resample (`PILImageResampling`, *optional*, defaults to `Resampling.BILINEAR`):
            Resampling filter to use if resizing the image. Can be overridden by `resample` in the `preprocess` method.
        do_rescale (`bool`, *optional*, defaults to `True`):
            Whether to rescale the image by the specified scale `rescale_factor`. Can be overridden by `do_rescale` in
            the `preprocess` method.
        rescale_factor (`int` or `float`, *optional*, defaults to `1/255`):
            Scale factor to use if rescaling the image. Can be overridden by `rescale_factor` in the `preprocess`
            method.
        do_grayscale (`bool`, *optional*, defaults to `True`):
            Whether to convert the image to grayscale. Can be overridden by `do_grayscale` in the `preprocess` method.
    pixel_valuesTNgp?	do_resizesizeresample
do_rescalerescale_factorrV   r4   c                     t        |   di | ||nddd}t        |d      }|| _        || _        || _        || _        || _        || _        y )Ni  i  )heightwidthFdefault_to_squarerH   )	super__init__r   rc   rd   re   rf   rg   rV   )	selfrc   rd   re   rf   rg   rV   kwargs	__class__s	           r1   rn   z SuperGlueImageProcessor.__init__   s^     	"6"'tc-JTU;"	 $,(r3   r!   data_formatr"   c                 L    t        |d      }t        |f|d   |d   f||d|S )aL  
        Resize an image.

        Args:
            image (`np.ndarray`):
                Image to resize.
            size (`dict[str, int]`):
                Dictionary of the form `{"height": int, "width": int}`, specifying the size of the output image.
            data_format (`ChannelDimension` or `str`, *optional*):
                The channel dimension format of the output image. If not provided, it will be inferred from the input
                image. Can be one of:
                - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
                - `"none"` or `ChannelDimension.NONE`: image in (height, width) format.
            input_data_format (`ChannelDimension` or `str`, *optional*):
                The channel dimension format for the input image. If unset, the channel dimension format is inferred
                from the input image. Can be one of:
                - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
                - `"none"` or `ChannelDimension.NONE`: image in (height, width) format.
        Frk   ri   rj   )rd   rr   r"   )r   r   )ro   r!   rd   rr   r"   rp   s         r1   r   zSuperGlueImageProcessor.resize   sE    : TU;
x.$w-0#/	

 
 	
r3   return_tensorsc                 (   ||n| j                   }||n| j                  }||n| j                  }||n| j                  }||n| j                  }||n| j
                  }t        |d      }t        |      }t        |      st        d      t        |||||       |D cg c]  }t        |       }}t        |d         r|rt        j                  d       |
t        |d         }
g }|D ]]  }|r| j!                  ||||
      }|r| j#                  |||
      }|rt%        ||
	      }t'        ||	|

      }|j)                  |       _ t+        dt-        |      d      D cg c]
  }|||dz     }}d|i}t/        ||      S c c}w c c}w )ad  
        Preprocess an image or batch of images.

        Args:
            images (`ImageInput`):
                Image pairs to preprocess. Expects either a list of 2 images or a list of list of 2 images list with
                pixel values ranging from 0 to 255. If passing in images with pixel values between 0 and 1, set
                `do_rescale=False`.
            do_resize (`bool`, *optional*, defaults to `self.do_resize`):
                Whether to resize the image.
            size (`dict[str, int]`, *optional*, defaults to `self.size`):
                Size of the output image after `resize` has been applied. If `size["shortest_edge"]` >= 384, the image
                is resized to `(size["shortest_edge"], size["shortest_edge"])`. Otherwise, the smaller edge of the
                image will be matched to `int(size["shortest_edge"]/ crop_pct)`, after which the image is cropped to
                `(size["shortest_edge"], size["shortest_edge"])`. Only has an effect if `do_resize` is set to `True`.
            resample (`PILImageResampling`, *optional*, defaults to `self.resample`):
                Resampling filter to use if resizing the image. This can be one of `PILImageResampling`, filters. Only
                has an effect if `do_resize` is set to `True`.
            do_rescale (`bool`, *optional*, defaults to `self.do_rescale`):
                Whether to rescale the image values between [0 - 1].
            rescale_factor (`float`, *optional*, defaults to `self.rescale_factor`):
                Rescale factor to rescale the image by if `do_rescale` is set to `True`.
            do_grayscale (`bool`, *optional*, defaults to `self.do_grayscale`):
                Whether to convert the image to grayscale.
            return_tensors (`str` or `TensorType`, *optional*):
                The type of tensors to return. Can be one of:
                    - Unset: Return a list of `np.ndarray`.
                    - `TensorType.PYTORCH` or `'pt'`: Return a batch of type `torch.Tensor`.
                    - `TensorType.NUMPY` or `'np'`: Return a batch of type `np.ndarray`.
            data_format (`ChannelDimension` or `str`, *optional*, defaults to `ChannelDimension.FIRST`):
                The channel dimension format for the output image. Can be one of:
                - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
                - Unset: Use the channel dimension format of the input image.
            input_data_format (`ChannelDimension` or `str`, *optional*):
                The channel dimension format for the input image. If unset, the channel dimension format is inferred
                from the input image. Can be one of:
                - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
                - `"none"` or `ChannelDimension.NONE`: image in (height, width) format.
        Frk   zSInvalid image type. Must be of type PIL.Image.Image, numpy.ndarray, or torch.Tensor)rc   rd   re   rf   rg   r   zIt looks like you are trying to rescale already rescaled images. If the input images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again.)r!   rd   re   r"   )r!   scaler"   r7   )input_channel_dimr'   rb   )datatensor_type)rc   re   rf   rg   rV   rd   r   rS   r   rQ   r   r   r   loggerwarning_oncer   r   rescaler:   r	   appendrangerD   r   )ro   rA   rc   rd   re   rf   rg   rV   rt   rr   r"   rp   r!   
all_imagesiimage_pairsrx   s                    r1   
preprocessz"SuperGlueImageProcessor.preprocess   s   p "+!6IDNN	'38#-#9Zt
+9+E4K^K^'3'?|TEVEV'tTYYTU; 18F#rss%!)	
 6<<E.'<<6!9%*s
 $ >vay I
 	%E%dXars5Zkl,UFWX/{VghEe$	% 7<As:PQ6RSz!a!e,SS,>BB? =6 Ts   #F
(Foutputsr   target_sizes	thresholdc                    |j                   j                  d   t        |      k7  rt        d      t	        d |D              st        d      t        |t              r,t        j                  ||j                   j                        }n1|j                  d   dk7  s|j                  d   dk7  rt        d      |}|j                  j                         }||j                  d      j                  dddd      z  }|j                  t        j                        }g }t!        |j                   ||j"                  d	d	df   |j$                  d	d	df         D ]v  \  }}}	}
|d   dkD  }|d   dkD  }|d   |   }|d   |   }|	|   }|
|   }||kD  |dkD  z  ||j                  d   k  z  }||   }|||      }||   }|j'                  |||d
       x |S )a  
        Converts the raw output of [`SuperGlueKeypointMatchingOutput`] into lists of keypoints, scores and descriptors
        with coordinates absolute to the original image sizes.
        Args:
            outputs ([`SuperGlueKeypointMatchingOutput`]):
                Raw outputs of the model.
            target_sizes (`torch.Tensor` or `list[tuple[tuple[int, int]]]`, *optional*):
                Tensor of shape `(batch_size, 2, 2)` or list of tuples of tuples (`tuple[int, int]`) containing the
                target size `(height, width)` of each image in the batch. This must be the original image size (before
                any processing).
            threshold (`float`, *optional*, defaults to 0.0):
                Threshold to filter out the matches with low scores.
        Returns:
            `list[Dict]`: A list of dictionaries, each dictionary containing the keypoints in the first and second image
            of the pair, the matching scores and the matching indices.
        r   zRMake sure that you pass in as many target sizes as the batch dimension of the maskc              3   8   K   | ]  }t        |      d k(    yw)r'   N)rD   )rJ   target_sizes     r1   rK   zISuperGlueImageProcessor.post_process_keypoint_matching.<locals>.<genexpr>r  s     I[3{#q(Is   zTEach element of target_sizes must contain the size (h, w) of each image of the batch)devicer   r'   r(   N)
keypoints0
keypoints1matching_scores)maskr-   rD   rQ   r/   r;   rO   r^   tensorr   	keypointscloneflipreshapetoint32zipmatchesr   r}   )ro   r   r   r   image_pair_sizesr   results	mask_pairkeypoints_pairr   scoresmask0mask1r   r   matches0scores0valid_matchesmatched_keypoints0matched_keypoints1r   s                        r1   post_process_keypoint_matchingz6SuperGlueImageProcessor.post_process_keypoint_matchingZ  s   , <<a C$55qrrILIIsttlD)$||LATATU!!!$)\-?-?-Ba-G j   ,%%++-	 0 5 5b 9 A A"aA NN	LL-	:=LL)W__QT%:G<S<STUWXTX<Y;
 	6I~w aL1$EaL1$E'*51J'*51Ju~HUmG %y0X]CxR\RbRbcdReGefM!+M!:!+H],C!D%m4ONN"4"4'6#	2 r3   rA   keypoint_matching_outputzImage.Imagec           	         t        |      }|D cg c]  }t        |       }}t        dt        |      d      D cg c]
  }|||dz     }}g }t	        ||      D ]  \  }}|d   j
                  dd \  }	}
|d   j
                  dd \  }}t        j                  t        |	|      |
|z   dft        j                        }|d   |d|	d|
f<   |d   |d||
df<   t        j                  |      }t        j                  |      }|d   j                  d      \  }}|d   j                  d      \  }}t	        |||||d	         D ]  \  }}}}}| j                  |      }|j!                  ||||
z   |f|d
       |j#                  |dz
  |dz
  |dz   |dz   fd       |j#                  ||
z   dz
  |dz
  ||
z   dz   |dz   fd        |j%                  |        |S c c}w c c}w )a  
        Plots the image pairs side by side with the detected keypoints as well as the matching between them.

        Args:
            images (`ImageInput`):
                Image pairs to plot. Same as `SuperGlueImageProcessor.preprocess`. Expects either a list of 2
                images or a list of list of 2 images list with pixel values ranging from 0 to 255.
            keypoint_matching_output (List[Dict[str, torch.Tensor]]]):
                A post processed keypoint matching output

        Returns:
            `List[PIL.Image.Image]`: A list of PIL images, each containing the image pairs side by side with the detected
            keypoints as well as the matching between them.
        r   r'   Nr   r   )dtyper   r   r   )fillrj   black)r   )rS   r   r~   rD   r   r-   r.   zerosmaxuint8r   	fromarrayr    Drawunbind
_get_colorlineellipser}   )ro   rA   r   r!   r   r   r   rP   pair_outputheight0width0height1width1
plot_imageplot_image_pildrawkeypoints0_xkeypoints0_ykeypoints1_xkeypoints1_ykeypoint0_xkeypoint0_ykeypoint1_xkeypoint1_ymatching_scorecolors                             r1   visualize_keypoint_matchingz3SuperGlueImageProcessor.visualize_keypoint_matching  sI   & 185;<E.'<<273v;2JKQva!a%(KK'*;8P'Q 	+#J(m11"15OGV(m11"15OGV3w#8&6/1"MUWU]U]^J,6qMJxx&(),6qMJxx()"__Z8N>>.1D)4\)B)I)I!)L&L,)4\)B)I)I!)L&L,VYlL,TeHfW R[+{N 7		 +{V/C[Q  
 kAo{QaQ\_`Q`ahop 6)A-{Qf@TWX@XZehiZij    NN>*7	+8 A =Ks
   G G%c                 N    t        dd|z
  z        }t        d|z        }d}|||fS )zMaps a score to a color.   r   r   )int)ro   scorergbs        r1   r   z"SuperGlueImageProcessor._get_color  s4    q5y!"e1ayr3   )NN)g        )rW   rX   rY   rZ   model_input_namesr   BILINEARr[   dictstrr   floatrn   r.   r<   r
   r   r,   r   r   r   rO   tupler^   Tensorr   r   r   r   __classcell__)rq   s   @r1   ra   ra      sD   , (( &*'9'B'B '!)) 38nt#) %	)
 ) ) ) 
)4 6:;?%
zz%
 38n%
 ++d2	%

 !11D8%
T "&&*.2"&'+$(26(8(>(>;?oC $;oC 38nt#	oC
 %t+oC 4KoC oC TkoC j(4/oC &oC !11D8oC 
oCj 	B2B !4;.B 	B
 
d3$%	&BJ44 #'tC,='>"?4 
m		4nr3   ra   rG   )4rZ   typingr   numpyr.   image_processing_utilsr   r   r   image_transformsr   r	   image_utilsr
   r   r   r   r   r   r   r   r   r   r   r   r   r   processing_utilsr   utilsr   r   r   utils.import_utilsr   r^   modeling_supergluer   r>   r   r    
get_loggerrW   rz   r<   r   r2   r:   rS   rU   ra   __all__rH   r3   r1   <module>r      s   ,    U U C     - ; ; * C$			H	% 8<a::a--4a" 8<""--4" "J$J $8L  
:H0 H HV
 %
%r3   