
    qi                     D   d dl mZ d dlmZ d dlZd dlZd dlmZ d dlm	Z	 ddl
mZ ddlmZ dd	lmZ dd
lmZmZ ddlmZ ddlmZmZmZ ddlmZ ddlmZ ddlmZ e ed       G d de                    Z G d dej@                        Z!d Z"dAdZ#dejH                  de%dejH                  fdZ&	 dBdej@                  dejH                  d ejH                  d!ejH                  d"ejH                  dz  d#e'd$e'd%ee   fd&Z( ee#       G d' d(ej@                               Z) G d) d*ej@                        Z* G d+ d,ej@                        Z+d-ejH                  d.ejH                  d/ejH                  dejH                  fd0Z, G d1 d2ej@                        Z- G d3 d4ej@                        Z.e G d5 d6e             Z/d7ejH                  d8e'de0ejH                  ejH                  f   fd9Z1d:ejH                  d;e%d<e%dejH                  fd=Z2 ed>       G d? d@e/             Z3d6d@gZ4y)C    )Callable)	dataclassN)nnpad_sequence   )ACT2FN)use_kernelized_func)FlashAttentionKwargs)ALL_ATTENTION_FUNCTIONSPreTrainedModel)Unpack)ModelOutputTransformersKwargsauto_docstring)can_return_tuple   )AutoModelForKeypointDetection   )LightGlueConfiga  
    Base class for outputs of LightGlue keypoint matching models. Due to the nature of keypoint detection and matching,
    the number of keypoints is not fixed and can vary from image to image, which makes batching non-trivial. In the
    batch of images, the maximum number of matches is set as the dimension of the matches and matching scores. The mask
    tensor is used to indicate which values in the keypoints, matches, matching_scores and prune tensors are keypoint
    matching information.
    )custom_introc                   ^   e Zd ZU dZdZej                  dz  ed<   dZej                  dz  ed<   dZ	ej                  dz  ed<   dZ
ej                  dz  ed<   dZej                  dz  ed<   dZej                  dz  ed<   dZeej                     dz  ed	<   dZeej                     dz  ed
<   y)LightGlueKeypointMatchingOutputa  
    loss (`torch.FloatTensor` of shape `(1,)`, *optional*):
        Loss computed during training.
    matches (`torch.FloatTensor` of shape `(batch_size, 2, num_matches)`):
        Index of keypoint matched in the other image.
    matching_scores (`torch.FloatTensor` of shape `(batch_size, 2, num_matches)`):
        Scores of predicted matches.
    keypoints (`torch.FloatTensor` of shape `(batch_size, num_keypoints, 2)`):
        Absolute (x, y) coordinates of predicted keypoints in a given image.
    prune (`torch.IntTensor` of shape `(batch_size, num_keypoints)`):
        Pruning mask indicating which keypoints are removed and at which layer.
    mask (`torch.BoolTensor` of shape `(batch_size, num_keypoints)`):
        Mask indicating which values in matches, matching_scores, keypoints and prune are keypoint matching
        information.
    hidden_states (`Tuple[torch.FloatTensor, ...]`, *optional*):
        Tuple of `torch.FloatTensor` (one for the output of each stage) of shape `(batch_size, 2, num_channels,
        num_keypoints)` returned when `output_hidden_states=True` is passed or when
        `config.output_hidden_states=True`
    attentions (`Tuple[torch.FloatTensor, ...]`, *optional*):
        Tuple of `torch.FloatTensor` (one for each layer) of shape `(batch_size, 2, num_heads, num_keypoints,
        num_keypoints)` returned when `output_attentions=True` is passed or when
        `config.output_attentions=True`
    Nlossmatchesmatching_scores	keypointsprunemaskhidden_states
attentions)__name__
__module____qualname____doc__r   torchFloatTensor__annotations__r   r   r   r   	IntTensorr   r    tupler!        b/opt/pipecat/venv/lib/python3.12/site-packages/transformers/models/lightglue/modeling_lightglue.pyr   r   '   s    0 &*D%

d
")(,GU%,04OU&&-4*.Iu  4'.$(E5??T!(%)D%

d
")59M5**+d2926Je''(4/6r,   r   c            
            e Zd Zdef fdZ	 ddej                  dedz  deej                     eej                  ej                  f   z  fdZ	 xZ
S )	LightGluePositionalEncoderconfigc                     t         |           t        j                  d|j                  |j
                  z  dz  d      | _        y )Nr   Fbias)super__init__r   Lineardescriptor_dimnum_attention_heads	projectorselfr0   	__class__s     r-   r5   z#LightGluePositionalEncoder.__init__U   s:    1f&;&;v?Y?Y&Y]^&^ejkr,   r   output_hidden_statesNreturnc                     | j                  |      }|j                  dd      }t        j                  |      }t        j                  |      }||f}|r||f}|S |f}|S )Nr   dim)r9   repeat_interleaver&   cossin)r;   r   r=   projected_keypoints
embeddingscosinessinesoutputs           r-   forwardz"LightGluePositionalEncoder.forwardY   sq     #nnY7(::1":E
))J'		*%u%
6J*12 R\P]r,   F)r"   r#   r$   r   r5   r&   Tensorboolr*   rK   __classcell__r<   s   @r-   r/   r/   T   s]    l l
 LQ		=AD[		u||	uU\\5<<%?@	@	r,   r/   c                     | dd d df   }| ddd df   }t        j                  | |gd      j                  d      }|S )N.r   r   r@   rA   )r&   stackflatten)xx1x2rot_xs       r-   rotate_halfrY   e   sL    	
3!8B	
319BKK"b	r*2226ELr,   c                 6   | j                   }| j                         } |j                         }|j                  |      }|j                  |      }| |z  t        |       |z  z   }||z  t        |      |z  z   }|j	                  |      |j	                  |      fS )a  Applies Rotary Position Embedding to the query and key tensors.

    Args:
        q (`torch.Tensor`): The query tensor.
        k (`torch.Tensor`): The key tensor.
        cos (`torch.Tensor`): The cosine part of the rotary embedding.
        sin (`torch.Tensor`): The sine part of the rotary embedding.
        unsqueeze_dim (`int`, *optional*, defaults to 1):
            The 'unsqueeze_dim' argument specifies the dimension along which to unsqueeze cos[position_ids] and
            sin[position_ids] so that they can be properly broadcasted to the dimensions of q and k. For example, note
            that cos[position_ids] and sin[position_ids] have the shape [batch_size, seq_len, head_dim]. Then, if q and
            k have the shape [batch_size, heads, seq_len, head_dim], then setting unsqueeze_dim=1 makes
            cos[position_ids] and sin[position_ids] broadcastable to the shapes of q and k. Similarly, if q and k have
            the shape [batch_size, seq_len, heads, head_dim], then set unsqueeze_dim=2.
    Returns:
        `tuple(torch.Tensor)` comprising of the query and key tensors rotated using the Rotary Position Embedding.
    dtype)r\   float	unsqueezerY   to)qkrD   rE   unsqueeze_dimr\   q_embedk_embeds           r-   apply_rotary_pos_embre   m   s    $ GGE		A		A
--
&C
--
&C3w;q>C/0G3w;q>C/0G::E:"GJJUJ$;;;r,   r    n_repr>   c                     | j                   \  }}}}|dk(  r| S | dddddddddf   j                  |||||      } | j                  |||z  ||      S )z
    This is the equivalent of torch.repeat_interleave(x, dim=1, repeats=n_rep). The hidden states go from (batch,
    num_key_value_heads, seqlen, head_dim) to (batch, num_attention_heads, seqlen, head_dim)
    r   N)shapeexpandreshape)r    rf   batchnum_key_value_headsslenhead_dims         r-   	repeat_kvro      so    
 2?1D1D.Ehz!!Qa"23::5BUW\^bdlmM  (;e(CT8TTr,   modulequerykeyvalueattention_maskscalingdropoutkwargsc                    t        || j                        }t        || j                        }	t        j                  ||j	                  dd            |z  }
||
|z   }
t
        j                  j                  |
dt        j                        j                  |j                        }
t
        j                  j                  |
|| j                        }
t        j                  |
|	      }|j	                  dd      j                         }||
fS )Nr   r   r@   )rB   r\   )ptrainingr   )ro   num_key_value_groupsr&   matmul	transposer   
functionalsoftmaxfloat32r_   r\   rv   rz   
contiguous)rp   rq   rr   rs   rt   ru   rv   rw   
key_statesvalue_statesattn_weightsattn_outputs               r-   eager_attention_forwardr      s     3 ; ;<JUF$?$?@L<<z';';Aq'ABWLL!#n4==((2U]](SVVW\WbWbcL==((6??([L,,|\:K''1-88:K$$r,   c                   2    e Zd ZdZdedef fdZ	 	 	 	 ddej                  de	ej                  ej                  f   dz  dej                  dz  d	ej                  dz  d
ej                  dz  de
e   de	ej                  ej                  dz  f   fdZ xZS )LightGlueAttentionz=Multi-headed attention from 'Attention Is All You Need' paperr0   	layer_idxc                 d   t         |           || _        || _        t	        |d|j
                  |j                  z        | _        |j                  |j                  z  | _	        | j                  dz  | _
        |j                  | _        d| _        t        j                  |j
                  |j                  | j                  z  |j                        | _        t        j                  |j
                  |j                  | j                  z  |j                        | _        t        j                  |j
                  |j                  | j                  z  |j                        | _        t        j                  |j                  | j                  z  |j
                  |j                        | _        y )Nrn   g      Tr2   )r4   r5   r0   r   getattrhidden_sizer8   rn   rl   r{   ru   attention_dropout	is_causalr   r6   attention_biasq_projk_projv_projo_projr;   r0   r   r<   s      r-   r5   zLightGlueAttention.__init__   sM   "
F4F4F&JdJd4de$*$>$>&B\B\$\!}}d*!'!9!9ii : :T]] JQWQfQf
 ii : :T]] JQWQfQf
 ii : :T]] JQWQfQf
 ii&&68J8JQWQfQf
r,   Nr    position_embeddingsrt   encoder_hidden_statesencoder_attention_maskrw   r>   c                    |j                   d d }g |d| j                  }| j                  |      j                  |      j	                  dd      }	|d u}
|
r|n|}|
r|n|}| j                  |      j                  |      j	                  dd      }| j                  |      j                  |      j	                  dd      }||\  }}t        |	|||      \  }	}t        j                  | j                  j                  t              } || |	|||f| j                  sdn| j                  | j                  d|\  }} |j                   g |d j#                         }| j%                  |      }||fS )Nr@   r   r           )rv   ru   )rh   rn   r   viewr}   r   r   re   r   get_interfacer0   _attn_implementationr   rz   r   ru   rj   r   r   )r;   r    r   rt   r   r   rw   input_shapehidden_shapequery_statesis_cross_attentioncurrent_statescurrent_attention_maskr   r   rD   rE   attention_interfacer   r   s                       r-   rK   zLightGlueAttention.forward   s    $))#2.88b8$--8{{=166|DNNqRST2$>2D.-;M!7Sa[[055lCMMaQRS
{{>277EOOPQSTU**HC';L*VY[^'_$L*(?(M(MKK,,.E)
 %8"	%
  $}}C$2H2HLL	%
 	%
!\ *k));;;;FFHkk+.L((r,   )NNNN)r"   r#   r$   r%   r   intr5   r&   rM   r*   r   r   rK   rO   rP   s   @r-   r   r      s    G
 
3 
4 IM.2596:*)||*) #5<<#=>E*) t+	*)
  %||d2*) !&t 3*) -.*) 
u||U\\D00	1*)r,   r   c                   \     e Zd Zdef fdZdej                  dej                  fdZ xZS )LightGlueMLPr0   c                 h   t         |           || _        t        |j                     | _        t        j                  |j                  |j                        | _	        t        j                  |j                  |j                        | _        t        j                  |j                  d      | _        y )NT)elementwise_affine)r4   r5   r0   r	   
hidden_actactivation_fnr   r6   intermediate_sizefc1r   fc2	LayerNorm
layer_normr:   s     r-   r5   zLightGlueMLP.__init__   s}    #F$5$5699V55v7O7OP99V55v7I7IJ,,v'?'?TXYr,   r    r>   c                     | j                  |      }| j                  |      }| j                  |      }| j                  |      }|S N)r   r   r   r   )r;   r    s     r-   rK   zLightGlueMLP.forward   sB    /6**=9/r,   	r"   r#   r$   r   r5   r&   rM   rK   rO   rP   s   @r-   r   r      s,    Z ZU\\ ell r,   r   c                        e Zd Zdedef fdZ	 	 ddej                  dej                  dej                  dedz  d	edz  d
e	ej                  e	ej                     dz  e	ej                     dz  f   fdZ
 xZS )LightGlueTransformerLayerr0   r   c                     t         |           t        ||      | _        t	        |      | _        t        ||      | _        t	        |      | _        y r   )r4   r5   r   self_attentionr   self_mlpcross_attention	cross_mlpr   s      r-   r5   z"LightGlueTransformerLayer.__init__  sD    0C$V,1&)D%f-r,   descriptorsr   rt   r=   Noutput_attentionsr>   c                    |rdnd }|rdnd }|r||fz   }|j                   \  }}	}
| j                  ||||      \  }}t        j                  ||gd      }| j	                  |      }||z   }|r||f}|j                  dd|	|
      j                  d      j                  ||	|
      }|6|j                  dddd|	      j                  d      j                  |dd|	      nd }| j                  ||||      \  }}t        j                  ||gd      }| j                  |      }||z   }|r6||f}||j                  ||	|
      fz   z   |j                  ||	|
      fz   |z   }|r
||fz   |fz   }|||fS )Nr+   )r   rt   r   r@   rA   r   r   )r   r   r   )	rh   r   r&   catr   rj   flipr   r   )r;   r   r   rt   r=   r   all_hidden_statesall_attentions
batch_sizenum_keypointsr7   attention_outputself_attentionsintermediate_statesoutput_statesself_attention_descriptorsself_attention_hidden_statesr   r   cross_attention_outputcross_attentionscross_intermediate_statescross_output_statescross_attention_hidden_statess                           r-   rK   z!LightGlueTransformerLayer.forward  s	    #7BD0d 1[N B4?4E4E1
M> -1,?,? ))/	 -@ -
)/ $ii6F(GRP&9:%0=%@",?+O( '..r1m^TT!WWZ? 	 ) ""2q!Q>CCAFNNz[\^_ano 	 483G3G&"7#9/	 4H 4
0 0 %*II/IKa.bhj$k!"nn-FG03FF-FH[,\)!-55j-Q_`bc./ &&z=.QST 0	0  +.@@DTCVVN-~==r,   )FF)r"   r#   r$   r   r   r5   r&   rM   rN   r*   rK   rO   rP   s   @r-   r   r     s    . .3 . -2).H>\\H> <<H> 	H>
 #TkH>  $;H> 
u||U5<<047u||9Lt9SS	TH>r,   r   
similaritymatchability0matchability1c                    | j                   \  }}}t        j                  j                  |      t        j                  j                  |      j	                  dd      z   }t        j                  j                  | d      }t        j                  j                  | j	                  dd      j                         d      j	                  dd      }| j                  ||dz   |dz   fd      }	||z   |z   |	ddd|d|f<   t        j                  j                  |j                  d             |	dddddf<   t        j                  j                  |j                  d             |	dddddf<   |	S )z;create the log assignment matrix from logits and similarityr   r   r@   rR   r   N)	rh   r   r~   
logsigmoidr}   log_softmaxr   new_fullsqueeze)
r   r   r   r   num_keypoints_0num_keypoints_1certaintiesscores0scores1scoress
             r-   sigmoid_log_double_softmaxr   Z  sS    4>3C3C0J--**=9BMM<T<TUb<c<m<mnoqr<ssKmm''
A6Gmm''
(<(<R(D(O(O(QSTU__`bdfgG  *o.A?UVCV!WYZ[F4;g4E4SF1 0 00111=3H3H3L2LMF1crc2:11=3H3H3L2LMF1b#2#:Mr,   c                        e Zd Zdef fdZdej                  dej                  dej                  fdZdej                  dej                  fdZ xZ	S )LightGlueMatchAssignmentLayerr0   c                     t         |           |j                  | _        t        j                  | j                  | j                  d      | _        t        j                  | j                  dd      | _        y )NTr2   r   )r4   r5   r7   r   r6   final_projectionmatchabilityr:   s     r-   r5   z&LightGlueMatchAssignmentLayer.__init__j  sY    $33 "		$*=*=t?R?RY] ^IId&9&914Hr,   r   r   r>   c                    |j                   \  }}}| j                  |      }|t        j                  | j                  |j
                        dz  z  }|j                  |dz  d||      }|d d df   }|d d df   }||j                  dd      z  }	||j                  |dz  d|      }|d d df   j                  d      }
|d d df   j                  d      j                  dd      }|
|z  }|	j                  |dk(  t        j                  |	j                        j                        }	| j                  |      }|j                  |dz  d|d      }|d d df   }|d d df   }t        |	||      }|S )Ndeviceg      ?r   r   r   r@   rR   )rh   r   r&   tensorr7   r   rj   r}   r^   masked_fillfinfor\   minr   r   )r;   r   r   r   r   r7   m_descriptorsm_descriptors0m_descriptors1r   mask0mask1r   matchability_0matchability_1r   s                   r-   rK   z%LightGlueMatchAssignmentLayer.forwardq  s   4?4E4E1
M>--k:%T5H5HQ^QeQe(fjn(nn%--jAoq-Q_`&q!t,&q!t,#n&>&>r2&FF
<<
aMBDAJ((,EAJ((,66r2>E5=D#//	5;;zGWGW;X;\;\]J ((5#++J!OQqQ%ad+%ad+ ,JWr,   c                     | j                  |      }t        j                  j                  |      j	                  d      }|S )z0Get matchability of descriptors as a probabilityr@   )r   r   r~   sigmoidr   )r;   r   r   s      r-   get_matchabilityz.LightGlueMatchAssignmentLayer.get_matchability  s7    ((5}},,\:BB2Fr,   )
r"   r#   r$   r   r5   r&   rM   rK   r   rO   rP   s   @r-   r   r   i  sR    I I5<< u||  4ELL U\\ r,   r   c                   \     e Zd Zdef fdZdej                  dej                  fdZ xZS )LightGlueTokenConfidenceLayerr0   c                 l    t         |           t        j                  |j                  d      | _        y )Nr   )r4   r5   r   r6   r7   tokenr:   s     r-   r5   z&LightGlueTokenConfidenceLayer.__init__  s&    YYv44a8
r,   r   r>   c                     | j                  |j                               }t        j                  j	                  |      j                  d      }|S )Nr@   )r   detachr   r~   r   r   )r;   r   r   s      r-   rK   z%LightGlueTokenConfidenceLayer.forward  s=    

;--/0%%e,44R8r,   r   rP   s   @r-   r   r     s*    9 9
5<< ELL r,   r   c                   4    e Zd ZU dZeed<   dZdZdZdZ	dZ
dZy)	LightGluePreTrainedModelz
    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
    models.
    r0   	lightgluepixel_values)imageFTN)r"   r#   r$   r%   r   r(   base_model_prefixmain_input_nameinput_modalitiessupports_gradient_checkpointing_supports_flash_attn_supports_sdpar+   r,   r-   r   r     s1    
 #$O!&+#Nr,   r   r   	thresholdc                 6   | j                   \  }}}| ddddddf   j                  d      }| ddddddf   j                  d      }|j                  }|j                  }t        j                  |j                   d   |j
                        d   }t        j                  |j                   d   |j
                        d   }	||j                  d|      k(  }
|	|j                  d|      k(  }|j                  j                         }|j                  d      }t        j                  |
||      }t        j                  ||j                  d|      |      }|
||kD  z  }||j                  d|      z  }t        j                  ||d      }t        j                  ||d      }t        j                  ||g      j                  dd      j                  |dz  d      }t        j                  ||g      j                  dd      j                  |dz  d      }||fS )z1obtain matches from a score matrix [Bx M+1 x N+1]Nr@   r   r   r   r   )rh   maxindicesr&   aranger   gathervaluesexp
new_tensorwhererS   r}   rj   )r   r  r   _max0max1matches0matches1indices0indices1mutual0mutual1zeromatching_scores0matching_scores1valid0valid1r   r   s                      r-   get_matches_from_scoresr    s   ||J1!SbS#2#+""1%D!SbS#2#+""1%D||H||H ||HNN1-hooFtLH||HNN1-hooFtLH(//!X66G(//!X66G ;;??D??1D{{7D$7{{7,<,C,CAx,PRVW(945Fv}}Q11F {{68R0H{{68R0Hkk8X./99!Q?GG
UVXZ[Gkk#35E"FGQQRSUVW__`jmn`nprsOO##r,   r   heightwidthc                     t        j                  ||g| j                  | j                        d   }|dz  }|j	                  d      j
                  dz  }| |ddddf   z
  |d   z  } | S )a  
    Normalize keypoints locations based on image image_shape

    Args:
        keypoints (`torch.Tensor` of shape `(batch_size, num_keypoints, 2)`):
            Keypoints locations in (x, y) format.
        height (`int`):
            Image height.
        width (`int`):
            Image width.

    Returns:
        Normalized keypoints locations of shape (`torch.Tensor` of shape `(batch_size, num_keypoints, 2)`).
    r   r\   Nr   r@   .).NN)r&   r   r   r\   r  r
  )r   r  r  sizeshiftscales         r-   normalize_keypointsr$    sp     <<	0@0@	XY]^D1HEHHRL!#EU3a<00E/4JJIr,   zV
    LightGlue model taking images as inputs and outputting the matching of them.
    c                       e Zd ZdZdef fdZdedefdZ	 d#de	j                  de	j                  d	ed
z  dee	j                  ee	j                  e	j                  f   f   fdZde	j                  dede	j                  de	j                  de	j                  f
dZd$dZde	j                  de	j                  dede	j                  fdZde	j                  de	j                  de	j                  de	j                  de	j                  de	j                  defdZd Zde	j                  de	j                  de	j                  de	j                  dee	j                  e	j                  f   f
dZ	 	 	 d%de	j                  de	j                  dedede	j                  d
z  ded
z  d	ed
z  dee	j                  e	j                  e	j                  eef   fdZee	 	 	 d%d e	j.                  d!e	j0                  d
z  ded
z  d	ed
z  deez  f
d"              Z xZS )&LightGlueForKeypointMatchingar  
    LightGlue is a model matching keypoints in images by leveraging detections from a keypoint detector such as
    SuperPoint. It is based on the SuperGlue architecture and is designed to be lightweight and efficient.
    It consists of :
        1. Keypoint Encoder
        2. A Graph Neural Network with self and cross attention layers
        3. Matching Assignment layers

    The correspondence ids use -1 to indicate non-matching points.

    Philipp Lindenberger, Paul-Edouard Sarlin and Marc Pollefeys. LightGlue: Local Feature Matching at Light Speed.
    In ICCV 2023. https://huggingface.co/papers/2306.13643
    r0   c           	      ,   t         |   |       t        j                  |j                  |j
                        | _        |j                  j                  | _        |j                  | _	        |j                  | _        |j                  | _        |j                  | _        |j                  | _        | j                  | j                  k7  r2t        j                   | j                  | j                  d      | _        nt        j$                         | _        t'        |      | _        t        j*                  t-        |j                        D cg c]  }t/        ||       c}      | _        t        j*                  t-        |j                        D cg c]  }t3        |       c}      | _        t        j*                  t-        |j                  dz
        D cg c]  }t7        |       c}      | _        | j;                          y c c}w c c}w c c}w )N)trust_remote_codeTr2   )r   r   )r4   r5   r   from_configkeypoint_detector_configr(  keypoint_detectordescriptor_decoder_dim keypoint_detector_descriptor_dimr7   num_hidden_layers
num_layersfilter_thresholddepth_confidencewidth_confidencer   r6   input_projectionIdentityr/   positional_encoder
ModuleListranger   transformer_layersr   match_assignment_layersr   token_confidence	post_init)r;   r0   ir  r<   s       r-   r5   z%LightGlueForKeypointMatching.__init__  s    !>!J!J++v?W?W"
 170O0O0f0f-$33 22 & 7 7 & 7 7 & 7 7$"G"GG$&IId.S.SUYUhUhos$tD!$&KKMD!"<V"D"$--EJ6KcKcEde&v;e#
 (*}}<A&BZBZ<[\q*62\(
$ !#<A&BZBZ]^B^<_`q*62`!
 	 f ] as   HHHlayer_indexr>   c                     ddt        j                  d|z  | j                  z        z  z   }t        j                  |dd      S )z-scaled confidence threshold for a given layerg?g?g      r   r   )npr  r/  clip)r;   r=  r  s      r-   _get_confidence_thresholdz6LightGlueForKeypointMatching._get_confidence_threshold  s;    #tk'9DOO'K LLL	wwy!Q''r,   r   r   r=   Nc                     |j                         j                         }| j                  |      }| j                  ||      }||fS )Nr=   )r   r   r3  r5  )r;   r   r   r=   projected_descriptorskeypoint_encoding_outputs         r-   _keypoint_processingz1LightGlueForKeypointMatching._keypoint_processing  sO     "((*557 $ 5 5k B#'#:#:9[o#:#p $&>>>r,   keypoint_confidencesr   
num_pointsc                 |   |j                   \  }}|| j                  dz
  k  ru|j                  |dk(  d      }|j                  |dz  d      }| j	                  |      }d||k  j                         j                  d      |z  z
  }|| j                  kD  }	|	S t        j                  |t        j                        }	|	S )zRevaluate whether we should stop inference based on the confidence of the keypointsr   r   r   r@   g      ?rA   r[   )rh   r/  r   rj   rA  r]   sumr1  r&   onesrN   )
r;   rG  r=  r   rH  r   r  r  ratio_confidentearly_stopped_pairss
             r-   _get_early_stopped_image_pairsz;LightGlueForKeypointMatching._get_early_stopped_image_pairs%  s     


A1,, $8#C#CDAIq#Q #7#?#?
aQS#T 66{CI!%9I%E$L$L$N$R$RWX$R$Y\f$ffO"1D4I4I"I
 #" #(**Zuzz"J""r,   c                     |
||   }||   } | j                   |   ||      }t        || j                        \  }}||fS r   )r9  r  r0  )r;   r   r   r=  early_stopsr   r   r   s           r-   _get_keypoint_matchingz3LightGlueForKeypointMatching._get_keypoint_matching8  sW    "%k2K$D:--k:;M#:64CXCX#Y ''r,   confidencesr   c                 \    |d| j                   z
  kD  }|||| j                  |      k  z  }|S )z#mask points which should be removedr   )r2  rA  )r;   rR  r   r=  keeps        r-   _get_pruning_maskz.LightGlueForKeypointMatching._get_pruning_mask@  s<    T2223"K4#A#A+#NNNDr,   r  prune_outputc                    |j                   \  }}	}	| j                  |   j                  |      }
| j                  ||
|      j	                  |dk(  t        j                  d            fd||d   |d   |fD        \  }}}}}t        |      D ]  }||||   fxx   dz  cc<    d ||||fD        \  }}}}||f}t        |dd      }|||||fS )	z
        For a given layer, prune keypoints based on the confidence of the keypoints and the matchability of the
        descriptors.
        r   Fc              3   n   K   | ]&  }t        |      D cg c]
  \  }}||    c}} ( y c c}}w wr   )zip).0r   tr   pruned_keypoints_masks       r-   	<genexpr>zJLightGlueForKeypointMatching._do_layer_keypoint_pruning.<locals>.<genexpr>[  s9      c
 %(0E$FGDQtWGc
Gs   5/5r   c              3   6   K   | ]  }t        |d         yw)T)batch_firstNr   )rZ  pruned_tensors     r-   r]  zJLightGlueForKeypointMatching._do_layer_keypoint_pruning.<locals>.<genexpr>c  s$      S
 D99S
s   Tr@   r_  padding_value)	rh   r9  r   rU  r   r&   r   r7  r   )r;   r   r   r   r  rV  rG  r=  r   r  descriptors_matchabilitypruned_descriptorspruned_keypoints_0pruned_keypoints_1pruned_maskpruned_indicesr<  pruned_keypointsr\  s                     @r-   _do_layer_keypoint_pruningz7LightGlueForKeypointMatching._do_layer_keypoint_pruningG  s+    ',,
Aq#'#?#?#L#]#]^i#j  $ 6 67KMegr s 5 A A$!)U\\Z_M` ac
&	!ilDY[bcc
_.0BKQ_ z" 	4AN1--.!3.	4S
"46HJ\^i!jS
O.0BK /0BC%n$VXY!#3^[R^^^r,   c                     t        j                        t        j                  j                  d         }|   }|   d ||fD        \  }}d ||fD        \  }}fd||||fD        \  }}}}||||fS )Nr   c              3   8   K   | ]  }t        |d d        yw)Tr@   ra  Nr   rZ  r   s     r-   r]  zMLightGlueForKeypointMatching._concat_early_stopped_outputs.<locals>.<genexpr>y  s$      3
 TDD3
   c              3   8   K   | ]  }t        |d d        yw)Tr   ra  Nr   rm  s     r-   r]  zMLightGlueForKeypointMatching._concat_early_stopped_outputs.<locals>.<genexpr>}  s$      >
 TCC>
rn  c              3   (   K   | ]	  }|     y wr   r+   )rZ  r   early_stops_indicess     r-   r]  zMLightGlueForKeypointMatching._concat_early_stopped_outputs.<locals>.<genexpr>  s!      g
 &'g
s   )r&   rS   r  rh   )r;   rq  final_pruned_keypoints_indices!final_pruned_keypoints_iterationsr   r   idsorder_indicess    `      r-   _concat_early_stopped_outputsz:LightGlueForKeypointMatching._concat_early_stopped_outputsl  s     $kk*=>ll.44Q78+C01-@3
"$BC3
//>
*,MN>
::g
 .1	g
c"@Bc ./PRY[jjjr,   r   r   r   c                    |j                   \  }fd|||fD        \  }}}|d d df   }|d d df   }|d d df   }|d d df   }	|d d df   }
|d d df   }t        j                  dz  d|fd|j                  |j                        }t        j
                  dz  d|f|j                  |j                        }t        dz        D ]  }t        j                  ||   dk(  d||   j                  d||   j                  d                  ||d||   f<   t        j                  |	|   dk(  d||   j                  d|	|   j                  d                  ||d||   f<   |
|   ||d||   f<   ||   ||d||   f<    ||fS )Nc              3   J   K   | ]  }|j                  d z  d d        yw)r   r@   N)rj   )rZ  r   r   s     r-   r]  zJLightGlueForKeypointMatching._do_final_keypoint_pruning.<locals>.<genexpr>  s'      -
7=FNN:?Ar2-
s    #r   r   r   r@   r   )r   )
rh   r&   fullr   r\   zerosr7  r  r	  clamp)r;   r  r   r   r   r  r  r  r  r  r  r  _matches_matching_scoresr<  r   s                  @r-   _do_final_keypoint_pruningz7LightGlueForKeypointMatching._do_final_keypoint_pruning  s     
A-
BI7TcAd-
)/ 1a4=1a4=1a4=1a4=*1a40*1a40 ::zQ=A2gnndkdqdqr ;;1_a/oNcNc
 zQ' 	FA*/++r!2x{'9'9!Xa[=N=NST=N=U'V+HQ8A;&' +0++r!2x{'9'9!Xa[=N=NST=N=U'V+HQ8A;&' 3C12EQ8A;./2B12EQ8A;./	F )))r,   r  r  r   c           
      &	  ( |rdnd }|rdnd }	|j                   d   dk(  rT|j                   d d }
|j                  |
dt        j                        |j	                  |
      |j	                  |
      ||	fS |j
                  }|j                   \  }}}}t        j                  |j                  |d      d      }|j                  |dz  |d      }||j                  |dz  |      nd }|j                  |dz  || j                        }t        j                  |dz  |      }t        |||      }| j                  |||	      \  }}|d   }| j                  dkD  }| j                  dkD  }g }g }g }g }g }t        j                  d||      j                  |dz  d      }t        j                  |      }t!        | j"                        D ]3  }|j%                         }|| j'                  ||      }n&t        j(                  ||d
   f|j
                        } | j*                  |   |||||      }|\  }}} |r||z   }|r|	| z   }	|r|| j"                  dz
  k  r+ | j,                  |   |      }!| j/                  |!|||      }"n%t        j(                  |t        j0                        }"t        j2                  |"      r|"j5                  d      (|(   }#| j7                  |||(      \  }$}%|j9                  t;        |#             |j9                  t;        |$             |j9                  t;        |%             |r:|j9                  t;        |(                |j9                  t;        |(                ||"    }t=        (fd||d   |d   ||fD              \  }}&}'}}|&|'f}|rt=        (fd||!fD              \  }}}!t        j>                  |"      r n$|s| jA                  |||||!|      \  }}}}}6 |r4|r2| jC                  |||||      \  }}}}| jE                  ||||      \  }}nE| j7                  ||| j"                  dz
        \  }}t        j                  |      | j"                  z  }|j                  |d|      }|||||	fS )Nr+   r   r   r@   r[   r   rA   r   rC  rR   )rt   r=   r   )rH  )rP  c              3   *   K   | ]
  }|      y wr   r+   rZ  r   rP  s     r-   r]  zALightGlueForKeypointMatching._match_image_pair.<locals>.<genexpr>  s"      V" |,V   c              3   *   K   | ]
  }|      y wr   r+   r  s     r-   r]  zALightGlueForKeypointMatching._match_image_pair.<locals>.<genexpr>!  s"      l & #K<0lr  )#rh   r   r&   r   	new_zerosr   rJ  rj   r-  r  r$  rF  r1  r2  ri   	ones_liker7  r/  r!  get_extended_attention_maskrK  r8  r:  rN  rN   anyrC   rQ  extendlistr*   allrj  rv  r~  ))r;   r   r   r  r  r   r   r=   r   r   rh   r   r   r  initial_num_keypointsnum_points_per_pairimage_indicesrE  do_early_stopdo_keypoint_pruningrq  r   r   rr  rs  pruned_keypoints_indicespruned_keypoints_iterationsr=  r   extended_attention_masklayer_outputr    	attentionrG  rM  early_stopped_image_indicesearly_stopped_matchesearly_stopped_matching_scoreskeypoints_0
keypoint_1rP  s)                                           @r-   _match_image_pairz.LightGlueForKeypointMatching._match_image_pair  sg    #7BD0d??1"OOCR(E""5"EII">##E*##E*!  !!2;///
A,a#iiZ(D!L%%j1n6KQO	FJFVt||JN,AB\`!))*q.:OQUQvQvwZ!^FC'	65A	040I0I9M 1J 1
-- -Q/	 --1 #33a7 )+&,.)#(<<3HQW#X#_#_`jmn`npr#s &+oo6N&O# 1 R	K%**,K*.*J*J4Q\*]'*/**j+b/5R[d[k[k*l'?422;?6%9"3L 5A1K	#$5$E! !/)!;1!44+M4+@+@+Mk+Z( +/*M*M,k4L_ +N +'
 +0**Zuzz*R'9901 #6"G"G"JK2?2L/KOKfKf#T;K Lg LH)+H (..t4O/PQNN4(=#>?#**40M+NO*6==dC[\gCh>ij9@@FabmFnAop +>?R>R*S'PU V'2IaL)A,PTVc&dV QMKj$ "-j 9I*fk l !9 ; 4+l gc02MOc 9901" 33#!03,# dY(@$HcQR	h 0 22'25# h*,MwXg (,'F'F.%	($G_ (,'B'B;PTVZVeVehiVi'j$G_050PSWSbSb0b-,M,U,U0-
)
 -
 	
r,   r   labelsc           
      0   d }|t        d      ||n| j                  j                  }||n| j                  j                  }|j                  dk7  s|j                  d      dk7  rt        d      |j                  \  }}}	}
}|j                  |dz  |	|
|      }| j                  |      }|d d \  }}}}|j                  |ddd      j                  |      }|j                  |dd| j                        j                  |      }|j                  |dd      }|j                         }|d d d d d d df   |z  |d d d d d d df<   |d d d d d d df   |
z  |d d d d d d df<   | j                  |||
||||	      \  }}}}}t        ||||||||
      S )Nz9LightGlue is not trainable, no labels should be provided.   r   r   zOInput must be a 5D tensor of shape (batch_size, 2, num_channels, height, width)   r@   r   )r   r   r=   )r   r   r   r   r   r   r    r!   )
ValueErrorr0   r   r=   ndimr!  rh   rj   r+  r_   r-  cloner  r   )r;   r   r  r   r=   rw   r   r   r  channelsr  r  keypoint_detectionsr   r   r   absolute_keypointsr   r   r   r    r!   s                         r-   rK   z$LightGlueForKeypointMatching.forward^  s    XYY1B1N-TXT_T_TqTq$8$D $++JjJj 	 !\%6%6q%9Q%>noo1=1C1C.
Ax#++JNHfeT"44\B*=bq*A'	1k4%%j!R;>>|L	!))*aT=b=bcffgst||J2.&__.);Aq!QJ)G%)O1aA:&);Aq!QJ)G&)P1aA:&EIE[E[/!5 F\ F
B%
 /+'!	
 		
r,   rL   r   )NNN)r"   r#   r$   r%   r   r5   r   r]   rA  r&   rM   rN   r*   rF  rN  rQ  rU  rj  rv  r~  r  r   r   r'   
LongTensorr   rK   rO   rP   s   @r-   r&  r&    s    @(S (U ( gl? <<?49LL?X\_cXc?	u||U5<<#=>>	??#$)LL#?B#JO,,#didpdp#	#&(U\\ 5<< ^a fkfrfr #_\\#_ <<#_ ll	#_
 #_ ll#_ $ll#_ #_Jk@#*#* #* 	#*
 ||#* 
u||U\\)	*#*V %))-,0k
<<k
 \\k
 	k

 k
 llT!k
  $;k
 #Tkk
 
u||U\\5<<E	Fk
Z  +/)-,04
''4
   4'4
  $;	4

 #Tk4
 
0	04
  4
r,   r&  )r   )r   )5collections.abcr   dataclassesr   numpyr?  r&   r   torch.nn.utils.rnnr   activationsr	   integrationsr
   modeling_flash_attention_utilsr   modeling_utilsr   r   processing_utilsr   utilsr   r   r   utils.genericr   auto.modeling_autor   configuration_lightgluer   r   Moduler/   rY   re   rM   r   ro   r]   r   r   r   r   r   r   r   r   r*   r  r$  r&  __all__r+   r,   r-   <module>r     sk  ( % !    + ! / B F & D D - > 4  7k  7  7F "<8	UU\\ 	U# 	U%,, 	U& %II%<<% 
% <<	%
 LL4'% % % '(%2 )*D) D) +D)N299 "P>		 P>f-2\\JO,,
\\&BII &R	BII 	   $ELL $U $uU\\[`[g[gMgGh $@5<<  S U\\ , 
k
#; k

k
\ &'E
Fr,   