
    qiZe                        d Z ddlZddlmZ ddlZddlmc mZ ddlm	Z	mZ ddl
mZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZmZmZ ddlmZ ddlmZ ddlmZmZm Z m!Z! ddl"m#Z#m$Z$ ddl%m&Z& ddl'm(Z(  ejR                  e*      Z+ G d de(      Z,e ed       G d de                    Z- G d de$      Z. G d de!      Z/ G d de      Z0 G d d e&      Z1 G d! d"e       Z2 G d# d$e      Z3 G d% d&ejh                        Z5 G d' d(ejl                        Z7 G d) d*ejl                        Z8 G d+ d,ejl                        Z9e G d- d.e             Z: ed/       G d0 d1e#             Z;g d2Z<y)3zPyTorch EoMT model.    N)	dataclass)Tensornn   )initialization)ACT2FN)ModelOutput)PreTrainedModel)Unpack)TransformersKwargsauto_docstringlogging)merge_with_config_defaults)capture_outputs   )Dinov2EmbeddingsDinov2LayerDinov2LayerScaleDinov2PatchEmbeddings)#Mask2FormerForUniversalSegmentationMask2FormerLoss)SiglipAttention)	ViTConfigc                   z     e Zd ZdZdZ	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddedededededed	ef fd
Z xZS )
EomtConfiga  
    This is the configuration class to store the configuration of a [`EomtForUniversalSegmentation`]. It is used to instantiate an EoMT model
    according to the specified arguments, defining the model architecture. Instantiating a configuration with the
    defaults will yield a similar configuration to that of the EoMT
    [tue-mps/coco_panoptic_eomt_large_640](https://huggingface.co/tue-mps/coco_panoptic_eomt_large_640)
    architecture.

    Configuration objects inherit from [`PreTrainedConfig`] and can be used to control the model outputs. Read the
    documentation from [`PreTrainedConfig`] for more information.

    Args:
        hidden_size (`int`, *optional*, defaults to 1024):
            Dimensionality of the hidden representations.
        num_hidden_layers (`int`, *optional*, defaults to 24):
            Number of hidden layers in the Transformer encoder.
        num_attention_heads (`int`, *optional*, defaults to 16):
            Number of attention heads in each attention layer.
        mlp_ratio (`int`, *optional*, defaults to 4):
            Ratio of the MLP hidden dimensionality to the hidden size.
        hidden_act (`str` or `function`, *optional*, defaults to `"gelu"`):
            The non-linear activation function (function or string) in the encoder.
        hidden_dropout_prob (`float`, *optional*, defaults to 0.0):
            The dropout probability for all fully connected layers in the embeddings and encoder.
        initializer_range (`float`, *optional*, defaults to 0.02):
            The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
        layer_norm_eps (`float`, *optional*, defaults to 1e-06):
            The epsilon used by the layer normalization layers.
        image_size (`int`, *optional*, defaults to 640):
            The size (resolution) of each input image.
        patch_size (`int`, *optional*, defaults to 16):
            The size (resolution) of each patch.
        num_channels (`int`, *optional*, defaults to 3):
            The number of input channels.
        layerscale_value (`float`, *optional*, defaults to 1.0):
            Initial value for the LayerScale parameter.
        drop_path_rate (`float`, *optional*, defaults to 0.0):
            The stochastic depth rate (drop path) used during training.
        num_upscale_blocks (`int`, *optional*, defaults to 2):
            Number of upsampling blocks used in the decoder or segmentation head.
        attention_dropout (`float`, *optional*, defaults to 0.0):
            Dropout probability applied after attention projection.
        use_swiglu_ffn (`bool`, *optional*, defaults to `False`):
            Whether to use the SwiGLU feedforward neural network.
        num_blocks (`int`, *optional*, defaults to 4):
            Number of feature blocks or stages in the architecture.
        no_object_weight (`float`, *optional*, defaults to 0.1):
            Loss weight for the 'no object' class in panoptic/instance segmentation.
        class_weight (`float`, *optional*, defaults to 2.0):
            Loss weight for classification targets.
        mask_weight (`float`, *optional*, defaults to 5.0):
            Loss weight for mask prediction.
        dice_weight (`float`, *optional*, defaults to 5.0):
            Loss weight for the dice loss component.
        train_num_points (`int`, *optional*, defaults to 12544):
            Number of points to sample for mask loss computation during training.
        oversample_ratio (`float`, *optional*, defaults to 3.0):
            Oversampling ratio used in point sampling for mask training.
        importance_sample_ratio (`float`, *optional*, defaults to 0.75):
            Ratio of points to sample based on importance during training.
        num_queries (`int`, *optional*, defaults to 200):
            Number of object queries in the Transformer.
        num_register_tokens (`int`, *optional*, defaults to 4):
            Number of learnable register tokens added to the transformer input.

    Example:

    ```python
    >>> from transformers import EomtConfig, EomtForUniversalSegmentation

    >>> # Initialize configuration
    >>> config = EomtConfig()

    >>> # Initialize model
    >>> model = EomtForUniversalSegmentation(config)

    >>> # Access config
    >>> config = model.config
    ```eomtno_object_weightclass_weightmask_weightdice_weighttrain_num_pointsoversample_ratioimportance_sample_ratioc                 2   || _         || _        || _        || _        || _        || _        || _        || _        || _        || _	        || _
        || _        || _        || _        || _        || _        t!        | D  d||||||||	|
|d
| | `| `| `| `| `| `y )N)
hidden_sizenum_hidden_layersnum_attention_headshidden_dropout_prob
hidden_actinitializer_rangelayer_norm_eps
image_size
patch_sizenum_channels )	mlp_ratioattention_dropoutlayerscale_valuedrop_path_ratenum_upscale_blocksuse_swiglu_ffn
num_blocksr   r   r   r    r!   r"   r#   num_queriesnum_register_tokenssuper__init__intermediate_sizeqkv_bias
pooler_actpooler_output_sizeencoder_strideattention_probs_dropout_prob)selfr%   r&   r'   r0   r)   r(   r*   r+   r,   r-   r.   r2   r3   r4   r1   r5   r6   r   r   r   r    r!   r"   r#   r7   r8   kwargs	__class__s                               W/opt/pipecat/venv/lib/python3.12/site-packages/transformers/models/eomt/modular_eomt.pyr:   zEomtConfig.__init__   s    < #!2 0,"4,$ 0(&& 0 0'>$&#6  	
#/ 3 3!/)!!%	
 	
 "MO#-    )i            gelu        g{Gz?ư>i  rG   r   g      ?rJ   r   rJ   FrH   g?g       @      @rL   i 1  g      @g      ?   rH   )	__name__
__module____qualname____doc__
model_typefloatintr:   __classcell__rC   s   @rD   r   r   3   s    M^ J "%!   %"%)-7B.&  'B.( )B.* +B., -B.. /B.0  1B.2 "'3B. B.rE   r   a  
    Class for outputs of [`EomtForUniversalSegmentationOutput`].

    This output can be directly passed to [`~EomtImageProcessor.post_process_semantic_segmentation`] or
    [`~EomtImageProcessor.post_process_instance_segmentation`] or
    [`~EomtImageProcessor.post_process_panoptic_segmentation`] to compute final segmentation maps. Please, see
    [`~EomtImageProcessor] for details regarding usage.
    )custom_introc                   <   e Zd ZU dZdZej                  dz  ed<   dZej                  dz  ed<   dZ	ej                  dz  ed<   dZ
ej                  dz  ed<   dZeej                     dz  ed<   dZeej                     dz  ed<   dZeej                      dz  ed	<   y)
"EomtForUniversalSegmentationOutputa*  
    loss (`torch.Tensor`, *optional*):
        The computed loss, returned when labels are present.
    class_queries_logits (`torch.FloatTensor`):
        A tensor of shape `(batch_size, num_queries, num_labels + 1)` representing the proposed classes for each
        query. Note the `+ 1` is needed because we incorporate the null class.
    masks_queries_logits (`torch.FloatTensor`):
        A tensor of shape `(batch_size, num_queries, height, width)` representing the proposed masks for each
        query.
    last_hidden_state (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
        Last hidden states (final feature map) of the last layer.
    hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
        Tuple of `torch.FloatTensor` (one for the output of the embeddings + one for the output of each stage) of
        shape `(batch_size, sequence_length, hidden_size)`. Hidden-states all layers of the model.
    attentions (`tuple(tuple(torch.FloatTensor))`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
        Tuple of `tuple(torch.FloatTensor)` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
        sequence_length)`. Self and Cross Attentions weights from transformer decoder.
    patch_offsets (`list[torch.Tensor]`, *optional*):
        list of tuples indicating the image index and start and end positions of patches for semantic segmentation.
    Nlossclass_queries_logitsmasks_queries_logitslast_hidden_statehidden_states
attentionspatch_offsets)rN   rO   rP   rQ   rZ   torchFloatTensor__annotations__r[   r\   r]   r^   tupler_   r`   listr   r/   rE   rD   rY   rY      s    * &*D%

d
")59%++d2959%++d2926u((4/659M5**+d2926Je''(4/6/3M4%,3rE   rY   c                       e Zd Zy)EomtLossNrN   rO   rP   r/   rE   rD   rg   rg          rE   rg   c                       e Zd Zy)EomtPatchEmbeddingsNrh   r/   rE   rD   rk   rk      ri   rE   rk   c                   Z    e Zd ZdeddfdZd Zdej                  dej                  fdZy)EomtEmbeddingsconfigreturnNc                    t         j                  j                  |        || _        |j                  | _        t        j
                  t        j                  dd|j                              | _	        t        j
                  t        j                  d|j                  |j                              | _        t        |      | _        | j                  j                  }t        j                   |j"                        | _        d|j                  z   | _        t        j(                  ||j                        | _        | j-                  dt        j.                  |      j1                  d      d       y )N   position_idsrq   F)
persistent)r   Moduler:   rn   r-   	Parameterra   randnr%   	cls_tokenzerosr8   register_tokensrk   patch_embeddingsnum_patchesDropoutr(   dropoutnum_prefix_tokens	Embeddingposition_embeddingsregister_bufferarangeexpand)rA   rn   r}   s      rD   r:   zEomtEmbeddings.__init__   s    
		4  ++ekk!Q8J8J&KL!||EKK6;U;UW]WiWi,jk 3F ;++77zz&"<"<=!"V%?%?!?#%<<V=O=O#P ^U\\+-F-M-Mg-VchirE   c                     t        d      )NzNot needed for Eomt ModelAttributeErrorrA   s    rD   interpolate_pos_encodingz'EomtEmbeddings.interpolate_pos_encoding  s    899rE   pixel_valuesc                    |j                   \  }}}}| j                  j                  j                  j                  }| j                  |j                  |            }| j                  j                  |dd      }| j                  j                  |dd      }|| j                  | j                        z   }t        j                  |||gd      }| j                  |      }|S )N)dtypert   rq   dim)shaper|   
projectionweightr   tory   r   r{   r   rr   ra   catr   )rA   r   
batch_size_target_dtype
embeddings
cls_tokensr{   s           rD   forwardzEomtEmbeddings.forward  s    *00
Aq!,,77>>DD**<???+NO
^^**:r2>
..55j"bI$":":4;L;L"MM
YY
OZHaP
\\*-
rE   )	rN   rO   rP   r   r:   r   ra   r   r   r/   rE   rD   rm   rm      s8    jz jd j :ELL U\\ rE   rm   c                       e Zd Zy)EomtAttentionNrh   r/   rE   rD   r   r      ri   rE   r   c                       e Zd Zy)EomtLayerScaleNrh   r/   rE   rD   r   r   $  ri   rE   r   c                   f    e Zd Z	 ddej                  dej                  dz  dej                  fdZy)	EomtLayerNr^   attention_maskro   c                 *   | j                  |      }| j                  ||      \  }}| j                  |      }| j                  |      |z   }| j	                  |      }| j                  |      }| j                  |      }| j                  |      |z   }|S N)norm1	attentionlayer_scale1	drop_pathnorm2mlplayer_scale2)rA   r^   r   hidden_states_normself_attention_outputr   layer_outputs          rD   r   zEomtLayer.forward)  s    
 "ZZ6#'>>2Dn#U q $ 1 12G H '<=M zz-0xx-((6 ~~l3mCrE   r   )rN   rO   rP   ra   r   r   r/   rE   rD   r   r   (  s9     /3|| t+ 
	rE   r   c                   X     e Zd Zd fd	Zdej
                  dej
                  fdZ xZS )EomtLayerNorm2dc                 *    t         |   |||       y )N)epselementwise_affine)r9   r:   )rA   r.   r   affinerC   s       rD   r:   zEomtLayerNorm2d.__init__A  s    36JrE   hidden_statero   c                     |j                  dddd      }t        j                  || j                  | j                  | j
                  | j                        }|j                  dddd      }|S )Nr   r   r   rq   )permuteF
layer_normnormalized_shaper   biasr   )rA   r   s     rD   r   zEomtLayerNorm2d.forwardD  sb    #++Aq!Q7||L$2G2GVZV_V_aeaiaij#++Aq!Q7rE   )rK   T)rN   rO   rP   r:   ra   r   r   rU   rV   s   @rD   r   r   @  s$    KELL U\\ rE   r   c                   \     e Zd Zdef fdZdej                  dej                  fdZ xZS )EomtScaleLayerrn   c                    t         |           |j                  }t        j                  ||dd      | _        t        |j                     | _        t        j                  ||dd|d      | _
        t        |      | _        y )Nr   )kernel_sizestrider   rq   F)r   paddinggroupsr   )r9   r:   r%   r   ConvTranspose2dconv1r   r)   
activationConv2dconv2r   layernorm2drA   rn   r%   rC   s      rD   r:   zEomtScaleLayer.__init__L  su    ((''[aXYZ
 !2!23YY

 +;7rE   r^   ro   c                     | j                  |      }| j                  |      }| j                  |      }| j                  |      }|S r   )r   r   r   r   rA   r^   s     rD   r   zEomtScaleLayer.forward\  sB    

=16

=1((7rE   	rN   rO   rP   r   r:   ra   r   r   rU   rV   s   @rD   r   r   K  s*    8z 8 U\\ ell rE   r   c                   \     e Zd Zdef fdZdej                  dej                  fdZ xZS )EomtScaleBlockrn   c                     t         |           |j                  | _        t	        j
                  t        | j                        D cg c]  }t        |       c}      | _        y c c}w r   )	r9   r:   r4   r6   r   
ModuleListranger   block)rA   rn   r   rC   s      rD   r:   zEomtScaleBlock.__init__e  sG     33]]E$//DZ#[qN6$:#[\
#[s   A&r^   ro   c                 8    | j                   D ]
  } ||      } |S r   )r   )rA   r^   r   s      rD   r   zEomtScaleBlock.forwardj  s%    ZZ 	1E!-0M	1rE   r   rV   s   @rD   r   r   d  s,    ]z ]
U\\ ell rE   r   c                   \     e Zd Zdef fdZdej                  dej                  fdZ xZS )EomtMaskHeadrn   c                    t         |           |j                  }t        j                  ||      | _        t        j                  ||      | _        t        j                  ||      | _        t        |j                     | _
        y r   )r9   r:   r%   r   Linearfc1fc2fc3r   r)   r   r   s      rD   r:   zEomtMaskHead.__init__q  sa    ((99[+699[+699[+6 !2!23rE   r^   ro   c                     | j                  | j                  |            }| j                  | j                  |            }| j                  |      }|S r   )r   r   r   r   r   s     rD   r   zEomtMaskHead.forwardz  sD    (?@(?@/rE   r   rV   s   @rD   r   r   p  s*    4z 4U\\ ell rE   r   c                       e Zd ZU dZeed<   dZdZdZdZ	dgZ
dZeed	Z ej                          d
ej$                  ddfd       Zy)EomtPreTrainedModelz
    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
    models.
    rn   r   r   )imageFr   T)r^   r_   modulero   Nc                     | j                   j                  }t        |t        j                  t        j
                  t        j                  f      rt        j                  |j                  t        j                  d             |j                  xt        j                  j                  j                  |j                        \  }}|dkD  rdt        j                  |      z  nd}t        j                  |j                  | |       y y t        |t        j                         r?t        j"                  |j                         t        j$                  |j                         y t        |t        j&                        rtt        j(                  |j                  dd       |j*                  Et-        |j                  dd      s-t        j$                  |j                  |j*                            y y y t        |t.              rBt1        |d	      r5t        j2                  |j4                  | j                   j6                         y y t        |t8              rt        j:                  |j<                  d|       t        j$                  |j>                         t        j@                  |jB                  t        jD                  |jB                  jF                  d
         jI                  d             y t        |tJ              rRt        jL                  |jN                  dz         }|jP                  |d
<   t        j@                  |jR                  |       y t        |tT              r t        j"                  |jV                         y y )N   )ar   rq   rJ   )meanstd_is_hf_initializedFlambda1rt   rs   ),rn   r*   
isinstancer   r   r   r   initkaiming_uniform_r   mathsqrtr   ra   _calculate_fan_in_and_fan_outuniform_	LayerNormones_zeros_r   normal_padding_idxgetattrr   hasattr	constant_r   r2   rm   trunc_normal_ry   r{   copy_rr   r   r   r   rg   ones
num_labelseos_coefempty_weightEomtForUniversalSegmentationattn_mask_probs)rA   r   r   fan_inr   boundr   s          rD   _init_weightsz!EomtPreTrainedModel._init_weights  sJ   kk++fryy"))R5G5GHI!!&--499Q<@{{&!HHMMGGV	17!DIIf--fkkE659 ' -JJv}}%KK$-LLSa8!!-gfmmMach6iFMM&*<*<=> 7j-/vy)v~~t{{/K/KL */v//csCKK../JJv**ELL9L9L9R9RSU9V,W,^,^_f,gh) ::f&7&7!&;<L%LJJv**L9 <=JJv--. >rE   )rN   rO   rP   rQ   r   rc   base_model_prefixmain_input_nameinput_modalitiessupports_gradient_checkpointing_no_split_modules_supports_sdpar   r   _can_record_outputsra   no_gradr   rv   r   r/   rE   rD   r   r     so    
 $O!&+#$N"#
 U]]_/BII /$ / /rE   r   zV
    The EoMT Model with head on top for instance/semantic/panoptic segmentation.
    c                       e Zd ZdefdZd Zd Zdej                  fdZ	e
d        Zeee	 	 	 dd	ed
ee   dz  dee   dz  dee   dz  dee   defd                     Zy)r   rn   c                    t        j                  | |       || _        |j                  | _        t	        |      | _        t        j                  |j                  |j                        | _
        t        j                  |j                  |j                        | _        t        j                  t        |j                        D cg c]  }t!        |       c}      | _        t%        |      | _        t)        |      | _        t        j,                  |j                  |j.                  dz         | _        |j2                  |j4                  z  |j2                  |j4                  z  f| _        |j8                  |j:                  |j<                  d| _        tA        || j>                        | _!        | jE                  dtG        jH                  |jJ                               | jM                          y c c}w )N)r   rq   )loss_cross_entropy	loss_mask	loss_dice)rn   weight_dictr   )'r
   r:   rn   r&   rm   r   r   r   r%   r+   	layernormr   r7   queryr   r   r   layersr   upscale_blockr   	mask_headr   r   class_predictorr,   r-   	grid_sizer   r   r    r  rg   	criterionr   ra   r   r6   	post_init)rA   rn   r   s      rD   r:   z%EomtForUniversalSegmentation.__init__  sr     v.!'!9!9(0f&8&8f>S>ST\\&"4"4f6H6HI
mmfF^F^@_$`1Yv%6$`a+F3%f-!yy););V=N=NQR=RS ++v/@/@@&BSBSW]WhWhBhi"("5"5++++.
 "T=M=MN.

6;L;L0MN% %as   G0c                 .    | j                   j                  S r   )r   r|   r   s    rD   get_input_embeddingsz1EomtForUniversalSegmentation.get_input_embeddings  s    ///rE   c                     t        d      )NzNote needed for Eomt Model.r   r   s    rD   get_auxiliary_logitsz1EomtForUniversalSegmentation.get_auxiliary_logits  s    :;;rE   logitsc                    |d d d | j                   j                  d d f   }| j                  |      }|d d | j                   j                  | j                  j                  z   d d d f   }|j                  dd      } |j                  |j                  d   dg| j                   }| j                  |      }| j                  |      }t        j                  d||      }||fS )Nrq   r   r   rt   zbqc, bchw -> bqhw)rn   r7   r  r   r   	transposereshaper   r  r  r  ra   einsum)rA   r  query_tokensclass_logitsprefix_tokensmask_logitss         rD   predictz$EomtForUniversalSegmentation.predict  s    a!:4;;#:#:!:A=>++L9q$++"9"9DOO<]<]"]"_abbc%//15---m.A.A!.DbZ4>>Z~~l3**=9ll#6mTL((rE   c                     |dk  r9t        j                  | j                  d   ||      |kD  }d| d d d ||d f   |<   | S )Nrq   r   )device)ra   randr   )	attn_maskprobnum_query_tokensencoder_start_tokensr&  random_queriess         rD   _disable_attention_maskz4EomtForUniversalSegmentation._disable_attention_mask  sW    !8"ZZ	(:<LU[\_ccN VWIa***,@,AAB>RrE   Nr   mask_labelsclass_labelsr`   rB   ro   c                 (   d\  }}d}|t        d      | j                  |      }	t        | j                        D ]  \  }
}|
| j                  | j
                  j                  z
  k(  rp| j                  j                  dddddf   j                  |	j                  d   dd      j                  |	j                        }t        j                  ||	fd      }	|
| j                  | j
                  j                  z
  k\  r| j                  s7| j                   |
| j                  z
  | j
                  j                  z      dkD  r| j#                  |	      }| j%                  |      \  }}||fz  }||fz  }t        j&                  |	j                  d   |	j                  d   |	j                  d   |	j                  t        j(                        }t+        j,                  || j.                  d	
      }|j1                  |j3                  d      |j3                  d      d      }| j
                  j4                  }|| j                  j6                  z   }|dkD  |ddd||df<   | j9                  || j                   |
| j                  z
  | j
                  j                  z      |||j                        }|ddddf   j                  d| j
                  j:                  dd      }|j=                         j?                  | d      } ||	|      }	 | j#                  |	      }| j%                  |      \  }}||fz  }||fz  }d}|B|@d}tA        ||      D ]/  \  }}| jC                  ||||d      }|| jE                  |      z  }1 tG        |||||      S )ag  
        mask_labels (`list[torch.Tensor]`, *optional*):
            list of mask labels of shape `(num_labels, height, width)` to be fed to a model
        class_labels (`list[torch.LongTensor]`, *optional*):
            list of target class labels of shape `(num_labels, height, width)` to be fed to a model. They identify the
            labels of `mask_labels`, e.g. the label of `mask_labels[i][j]` if `class_labels[i][j]`.
        patch_offsets (`list[torch.Tensor]`, *optional*):
            list of tuples indicating the image index and start and end positions of patches for semantic segmentation.
        )r/   r/   Nz You have to specify pixel_valuesr   rt   rq   r   )r&  r   bilinear)sizemode)r)  r*  r+  r&  .g    erJ   )r\   r[   r.  r/  auxiliary_predictions)rZ   r\   r[   r]   r`   )$
ValueErrorr   	enumerater  r&   rn   r6   r  r   r   r   r   r&  ra   r   trainingr   r  r$  r   boolr   interpolater  viewr2  r7   r   r-  r'   rS   masked_fillzipget_loss_dictget_lossrY   )rA   r   r.  r/  r`   rB   masks_queries_logits_per_layerclass_queries_logits_per_layerr   r^   idxlayer_moduler  norm_hidden_statesr\   r[   interpolated_logitsr*  r+  sequence_outputrZ   	loss_dicts                         rD   r   z$EomtForUniversalSegmentation.forward  s   * JPF&(F?@@5!*4;;!7 .	HCd,,t{{/E/EEE

))$1*5<<]=P=PQR=SUWY[\__`m`t`tu %		5-*@a Hd,,t{{/E/EEE!5!5cD<R<R6RUYU`U`UkUk6k!lop!p%)^^M%B"=A\\J\=]:$&:.3G2II..3G2II.!&!''*!''*!''*(//**" '(mm4Ht~~dn&o#&9&>&>',,Q/1D1I1I!1Lb'# $(;;#:#: '7$//:[:['[$ ObdeNeq"3#3"35I5JJK "&!=!="--cD4J4J.JT[[McMc.cd%5)=)00 "> " "04!=!D!DRIhIhjlnp!q!/!5!5!7!C!C^OUY!Z(GM].	H` ..759\\/5R22&+?*AA&&+?*AA&"|'?D>A.0N? 
1:$&: !..)=)= +!-*. / 	 i00
1 2!5!5-'
 	
rE   )NNN)rN   rO   rP   r   r:   r  r  ra   r   r$  staticmethodr-  r   r   r   re   r   r   rY   r   r/   rE   rD   r   r     s    z 80<)ell )      ,0,0-1e
e
 &\D(e
 6lT)	e

 F|d*e
 +,e
 
,e
    e
rE   r   )r   r   r   )=rQ   r   dataclassesr   ra   torch.nn.functionalr   
functionalr   r    r   r   activationsr   
file_utilsr	   modeling_utilsr
   processing_utilsr   utilsr   r   r   utils.genericr   utils.output_capturingr   dinov2.modeling_dinov2r   r   r   r    mask2former.modeling_mask2formerr   r   siglip.modeling_siglipr   vit.configuration_vitr   
get_loggerrN   loggerr   rY   rg   rk   rm   r   r   r   r   r   rv   r   r   r   r   r   __all__r/   rE   rD   <module>rZ     s     !     & ! . & 
 8 5  d 4 - 
		H	%T. T.n 	4 4	 4>	 		/ 	!% !H	O 		% 	 0bll RYY 2	RYY 	299 " /// // //d 
f
#F f

f
R PrE   