
    qif                        d dl mZ d dlmZ d dlZd dlmZ d dlmZmZm	Z	 ddl
mZ ddlmZ dd	lmZmZmZ dd
lmZ ddlmZ ddlmZmZmZmZ ddlmZmZ ddlmZm Z  ddl!m"Z" ddl#m$Z$ ddl%m&Z&m'Z'm(Z(m)Z) ddl*m+Z+ ddl,m-Z-  ed       G d dej\                               Z/d Z0 ed      d;d       Z1dejd                  de3dejd                  fdZ4	 d<dej\                  d ejd                  d!ejd                  d"ejd                  d#ejd                  dz  d$e5d%e5d&e"e&   fd'Z6 ee1       G d( d)ej\                               Z7 G d* d+ej\                        Z8 G d, d-e      Z9e$ G d. d/e              Z: G d0 d1ej\                        Z;e$ G d2 d3e:             Z<e$ G d4 d5e:             Z=e$ G d6 d7e:             Z>e$ G d8 d9e:             Z?g d:Z@y)=    )Callable)OptionalN)nn)BCEWithLogitsLossCrossEntropyLossMSELoss   )ACT2FN)Cache)use_kernel_forward_from_hubuse_kernel_func_from_hubuse_kernelized_func)create_bidirectional_mask)GradientCheckpointingLayer)BaseModelOutputMaskedLMOutputSequenceClassifierOutputTokenClassifierOutput)ROPE_INIT_FUNCTIONSdynamic_rope_update)ALL_ATTENTION_FUNCTIONSPreTrainedModel)Unpack)auto_docstring)TransformersKwargscan_return_tuplemaybe_autocastmerge_with_config_defaults)capture_outputs   )EuroBertConfigRMSNormc                   `     e Zd Zdd fdZdej
                  dej
                  fdZd Z xZS )EuroBertRMSNormreturnc                     t         |           t        j                  t	        j
                  |            | _        || _        y)z>
        EuroBertRMSNorm is equivalent to T5LayerNorm
        N)super__init__r   	Parametertorchonesweightvariance_epsilon)selfhidden_sizeeps	__class__s      `/opt/pipecat/venv/lib/python3.12/site-packages/transformers/models/eurobert/modeling_eurobert.pyr(   zEuroBertRMSNorm.__init__.   s1     	ll5::k#:; #    hidden_statesc                 "   |j                   }|j                  t        j                        }|j	                  d      j                  dd      }|t        j                  || j                  z         z  }| j                  |j                  |      z  S )N   T)keepdim)	dtypetor*   float32powmeanrsqrtr-   r,   )r.   r4   input_dtypevariances       r2   forwardzEuroBertRMSNorm.forward6   sy    #))%((7 $$Q',,R,>%Ht?T?T4T(UU{{]--k:::r3   c                 ^    t        | j                  j                         d| j                   S )Nz, eps=)tupler,   shaper-   r.   s    r2   
extra_reprzEuroBertRMSNorm.extra_repr=   s*    ))*+6$2G2G1HIIr3   )gh㈵>)r%   N)	__name__
__module____qualname__r(   r*   TensorrA   rF   __classcell__r1   s   @r2   r$   r$   ,   s)    $;U\\ ;ell ;Jr3   r$   c                     | dd| j                   d   dz  f   }| d| j                   d   dz  df   }t        j                  | |fd      S )z*Rotates half the hidden dims of the input..Nr7   r6   dim)rD   r*   cat)xx1x2s      r2   rotate_halfrT   A   sZ    	
3"!''"+"""	#B	
3q ""	#B99rc2YB''r3   rotary_pos_embc                     |j                  |      }|j                  |      }| |z  t        |       |z  z   }||z  t        |      |z  z   }||fS )a  Applies Rotary Position Embedding to the query and key tensors.

    Args:
        q (`torch.Tensor`): The query tensor.
        k (`torch.Tensor`): The key tensor.
        cos (`torch.Tensor`): The cosine part of the rotary embedding.
        sin (`torch.Tensor`): The sine part of the rotary embedding.
        unsqueeze_dim (`int`, *optional*, defaults to 1):
            The 'unsqueeze_dim' argument specifies the dimension along which to unsqueeze cos[position_ids] and
            sin[position_ids] so that they can be properly broadcasted to the dimensions of q and k. For example, note
            that cos[position_ids] and sin[position_ids] have the shape [batch_size, seq_len, head_dim]. Then, if q and
            k have the shape [batch_size, heads, seq_len, head_dim], then setting unsqueeze_dim=1 makes
            cos[position_ids] and sin[position_ids] broadcastable to the shapes of q and k. Similarly, if q and k have
            the shape [batch_size, seq_len, heads, head_dim], then set unsqueeze_dim=2.
    Returns:
        `tuple(torch.Tensor)` comprising of the query and key tensors rotated using the Rotary Position Embedding.
    )	unsqueezerT   )qkcossinunsqueeze_dimq_embedk_embeds          r2   apply_rotary_pos_embr_   H   sY    & --
&C
--
&C3w;q>C/0G3w;q>C/0GGr3   r4   n_repr%   c                     | j                   \  }}}}|dk(  r| S | dddddddddf   j                  |||||      } | j                  |||z  ||      S )z
    This is the equivalent of torch.repeat_interleave(x, dim=1, repeats=n_rep). The hidden states go from (batch,
    num_key_value_heads, seqlen, head_dim) to (batch, num_attention_heads, seqlen, head_dim)
    r    N)rD   expandreshape)r4   r`   batchnum_key_value_headsslenhead_dims         r2   	repeat_kvrh   b   so    
 2?1D1D.Ehz!!Qa"23::5BUW\^bdlmM  (;e(CT8TTr3   modulequerykeyvalueattention_maskscalingdropoutkwargsc                    t        || j                        }t        || j                        }	t        j                  ||j	                  dd            |z  }
||
|z   }
t
        j                  j                  |
dt        j                        j                  |j                        }
t
        j                  j                  |
|| j                        }
t        j                  |
|	      }|j	                  dd      j                         }||
fS )Nr6   r	   r7   )rO   r9   )ptrainingr    )rh   num_key_value_groupsr*   matmul	transposer   
functionalsoftmaxr;   r:   r9   ro   rs   
contiguous)ri   rj   rk   rl   rm   rn   ro   rp   
key_statesvalue_statesattn_weightsattn_outputs               r2   eager_attention_forwardr~   n   s     3 ; ;<JUF$?$?@L<<z';';Aq'ABWLL!#n4==((2U]](SVVW\WbWbcL==((6??([L,,|\:K''1-88:K$$r3   c                       e Zd ZdZdedef fdZ	 	 	 	 ddej                  de	ej                  ej                  f   dz  dej                  dz  d	e
dz  d
ej                  dz  dee   de	ej                  ej                  f   fdZ xZS )EuroBertAttentionz=Multi-headed attention from 'Attention Is All You Need' paperconfig	layer_idxc                 d   t         |           || _        || _        t	        |d|j
                  |j                  z        | _        |j                  |j                  z  | _	        | j                  dz  | _
        |j                  | _        d| _        t        j                  |j
                  |j                  | j                  z  |j                        | _        t        j                  |j
                  |j                  | j                  z  |j                        | _        t        j                  |j
                  |j                  | j                  z  |j                        | _        t        j                  |j                  | j                  z  |j
                  |j                        | _        y )Nrg   g      Fbias)r'   r(   r   r   getattrr/   num_attention_headsrg   re   rt   rn   attention_dropout	is_causalr   Linearattention_biasq_projk_projv_projo_projr.   r   r   r1   s      r2   r(   zEuroBertAttention.__init__   sM   "
F4F4F&JdJd4de$*$>$>&B\B\$\!}}d*!'!9!9ii : :T]] JQWQfQf
 ii : :T]] JQWQfQf
 ii : :T]] JQWQfQf
 ii&&68J8JQWQfQf
r3   Nr4   position_embeddingsrm   past_key_valuescache_positionrp   r%   c                    |j                   d d }g |d| j                  }| j                  |      j                  |      j	                  dd      }	| j                  |      j                  |      j	                  dd      }
| j                  |      j                  |      j	                  dd      }|\  }}t        |	|
||      \  }	}
|'|||d}|j                  |
|| j                  |      \  }
}t        j                  | j                  j                  t              } || |	|
||f| j                  sdn| j                   | j"                  d|\  }} |j$                  g |d j'                         }| j)                  |      }||fS )Nr7   r    r6   )r[   rZ   r           )ro   rn   )rD   rg   r   viewrv   r   r   r_   updater   r   get_interfacer   _attn_implementationr~   rs   r   rn   rc   ry   r   )r.   r4   r   rm   r   r   rp   input_shapehidden_shapequery_statesrz   r{   rZ   r[   cache_kwargsattention_interfacer}   r|   s                     r2   rA   zEuroBertAttention.forward   s    $))#2.88b8$--8{{=166|DNNqRST[[/44\BLLQPQR
{{=166|DNNqRST&S#7jRUWZ#[ j&#&snUL'6'='=j,X\XfXfht'u$J(?(M(MKK,,.E)
 %8	%
  $}}C$2H2HLL	%
 	%
!\ *k));;;;FFHkk+.L((r3   NNNN)rG   rH   rI   __doc__r!   intr(   r*   rJ   rC   r   
LongTensorr   r   rA   rK   rL   s   @r2   r   r      s    G
~ 
# 
4 IM.2(,26))||)) #5<<#=>E)) t+	))
 )) ((4/)) +,)) 
u||U\\)	*))r3   r   c                   $     e Zd Z fdZd Z xZS )EuroBertMLPc                    t         |           || _        |j                  | _        |j                  | _        t        j                  | j                  | j                  |j                        | _        t        j                  | j                  | j                  |j                        | _	        t        j                  | j                  | j                  |j                        | _
        t        |j                     | _        y )Nr   )r'   r(   r   r/   intermediate_sizer   r   mlp_bias	gate_projup_proj	down_projr
   
hidden_actact_fnr.   r   r1   s     r2   r(   zEuroBertMLP.__init__   s    !--!'!9!94#3#3T5K5KRXRaRabyy!1!143I3IPVP_P_`4#9#94;K;KRXRaRabV../r3   c                     | j                  | j                  | j                  |            | j                  |      z        }|S N)r   r   r   r   )r.   rQ   r   s      r2   rA   zEuroBertMLP.forward   s6    NN4;;t~~a/@#ADLLQRO#ST	r3   )rG   rH   rI   r(   rA   rK   rL   s   @r2   r   r      s    0r3   r   c                   "    e Zd Zdedef fdZ	 	 	 	 	 	 ddej                  dej                  dz  dej                  dz  de	dz  d	e
dz  d
ej                  dz  deej                  ej                  f   dz  dee   dej                  fdZ xZS )EuroBertDecoderLayerr   r   c                     t         |           |j                  | _        t        ||      | _        t        |      | _        t        |j                  |j                        | _	        t        |j                  |j                        | _
        y )N)r   r   r0   )r'   r(   r/   r   	self_attnr   mlpr$   rms_norm_epsinput_layernormpost_attention_layernormr   s      r2   r(   zEuroBertDecoderLayer.__init__   sm    !--*&INv&.v/A/AvGZGZ[(78J8JPVPcPc(d%r3   Nr4   rm   position_idsr   	use_cacher   r   rp   r%   c                     |}	| j                  |      } | j                  d|||||||d|\  }}
|	|z   }|}	| j                  |      }| j                  |      }|	|z   }|S )N)r4   rm   r   r   r   r   r    )r   r   r   r   )r.   r4   rm   r   r   r   r   r   rp   residual_s              r2   rA   zEuroBertDecoderLayer.forward   s     !,,];)4>> 	
')%+) 3	
 	
q !=0 !55mD/ =0r3   )NNNFNN)rG   rH   rI   r!   r   r(   r*   rJ   r   r   boolrC   r   r   rA   rK   rL   s   @r2   r   r      s    e~ e# e /304(,!&26HL|| t+ &&-	
  $; ((4/ #5<<#=>E +, 
r3   r   c                   J    e Zd ZU eed<   dZdZdgZdgZdZ	dZ
dZdZdZeedZy)EuroBertPreTrainedModelr   modelTr   r   )r4   
attentionsN)rG   rH   rI   r!   __annotations__base_model_prefixsupports_gradient_checkpointing_no_split_modules_skip_keys_device_placement_supports_flash_attn_supports_sdpa_supports_flex_attn_can_compile_fullgraph_supports_attention_backendr   r   _can_record_outputsr   r3   r2   r   r     sQ    &*#/0#4"5N!"&-'r3   r   c                        e Zd ZU ej                  ed<   ddef fdZe	 	 	 ddedz  de	d   de
dz  ded	ef   fd
       Z ej                         ed               Z xZS )EuroBertRotaryEmbeddinginv_freqNr   c                    t         |           |j                  | _        |j                  | _        || _        | j
                  j                  d   | _        | j                  }| j                  dk7  rt        | j                     } || j
                  |      \  }| _
        | j                  d|d       | j                  d|j                         d       y )N	rope_typedefaultr   F)
persistentoriginal_inv_freq)r'   r(   max_position_embeddingsmax_seq_len_cachedoriginal_max_seq_lenr   rope_parametersr   compute_default_rope_parametersr   attention_scalingregister_bufferclone)r.   r   devicerope_init_fnr   r1   s        r2   r(   z EuroBertRotaryEmbedding.__init__!  s    "("@"@$*$B$B!44[A!%!E!E>>Y&.t~~>L+7V+L($(ZeD0(..2BuUr3   r   ztorch.deviceseq_lenr%   ztorch.Tensorc                    | j                   d   }t        | dd      xs | j                  | j                  z  }d}d|t	        j
                  d|dt        j                        j                  |t        j                        |z  z  z  }||fS )	a  
        Computes the inverse frequencies according to the original RoPE implementation
        Args:
            config ([`~transformers.PreTrainedConfig`]):
                The model configuration.
            device (`torch.device`):
                The device to use for initialization of the inverse frequencies.
            seq_len (`int`, *optional*):
                The current sequence length. Unused for this type of RoPE.
        Returns:
            Tuple of (`torch.Tensor`, `float`), containing the inverse frequencies for the RoPE embeddings and the
            post-processing scaling factor applied to the computed cos/sin (unused in this type of RoPE).
        
rope_thetarg   Ng      ?r   r6   r9   )r   r9   )	r   r   r/   r   r*   arangeint64r:   float)r   r   r   baserO   attention_factorr   s          r2   r   z7EuroBertRotaryEmbedding.compute_default_rope_parameters1  s    & %%l3fj$/c63E3EIcIc3c U\\!S!5;;?BB&X]XcXcBdgjjk
 )))r3   c                 N   | j                   d d d d f   j                         j                  |j                  d   dd      j	                  |j
                        }|d d d d d f   j                         }t        |j
                  j                  t              r/|j
                  j                  dk7  r|j
                  j                  nd}t        |d      5  |j                         |j                         z  j                  dd      }t        j                  ||fd	      }|j                         | j                  z  }|j                         | j                  z  }	d d d        j	                  |j                   
      	j	                  |j                   
      fS # 1 sw Y   AxY w)Nr   r7   r    mpscpuF)device_typeenabledr6   rN   r   )r   r   rb   rD   r:   r   
isinstancetypestrr   rv   r*   rP   rZ   r   r[   r9   )
r.   rQ   r   inv_freq_expandedposition_ids_expandedr   freqsembrZ   r[   s
             r2   rA   zEuroBertRotaryEmbedding.forwardO  sR    !MM$4-8>>@GGHZHZ[\H]_acdehhijiqiqr ,QaZ 8 > > @'1!((--'E!((--[`J`ahhmmfkUC 	5&,,.1F1L1L1NNYYZ[]^_E))UEN3C'')d444C'')d444C		5 vvAGGv$cff177f&;;;	5 	5s   BFF$r   )NNN)rG   rH   rI   r*   rJ   r   r!   r(   staticmethodr   r   rC   r   r   no_gradr   rA   rK   rL   s   @r2   r   r     s    llV~ V  (,+/"*%*(* t* 
~u$	%	* *: U]]_<  <r3   r   c                        e Zd Zdef fdZeee	 	 	 	 ddej                  dej                  dz  dej                  dz  dej                  dz  dee   d	eez  fd
                     Z xZS )EuroBertModelr   c           	         t         |   |       |j                  | _        |j                  | _        t        j                  |j                  |j                  | j                        | _        t        j                  t        |j                        D cg c]  }t        ||       c}      | _        t        |j                  |j                        | _        t#        |      | _        d| _        | j)                          y c c}w )Nr   )r   F)r'   r(   pad_token_idpadding_idx
vocab_sizer   	Embeddingr/   embed_tokens
ModuleListrangenum_hidden_layersr   layersr$   r   normr   
rotary_embgradient_checkpointing	post_initr   s      r2   r(   zEuroBertModel.__init__a  s     !.. ++LL):):F<N<NPTP`P`ammFKFLdLdFef!&)4f
 $F$6$6F<O<OP	1@&+# 	 gs   DN	input_idsrm   r   inputs_embedsrp   r%   c                    |d u |d uz  rt        d      || j                  |      }|=t        j                  |j                  d   |j
                        j                  d      }t        | j                  ||      }|}| j                  ||      }| j                  d | j                  j                   D ]  }	 |	|f|||d|} | j                  |      }t        |      S )	Nz:You must specify exactly one of input_ids or inputs_embedsr    )r   r   )r   r	  rm   )r   )rm   r   r   )last_hidden_state)
ValueErrorr   r*   r   rD   r   rW   r   r   r  r  r  r  r   )
r.   r  rm   r   r	  rp   bidirectional_maskr4   r   encoder_layers
             r2   rA   zEuroBertModel.forwardq  s    -t";<YZZ *.*;*;I*FM <<(;(;A(>}G[G[\ffghiL6;;')
 &"oom,oW![[)H4;;+H+HI 	M)1$7)	
 M	 		-0+
 	
r3   r   )rG   rH   rI   r!   r(   r   r   r   r*   r   rJ   FloatTensorr   r   rC   r   rA   rK   rL   s   @r2   r   r   _  s    ~     '+.20426&
##&
 t+&
 &&-	&

 ((4/&
 +,&
 
	 &
    &
r3   r   c                   0    e Zd ZddiZddiZddgdgfiZdef fdZee		 	 	 	 	 dd
e
j                  d	z  de
j                  d	z  de
j                  d	z  de
j                  d	z  de
j                  d	z  dee   dee
j                     ez  fd              Z xZS )EuroBertForMaskedLMzlm_head.weightzmodel.embed_tokens.weightlm_headcolwise_gather_outputr4   logitsr   c                     t         |   |       t        |      | _        t	        j
                  |j                  |j                  |j                        | _	        | j                          y r   )r'   r(   r   r   r   r   r/   r   r   r  r  r   s     r2   r(   zEuroBertForMaskedLM.__init__  sL     "6*
yy!3!3V5F5FX 	r3   Nr  rm   r   r	  labelsrp   r%   c                     | j                   d||||d|}| j                  |j                        }d}	|* | j                  d||| j                  j
                  d|}	t        |	||j                  |j                        S )a)  
        Example:

        ```python
        >>> from transformers import AutoTokenizer, EuroBertForMaskedLM

        >>> model = EuroBertForMaskedLM.from_pretrained("EuroBERT/EuroBERT-210m")
        >>> tokenizer = AutoTokenizer.from_pretrained("EuroBERT/EuroBERT-210m")

        >>> text = "The capital of France is <|mask|>."
        >>> inputs = tokenizer(text, return_tensors="pt")
        >>> outputs = model(**inputs)

        >>> # To get predictions for the mask:
        >>> masked_index = inputs["input_ids"][0].tolist().index(tokenizer.mask_token_id)
        >>> predicted_token_id = outputs.logits[0, masked_index].argmax(axis=-1)
        >>> predicted_token = tokenizer.decode(predicted_token_id)
        >>> print("Predicted token:", predicted_token)
        Predicted token:  Paris
        ```)r  rm   r   r	  N)r  r  r   lossr  r4   r   r   )	r   r  r  loss_functionr   r   r   r4   r   )
r.   r  rm   r   r	  r  rp   outputsr  r  s
             r2   rA   zEuroBertForMaskedLM.forward  s    > $.4:: $
)%'	$

 $
 g778%4%%pVFt{{OeOepiopD!//))	
 	
r3   NNNNN)rG   rH   rI   _tied_weights_keys_tp_plan_pp_planr!   r(   r   r   r*   r   rJ   r  r   r   rC   r   rA   rK   rL   s   @r2   r  r    s    *,GH23H_-z:;H~   .2.20426*./
##d*/
 t+/
 &&-	/

 ((4//
   4'/
 +,/
 
u||	~	-/
  /
r3   r  c                       e Zd Zdef fdZee	 	 	 	 	 ddej                  dz  dej                  dz  dej                  dz  dej                  dz  dej                  dz  d	ee   d
eej                     ez  fd              Z xZS )!EuroBertForSequenceClassificationr   c                    t         |   |       |j                  | _        |j                  | _        t	        |      | _        t        j                  |j                  |j                        | _	        t        j                         | _        t        j                  |j                  | j                        | _        | j                          y r   )r'   r(   
num_labelsclassifier_poolingr   r   r   r   r/   denseGELU
activation
classifierr  r   s     r2   r(   z*EuroBertForSequenceClassification.__init__  s      ++"(";";"6*
YYv1163E3EF
'')))F$6$6Hr3   Nr  rm   r   r	  r  rp   r%   c                 b    | j                   |f|||d|}|d   }| j                  dv r| j                  dk(  r
|d d df   }	n^| j                  dk(  rO||j                  d      }	n:||j                  d      z  j	                  d      }	|	|j	                  dd	
      z  }	| j                  	      }	| j                  |	      }	| j                  |	      }
n| j                  dk(  r| j                  |      }| j                  |      }| j                  |      }
||
j                  d      }
n:|
|j                  d      z  j	                  d      }
|
|j	                  dd	
      z  }
d }||j                  
j                        }| j                  j                  | j                  dk(  rd| j                  _        nl| j                  dkD  rL|j                  t        j                  k(  s|j                  t        j                   k(  rd| j                  _        nd| j                  _        | j                  j                  dk(  rIt#               }| j                  dk(  r& ||
j%                         |j%                               }n ||
|      }n| j                  j                  dk(  r=t'               } ||
j)                  d| j                        |j)                  d            }n,| j                  j                  dk(  rt+               } ||
|      }t-        |
|j.                  |j0                        S )Nrm   r   r	  r   )bosr=   r+  r=   r    rN   r7   T)rO   r8   late
regressionsingle_label_classificationmulti_label_classificationr  )r   r$  r=   rW   sumr%  r'  r(  r:   r   r   problem_typer#  r9   r*   longr   r   squeezer   r   r   r   r4   r   )r.   r  rm   r   r	  r  rp   encoder_outputr  pooled_outputr  rQ   r  loss_fcts                 r2   rA   z)EuroBertForSequenceClassification.forward  s    $
)%'	

 
 +1-""o5&&%/ 1!Q$ 7((F2!)$5$:$:q$:$AM%69Q9QRT9U%U$Z$Z_`$Z$aM!^%7%7At%7%LLM JJ}5M OOM:M__]3F$$.

,-A"A__Q'F%+ >#;#;B#??DDDK.,,D,AAYYv}}-F{{''/??a'/;DKK,__q(fllejj.HFLL\a\e\eLe/LDKK,/KDKK,{{''<7"9??a'#FNN$4fnn6FGD#FF3D))-JJ+-B @&++b/R))-II,./'(66%00	
 	
r3   r  )rG   rH   rI   r!   r(   r   r   r*   r   rJ   r  r   r   rC   r   rA   rK   rL   s   @r2   r!  r!    s    	~ 	  .2.20426*.H
##d*H
 t+H
 &&-	H

 ((4/H
   4'H
 +,H
 
u||	7	7H
  H
r3   r!  c                       e Zd Zdef fdZd Zd Zee	 	 	 	 	 dde	j                  dz  de	j                  dz  de	j                  dz  d	e	j                  dz  d
e	j                  dz  dee   deez  fd              Z xZS )EuroBertForTokenClassificationr   c                     t         |   |       |j                  | _        t        |      | _        t        j                  |j                  |j                        | _        | j                          y r   )
r'   r(   r#  r   r   r   r   r/   r(  r  r   s     r2   r(   z'EuroBertForTokenClassification.__init__;  sQ      ++"6*
))F$6$68I8IJr3   c                 .    | j                   j                  S r   r   r   rE   s    r2   get_input_embeddingsz3EuroBertForTokenClassification.get_input_embeddingsC  s    zz&&&r3   c                 &    || j                   _        y r   r;  )r.   rl   s     r2   set_input_embeddingsz3EuroBertForTokenClassification.set_input_embeddingsF  s    "'

r3   Nr  rm   r   r	  r  rp   r%   c                 "    | j                   |f|||d|}|d   }| j                  |      }	d}
|<t               } ||	j                  d| j                        |j                  d            }
t        |
|	|j                  |j                        S )a  
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        r*  r   Nr7   r  )r   r(  r   r   r#  r   r4   r   )r.   r  rm   r   r	  r  rp   r  sequence_outputr  r  r6  s               r2   rA   z&EuroBertForTokenClassification.forwardI  s    " $**
)%'	

 
 "!*1')HFKKDOO<fkk"oND$!//))	
 	
r3   r  )rG   rH   rI   r!   r(   r<  r>  r   r   r*   r   rJ   r  r   r   rC   r   rA   rK   rL   s   @r2   r8  r8  9  s    ~ '(  .2.20426*.#
##d*#
 t+#
 &&-	#

 ((4/#
   4'#
 +,#
 
&	&#
  #
r3   r8  )r   r   r  r!  r8  )r    )r   )Acollections.abcr   typingr   r*   r   torch.nnr   r   r   activationsr
   cache_utilsr   integrationsr   r   r   masking_utilsr   modeling_layersr   modeling_outputsr   r   r   r   modeling_rope_utilsr   r   modeling_utilsr   r   processing_utilsr   utilsr   utils.genericr   r   r   r   utils.output_capturingr   configuration_eurobertr!   Moduler$   rT   r_   rJ   r   rh   r   r~   r   r   r   r   r   r   r  r!  r8  __all__r   r3   r2   <module>rS     s  , %    A A !   f f 6 9 p p K F & # m m 5 2 Y'Jbii J (J(( *+ ,2	UU\\ 	U# 	U%,, 	U& %II%<<% 
% <<	%
 LL4'% % % '(%2 )*C)		 C) +C)L"))  *5 *Z o  $><bii ><B :
+ :
 :
z >
1 >
 >
B V
(? V
 V
r 4
%< 4
 4
nr3   