
    qi                     Z   d Z ddlZddlZddlmZ ddlmZmZmZ ddlm	Z
 ddlmZmZ ddlmZmZmZmZmZmZmZ dd	lmZ dd
lmZmZ ddlmZ  ej:                  e      Ze G d de             Z  G d dejB                        Z" G d dejB                        Z# G d dejB                        Z$ G d dejB                        Z% G d dejB                        Z& G d dejB                        Z' G d dejB                        Z( G d dejB                        Z)e G d d e              Z* G d! d"e       Z+ G d# d$ejB                        Z, ed%&       G d' d(e              Z-e G d) d*e              Z.e G d+ d,e              Z/ G d- d.ejB                        Z0e G d/ d0e              Z1d1 Z2g d2Z3y)3zPyTorch MPNet model.    N)nn)BCEWithLogitsLossCrossEntropyLossMSELoss   )initialization)ACT2FNgelu)BaseModelOutputBaseModelOutputWithPoolingMaskedLMOutputMultipleChoiceModelOutputQuestionAnsweringModelOutputSequenceClassifierOutputTokenClassifierOutput)PreTrainedModel)auto_docstringlogging   )MPNetConfigc                   V     e Zd ZU eed<   dZ ej                          fd       Z xZ	S )MPNetPreTrainedModelconfigmpnetc                 X   t         |   |       t        |t              r t	        j
                  |j                         yt        |t              rZt	        j                  |j                  t        j                  |j                  j                  d         j                  d             yy)zInitialize the weightsr   r   N)super_init_weights
isinstanceMPNetLMHeadinitzeros_biasMPNetEmbeddingscopy_position_idstorcharangeshapeexpand)selfmodule	__class__s     Z/opt/pipecat/venv/lib/python3.12/site-packages/transformers/models/mpnet/modeling_mpnet.pyr   z"MPNetPreTrainedModel._init_weights/   sv     	f%fk*KK$0JJv**ELL9L9L9R9RSU9V,W,^,^_f,gh 1    )
__name__
__module____qualname__r   __annotations__base_model_prefixr(   no_gradr   __classcell__r.   s   @r/   r   r   *   s+    U]]_i ir0   r   c                   ,     e Zd Z fdZddZd Z xZS )r%   c                 D   t         |           d| _        t        j                  |j
                  |j                  | j                        | _        t        j                  |j                  |j                  | j                        | _	        t        j                  |j                  |j                        | _
        t        j                  |j                        | _        | j                  dt!        j"                  |j                        j%                  d      d       y )Nr   )padding_idxepsr'   r   F)
persistent)r   __init__r;   r   	Embedding
vocab_sizehidden_sizeword_embeddingsmax_position_embeddingsposition_embeddings	LayerNormlayer_norm_epsDropouthidden_dropout_probdropoutregister_bufferr(   r)   r+   r,   r   r.   s     r/   r?   zMPNetEmbeddings.__init__:   s    !||F,=,=v?Q?Q_c_o_op#%<<**F,>,>DL\L\$
  f&8&8f>S>STzz&"<"<=ELL)G)GHOOPWXej 	 	
r0   c                 x   |*|t        || j                        }n| j                  |      }||j                         }n|j                         d d }|d   }|| j                  d d d |f   }|| j                  |      }| j                  |      }||z   }| j                  |      }| j                  |      }|S )Nr   r   )	"create_position_ids_from_input_idsr;   &create_position_ids_from_inputs_embedssizer'   rC   rE   rF   rJ   )	r,   	input_idsr'   inputs_embedskwargsinput_shape
seq_lengthrE   
embeddingss	            r/   forwardzMPNetEmbeddings.forwardH   s    $A)TM]M]^#JJ=Y #..*K',,.s3K ^
,,Q^<L  00;M"66|D"%88
^^J/
\\*-
r0   c                    |j                         dd }|d   }t        j                  | j                  dz   || j                  z   dz   t        j                  |j
                        }|j                  d      j                  |      S )z
        We are provided embeddings directly. We cannot infer which are padded so just generate sequential position ids.

        Args:
            inputs_embeds: torch.Tensor

        Returns: torch.Tensor
        Nr   r   )dtypedevicer   )rP   r(   r)   r;   longrZ   	unsqueezer+   )r,   rR   rT   sequence_lengthr'   s        r/   rO   z6MPNetEmbeddings.create_position_ids_from_inputs_embedsb   s     $((*3B/%a.||q /D4D4D"Dq"HPUPZPZcpcwcw
 %%a(//<<r0   )NNN)r1   r2   r3   r?   rW   rO   r7   r8   s   @r/   r%   r%   9   s    
4=r0   r%   c                   ,     e Zd Z fdZ	 	 	 ddZ xZS )MPNetSelfAttentionc                 *   t         |           |j                  |j                  z  dk7  r2t	        |d      s&t        d|j                   d|j                   d      |j                  | _        t        |j                  |j                  z        | _        | j                  | j                  z  | _        t        j                  |j                  | j                        | _        t        j                  |j                  | j                        | _        t        j                  |j                  | j                        | _        t        j                  |j                  |j                        | _        t        j                  |j                         | _        y )Nr   embedding_sizezThe hidden size (z6) is not a multiple of the number of attention heads ())r   r?   rB   num_attention_headshasattr
ValueErrorintattention_head_sizeall_head_sizer   LinearqkvorH   attention_probs_dropout_probrJ   rL   s     r/   r?   zMPNetSelfAttention.__init__u   s:    : ::a?PVXhHi#F$6$6#7 8 445Q8 
 $*#=#= #&v'9'9F<V<V'V#W !558P8PP6--t/A/AB6--t/A/AB6--t/A/AB6--v/A/ABzz&"E"EFr0   c                    |j                   \  }}}| j                  |      j                  |d| j                  | j                        j                  dd      }	| j                  |      j                  |d| j                  | j                        j                  dd      }
| j                  |      j                  |d| j                  | j                        j                  dd      }t        j                  |	|
j                  dd            }|t        j                  | j                        z  }|||z  }|||z   }t        j                  j                  |d      }| j                  |      }t        j                  ||      }|j!                  dddd      j#                         }|j%                         d d | j&                  fz   } |j                  | }| j)                  |      }|r||f}|S |f}|S )Nr   r      dimr   r   )r*   rj   viewrc   rg   	transposerk   rl   r(   matmulmathsqrtr   
functionalsoftmaxrJ   permute
contiguousrP   rh   rm   )r,   hidden_statesattention_maskposition_biasoutput_attentionsrS   
batch_sizerU   _rj   rk   rl   attention_scoresattention_probscnew_c_shaperm   outputss                     r/   rW   zMPNetSelfAttention.forward   s    %2$7$7!
JFF=!T*b$":":D<T<TUYq!_ 	
 FF=!T*b$":":D<T<TUYq!_ 	
 FF=!T*b$":":D<T<TUYq!_ 	
 !<<1;;r2+>?+dii8P8P.QQ $-%/.@ --//0@b/I,,7LL!,IIaAq!,,.ffhsmt'9'9&;;AFFK FF1I*;1o& CDr0   NNFr1   r2   r3   r?   rW   r7   r8   s   @r/   r_   r_   t   s    G, 2r0   r_   c                   ,     e Zd Z fdZ	 	 	 ddZ xZS )MPNetAttentionc                     t         |           t        |      | _        t	        j
                  |j                  |j                        | _        t	        j                  |j                        | _
        y Nr<   )r   r?   r_   attnr   rF   rB   rG   rH   rI   rJ   rL   s     r/   r?   zMPNetAttention.__init__   sN    &v.	f&8&8f>S>STzz&"<"<=r0   c                     | j                  ||||      }| j                  | j                  |d         |z         }|f|dd  z   }|S )N)r   r   r   )r   rF   rJ   )	r,   r}   r~   r   r   rS   self_outputsattention_outputr   s	            r/   rW   zMPNetAttention.forward   s_     yy/	 ! 
  >>$,,|A*G-*WX#%QR(88r0   r   r   r8   s   @r/   r   r      s    > r0   r   c                   V     e Zd Z fdZdej
                  dej
                  fdZ xZS )MPNetIntermediatec                    t         |           t        j                  |j                  |j
                        | _        t        |j                  t              rt        |j                     | _        y |j                  | _        y N)r   r?   r   ri   rB   intermediate_sizedenser    
hidden_actstrr	   intermediate_act_fnrL   s     r/   r?   zMPNetIntermediate.__init__   s]    YYv1163K3KL
f''-'-f.?.?'@D$'-'8'8D$r0   r}   returnc                 J    | j                  |      }| j                  |      }|S r   )r   r   )r,   r}   s     r/   rW   zMPNetIntermediate.forward   s&    

=100?r0   r1   r2   r3   r?   r(   TensorrW   r7   r8   s   @r/   r   r      s#    9U\\ ell r0   r   c                   n     e Zd Z fdZdej
                  dej
                  dej
                  fdZ xZS )MPNetOutputc                 (   t         |           t        j                  |j                  |j
                        | _        t        j                  |j
                  |j                        | _        t        j                  |j                        | _        y r   )r   r?   r   ri   r   rB   r   rF   rG   rH   rI   rJ   rL   s     r/   r?   zMPNetOutput.__init__   s`    YYv779K9KL
f&8&8f>S>STzz&"<"<=r0   r}   input_tensorr   c                 r    | j                  |      }| j                  |      }| j                  ||z         }|S r   )r   rJ   rF   )r,   r}   r   s      r/   rW   zMPNetOutput.forward   s7    

=1]3}|'CDr0   r   r8   s   @r/   r   r      s1    >U\\  RWR^R^ r0   r   c                   ,     e Zd Z fdZ	 	 	 ddZ xZS )
MPNetLayerc                     t         |           t        |      | _        t	        |      | _        t        |      | _        y r   )r   r?   r   	attentionr   intermediater   outputrL   s     r/   r?   zMPNetLayer.__init__   s3    '/-f5!&)r0   c                     | j                  ||||      }|d   }|dd  }| j                  |      }	| j                  |	|      }
|
f|z   }|S )N)r   r   r   r   )r   r   r   )r,   r}   r~   r   r   rS   self_attention_outputsr   r   intermediate_outputlayer_outputs              r/   rW   zMPNetLayer.forward   sr     "&'/	 "0 "
 2!4(,"//0@A{{#68HI/G+r0   r   r   r8   s   @r/   r   r      s    * r0   r   c                        e Zd Z fdZ	 	 	 	 ddej
                  dej
                  dz  dededef
dZdd	Ze	dd
       Z
 xZS )MPNetEncoderc                 B   t         |           || _        |j                  | _        t        j                  t        |j                        D cg c]  }t        |       c}      | _
        t        j                  |j                  | j                        | _        y c c}w r   )r   r?   r   rc   n_headsr   
ModuleListrangenum_hidden_layersr   layerr@   relative_attention_num_bucketsrelative_attention_bias)r,   r   r   r.   s      r/   r?   zMPNetEncoder.__init__  ss    11]]fF^F^@_#`1Jv$6#`a
')||F4Y4Y[_[g[g'h$ $as   BNr}   r~   r   output_hidden_statesreturn_dictc                    | j                  |      }|rdnd }|rdnd }	t        | j                        D ]+  \  }
}|r||fz   } ||||fd|i|}|d   }|s#|	|d   fz   }	- |r||fz   }|st        d |||	fD              S t	        |||	      S )N r   r   r   c              3   &   K   | ]	  }||  y wr   r   ).0rl   s     r/   	<genexpr>z'MPNetEncoder.forward.<locals>.<genexpr>=  s     hqZ[Zghs   )last_hidden_stater}   
attentions)compute_position_bias	enumerater   tupler   )r,   r}   r~   r   r   r   rS   r   all_hidden_statesall_attentionsilayer_modulelayer_outputss                r/   rW   zMPNetEncoder.forward  s     22=A"6BD0d(4 	FOA|#$58H$H!( #4	
 M *!,M !/=3C2E!E	F"   1]4D Dh]4E~$Vhhh++%
 	
r0   c                 d   |j                  d      |j                  d      |j                  d      }}}||d d d d d f   }|d d d d d f   }nXt        j                  |t        j                        d d d f   }t        j                  |t        j                        d d d f   }||z
  }	| j	                  |	|      }
|
j                  |j                        }
| j                  |
      }|j                  g d      j                  d      }|j                  |d||f      j                         }|S )Nr   r   )rY   )num_buckets)rp   r   r   r   )rP   r(   r)   r[   relative_position_buckettorZ   r   r{   r\   r+   r|   )r,   xr'   r   bszqlenklencontext_positionmemory_positionrelative_position	rp_bucketvaluess               r/   r   z"MPNetEncoder.compute_position_biasD  s   &&)QVVAYq	4T#+Aq$J7*1dA:6O$||D

CAtGL#ll4uzzB47KO+.>>112CQ\1]	LL*	--i8	*44Q7Rt45@@Br0   c                     d}|  }|dz  }||dk  j                  t        j                        |z  z  }t        j                  |      }|dz  }||k  }|t        j                  |j                         |z        t        j                  ||z        z  ||z
  z  j                  t        j                        z   }t        j                  |t        j                  ||dz
              }|t        j                  |||      z  }|S )Nr   rp   r   )
r   r(   r[   abslogfloatrw   min	full_likewhere)r   r   max_distanceretn	max_exactis_smallval_if_larges           r/   r   z%MPNetEncoder.relative_position_bucketV  s    Azz%**%33IIaL1$	y= IIaggi)+,txxy8P/QQU`clUlm
"UZZ. yyu|[[\_/]^u{{8Q55
r0   )NFFF)N    )r      )r1   r2   r3   r?   r(   r   boolrW   r   staticmethodr   r7   r8   s   @r/   r   r     ss    i /3"'%*!&
||&
 t+&
  	&

 #&
 &
P$  r0   r   c                   V     e Zd Z fdZdej
                  dej
                  fdZ xZS )MPNetPoolerc                     t         |           t        j                  |j                  |j                        | _        t        j                         | _        y r   )r   r?   r   ri   rB   r   Tanh
activationrL   s     r/   r?   zMPNetPooler.__init__m  s9    YYv1163E3EF
'')r0   r}   r   c                 \    |d d df   }| j                  |      }| j                  |      }|S Nr   )r   r   )r,   r}   first_token_tensorpooled_outputs       r/   rW   zMPNetPooler.forwardr  s6     +1a40

#566r0   r   r8   s   @r/   r   r   l  s#    $
U\\ ell r0   r   c                       e Zd Zd fd	Zd Zd Ze	 	 	 	 	 	 	 ddej                  dz  dej                  dz  dej                  dz  dej                  dz  d	e
dz  d
e
dz  de
dz  deej                     ez  fd       Z xZS )
MPNetModelc                     t         |   |       || _        t        |      | _        t        |      | _        |rt        |      nd| _        | j                          y)zv
        add_pooling_layer (bool, *optional*, defaults to `True`):
            Whether to add a pooling layer
        N)
r   r?   r   r%   rV   r   encoderr   pooler	post_init)r,   r   add_pooling_layerr.   s      r/   r?   zMPNetModel.__init__}  sM    
 	 )&1#F+->k&)D 	r0   c                 .    | j                   j                  S r   rV   rC   r,   s    r/   get_input_embeddingszMPNetModel.get_input_embeddings  s    ...r0   c                 &    || j                   _        y r   r   )r,   values     r/   set_input_embeddingszMPNetModel.set_input_embeddings  s    */'r0   NrQ   r~   r'   rR   r   r   r   r   c                    ||n| j                   j                  }||n| j                   j                  }||n| j                   j                  }||t	        d      |#| j                  ||       |j                         }	n!||j                         d d }	nt	        d      ||j                  n|j                  }
|t        j                  |	|
      }| j                  ||	      }| j                  |||      }| j                  |||||      }|d   }| j                  | j                  |      nd }|s
||f|dd  z   S t        |||j                  |j                   	      S )
NzDYou cannot specify both input_ids and inputs_embeds at the same timer   z5You have to specify either input_ids or inputs_embeds)rZ   )rQ   r'   rR   )r~   r   r   r   r   r   )r   pooler_outputr}   r   )r   r   r   use_return_dictre   %warn_if_padding_and_no_attention_maskrP   rZ   r(   onesget_extended_attention_maskrV   r   r   r   r}   r   )r,   rQ   r~   r'   rR   r   r   r   rS   rT   rZ   extended_attention_maskembedding_outputencoder_outputssequence_outputr   s                   r/   rW   zMPNetModel.forward  s    2C1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++B]B] ]%>cdd"66y.Q#..*K&',,.s3KTUU%.%:!!@T@T!"ZZFCN040P0PQ_al0m??Y\iv?w,,2/!5# ' 
 *!,8<8OO4UY#]3oab6III)-')77&11	
 	
r0   )T)NNNNNNN)r1   r2   r3   r?   r   r   r   r(   
LongTensorFloatTensorr   r   r   r   rW   r7   r8   s   @r/   r   r   {  s    /0  .2370426)-,0#'4
##d*4
 ))D04
 &&-	4

 ((4/4
  $;4
 #Tk4
 D[4
 
u||	9	94
 4
r0   r   c                   0    e Zd ZdddZ fdZd Zd Ze	 	 	 	 	 	 	 	 ddej                  dz  d	ej                  dz  d
ej                  dz  dej                  dz  dej                  dz  dedz  dedz  dedz  deej                     ez  fd       Z xZS )MPNetForMaskedLMz'mpnet.embeddings.word_embeddings.weightzlm_head.bias)zlm_head.decoder.weightzlm_head.decoder.biasc                     t         |   |       t        |d      | _        t	        |      | _        | j                          y NF)r   )r   r?   r   r   r!   lm_headr   rL   s     r/   r?   zMPNetForMaskedLM.__init__  s6     %@
"6* 	r0   c                 .    | j                   j                  S r   )r  decoderr   s    r/   get_output_embeddingsz&MPNetForMaskedLM.get_output_embeddings  s    ||###r0   c                 \    || j                   _        |j                  | j                   _        y r   )r  r  r$   )r,   new_embeddingss     r/   set_output_embeddingsz&MPNetForMaskedLM.set_output_embeddings  s     -*//r0   NrQ   r~   r'   rR   labelsr   r   r   r   c	           	         ||n| j                   j                  }| j                  |||||||      }
|
d   }| j                  |      }d}|Ft	               } ||j                  d| j                   j                        |j                  d            }|s|f|
dd z   }||f|z   S |S t        |||
j                  |
j                        S )a  
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
            config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are ignored (masked), the
            loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`
        Nr~   r'   rR   r   r   r   r   r   rp   losslogitsr}   r   )
r   r   r   r  r   rt   rA   r   r}   r   )r,   rQ   r~   r'   rR   r  r   r   r   rS   r   r  prediction_scoresmasked_lm_lossloss_fctr   s                   r/   rW   zMPNetForMaskedLM.forward  s    & &1%<k$++B]B]**)%'/!5#  
 "!* LL9')H%&7&<&<RAWAW&XZ`ZeZefhZijN')GABK7F3A3M^%.YSYY$!//))	
 	
r0   NNNNNNNN)r1   r2   r3   _tied_weights_keysr?   r  r  r   r(   r  r	  r   r   r   r   rW   r7   r8   s   @r/   r  r    s    "K .
$0  .2370426*.)-,0#'/
##d*/
 ))D0/
 &&-	/

 ((4//
   4'/
  $;/
 #Tk/
 D[/
 
u||	~	-/
 /
r0   r  c                   (     e Zd ZdZ fdZd Z xZS )r!   z5MPNet Head for masked and permuted language modeling.c                    t         |           t        j                  |j                  |j                        | _        t        j                  |j                  |j                        | _        t        j                  |j                  |j                  d      | _
        t        j                  t        j                  |j                              | _        y )Nr<   T)r$   )r   r?   r   ri   rB   r   rF   rG   
layer_normrA   r  	Parameterr(   zerosr$   rL   s     r/   r?   zMPNetLMHead.__init__  s    YYv1163E3EF
,,v'9'9v?T?TUyy!3!3V5F5FTRLLV->->!?@	r0   c                     | j                  |      }t        |      }| j                  |      }| j                  |      }|S r   )r   r
   r"  r  r,   featuresrS   r   s       r/   rW   zMPNetLMHead.forward  s;    JJx GOOA LLOr0   r1   r2   r3   __doc__r?   rW   r7   r8   s   @r/   r!   r!     s    ?Ar0   r!   z
    MPNet Model transformer with a sequence classification/regression head on top (a linear layer on top of the pooled
    output) e.g. for GLUE tasks.
    )custom_introc                       e Zd Z fdZe	 	 	 	 	 	 	 	 ddej                  dz  dej                  dz  dej                  dz  dej                  dz  dej                  dz  dedz  d	edz  d
edz  de	ej                     ez  fd       Z xZS )MPNetForSequenceClassificationc                     t         |   |       |j                  | _        t        |d      | _        t        |      | _        | j                          y r  )r   r?   
num_labelsr   r   MPNetClassificationHead
classifierr   rL   s     r/   r?   z'MPNetForSequenceClassification.__init__0  sC      ++%@
1&9 	r0   NrQ   r~   r'   rR   r  r   r   r   r   c	           	         ||n| j                   j                  }| j                  |||||||      }
|
d   }| j                  |      }d}|| j                   j                  | j
                  dk(  rd| j                   _        nl| j
                  dkD  rL|j                  t        j                  k(  s|j                  t        j                  k(  rd| j                   _        nd| j                   _        | j                   j                  dk(  rIt               }| j
                  dk(  r& ||j                         |j                               }n |||      }n| j                   j                  dk(  r=t               } ||j                  d| j
                        |j                  d            }n,| j                   j                  dk(  rt               } |||      }|s|f|
d	d z   }||f|z   S |S t        |||
j                   |
j"                  
      S )a  
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        Nr  r   r   
regressionsingle_label_classificationmulti_label_classificationr   rp   r  )r   r   r   r0  problem_typer.  rY   r(   r[   rf   r   squeezer   rt   r   r   r}   r   r,   rQ   r~   r'   rR   r  r   r   r   rS   r   r  r  r  r  r   s                   r/   rW   z&MPNetForSequenceClassification.forward:  s   ( &1%<k$++B]B]**)%'/!5#  
 "!*1{{''/??a'/;DKK,__q(fllejj.HFLL\a\e\eLe/LDKK,/KDKK,{{''<7"9??a'#FNN$4fnn6FGD#FF3D))-JJ+-B @&++b/R))-II,./Y,F)-)9TGf$EvE'!//))	
 	
r0   r  )r1   r2   r3   r?   r   r(   r  r	  r   r   r   r   rW   r7   r8   s   @r/   r,  r,  )  s      .2370426*.)-,0#'@
##d*@
 ))D0@
 &&-	@

 ((4/@
   4'@
  $;@
 #Tk@
 D[@
 
u||	7	7@
 @
r0   r,  c                       e Zd Z fdZe	 	 	 	 	 	 	 	 ddej                  dz  dej                  dz  dej                  dz  dej                  dz  dej                  dz  dedz  d	edz  d
edz  de	ej                     ez  fd       Z xZS )MPNetForMultipleChoicec                     t         |   |       t        |      | _        t	        j
                  |j                        | _        t	        j                  |j                  d      | _
        | j                          y )Nr   )r   r?   r   r   r   rH   rI   rJ   ri   rB   r0  r   rL   s     r/   r?   zMPNetForMultipleChoice.__init__  sV     '
zz&"<"<=))F$6$6: 	r0   NrQ   r~   r'   rR   r  r   r   r   r   c	           	         ||n| j                   j                  }||j                  d   n|j                  d   }
|!|j                  d|j	                  d            nd}|!|j                  d|j	                  d            nd}|!|j                  d|j	                  d            nd}|1|j                  d|j	                  d      |j	                  d            nd}| j                  |||||||      }|d   }| j                  |      }| j                  |      }|j                  d|
      }d}|t               } |||      }|s|f|dd z   }||f|z   S |S t        |||j                  |j                        S )a  
        input_ids (`torch.LongTensor` of shape `(batch_size, num_choices, sequence_length)`):
            Indices of input sequence tokens in the vocabulary.

            Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
            [`PreTrainedTokenizer.__call__`] for details.

            [What are input IDs?](../glossary#input-ids)
        position_ids (`torch.LongTensor` of shape `(batch_size, num_choices, sequence_length)`, *optional*):
            Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
            config.max_position_embeddings - 1]`.

            [What are position IDs?](../glossary#position-ids)
        inputs_embeds (`torch.FloatTensor` of shape `(batch_size, num_choices, sequence_length, hidden_size)`, *optional*):
            Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This
            is useful if you want more control over how to convert *input_ids* indices into associated vectors than the
            model's internal embedding lookup matrix.
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the multiple choice classification loss. Indices should be in `[0, ...,
            num_choices-1]` where `num_choices` is the size of the second dimension of the input tensors. (See
            `input_ids` above)
        Nr   r   rq   )r'   r~   rR   r   r   r   rp   r  )r   r   r*   rt   rP   r   rJ   r0  r   r   r}   r   )r,   rQ   r~   r'   rR   r  r   r   r   rS   num_choicesflat_input_idsflat_position_idsflat_attention_maskflat_inputs_embedsr   r   r  reshaped_logitsr  r  r   s                         r/   rW   zMPNetForMultipleChoice.forward  s   H &1%<k$++B]B],5,Aiooa(}GZGZ[\G]CLCXINN2,>?^bLXLdL--b,2C2CB2GHjnR`Rln11"n6I6I"6MNrv ( r=#5#5b#9=;M;Mb;QR 	 ***.,/!5#  
  
]3/ ++b+6')HOV4D%''!"+5F)-)9TGf$EvE("!//))	
 	
r0   r  )r1   r2   r3   r?   r   r(   r  r	  r   r   r   r   rW   r7   r8   s   @r/   r9  r9  ~  s      .2370426*.)-,0#'L
##d*L
 ))D0L
 &&-	L

 ((4/L
   4'L
  $;L
 #TkL
 D[L
 
u||	8	8L
 L
r0   r9  c                       e Zd Z fdZe	 	 	 	 	 	 	 	 ddej                  dz  dej                  dz  dej                  dz  dej                  dz  dej                  dz  dedz  d	edz  d
edz  de	ej                     ez  fd       Z xZS )MPNetForTokenClassificationc                 0   t         |   |       |j                  | _        t        |d      | _        t        j                  |j                        | _        t        j                  |j                  |j                        | _        | j                          y r  )r   r?   r.  r   r   r   rH   rI   rJ   ri   rB   r0  r   rL   s     r/   r?   z$MPNetForTokenClassification.__init__  sk      ++%@
zz&"<"<=))F$6$68I8IJ 	r0   NrQ   r~   r'   rR   r  r   r   r   r   c	           	         ||n| j                   j                  }| j                  |||||||      }
|
d   }| j                  |      }| j	                  |      }d}|<t               } ||j                  d| j                        |j                  d            }|s|f|
dd z   }||f|z   S |S t        |||
j                  |
j                        S )z
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
        Nr  r   r   rp   r  )r   r   r   rJ   r0  r   rt   r.  r   r}   r   r7  s                   r/   rW   z#MPNetForTokenClassification.forward  s    $ &1%<k$++B]B]**)%'/!5#  
 "!*,,71')HFKKDOO<fkk"oNDY,F)-)9TGf$EvE$!//))	
 	
r0   r  )r1   r2   r3   r?   r   r(   r  r	  r   r   r   r   rW   r7   r8   s   @r/   rC  rC    s    	  .2370426*.)-,0#'0
##d*0
 ))D00
 &&-	0

 ((4/0
   4'0
  $;0
 #Tk0
 D[0
 
u||	4	40
 0
r0   rC  c                   (     e Zd ZdZ fdZd Z xZS )r/  z-Head for sentence-level classification tasks.c                 &   t         |           t        j                  |j                  |j                        | _        t        j                  |j                        | _        t        j                  |j                  |j                        | _
        y r   )r   r?   r   ri   rB   r   rH   rI   rJ   r.  out_projrL   s     r/   r?   z MPNetClassificationHead.__init__  s`    YYv1163E3EF
zz&"<"<=		&"4"4f6G6GHr0   c                     |d d dd d f   }| j                  |      }| j                  |      }t        j                  |      }| j                  |      }| j	                  |      }|S r   )rJ   r   r(   tanhrH  r&  s       r/   rW   zMPNetClassificationHead.forward$  sY    Q1WLLOJJqMJJqMLLOMM!r0   r(  r8   s   @r/   r/  r/    s    7Ir0   r/  c                   :    e Zd Z fdZe	 	 	 	 	 	 	 	 	 ddej                  dz  dej                  dz  dej                  dz  dej                  dz  dej                  dz  dej                  dz  d	edz  d
edz  dedz  de	ej                     ez  fd       Z xZS )MPNetForQuestionAnsweringc                     t         |   |       |j                  | _        t        |d      | _        t        j                  |j                  |j                        | _        | j                          y r  )
r   r?   r.  r   r   r   ri   rB   
qa_outputsr   rL   s     r/   r?   z"MPNetForQuestionAnswering.__init__0  sU      ++%@
))F$6$68I8IJ 	r0   NrQ   r~   r'   rR   start_positionsend_positionsr   r   r   r   c
           	      $   |	|	n| j                   j                  }	| j                  |||||||	      }|d   }| j                  |      }|j	                  dd      \  }}|j                  d      j                         }|j                  d      j                         }d }||t        |j                               dkD  r|j                  d      }t        |j                               dkD  r|j                  d      }|j                  d      }|j                  d|      }|j                  d|      }t        |      } |||      } |||      }||z   dz  }|	s||f|dd  z   }||f|z   S |S t        ||||j                  |j                        S )	Nr  r   r   r   rr   )ignore_indexrp   )r  start_logits
end_logitsr}   r   )r   r   r   rN  splitr6  r|   lenrP   clampr   r   r}   r   )r,   rQ   r~   r'   rR   rO  rP  r   r   r   rS   r   r  r  rS  rT  
total_lossignored_indexr  
start_lossend_lossr   s                         r/   rW   z!MPNetForQuestionAnswering.forward:  s    &1%<k$++B]B]**)%'/!5#  
 "!*1#)<<r<#: j#++B/::<''+668

&=+D?'')*Q."1"9"9""==%%'(1, - 5 5b 9(--a0M-33A}EO)//=AM']CH!,@J
M:H$x/14J"J/'!"+=F/9/EZMF*Q6Q+%!!//))
 	
r0   )	NNNNNNNNN)r1   r2   r3   r?   r   r(   r  r	  r   r   r   r   rW   r7   r8   s   @r/   rL  rL  .  s      .23704263715)-,0#';
##d*;
 ))D0;
 &&-	;

 ((4/;
 ))D0;
 ''$.;
  $;;
 #Tk;
 D[;
 
u||	;	;;
 ;
r0   rL  c                     | j                  |      j                         }t        j                  |d      j	                  |      |z  }|j                         |z   S )z
    Replace non-padding symbols with their position numbers. Position numbers begin at padding_idx+1. Padding symbols
    are ignored. This is modified from fairseq's `utils.make_positions`. :param torch.Tensor x: :return torch.Tensor:
    r   rr   )nerf   r(   cumsumtype_asr[   )rQ   r;   maskincremental_indicess       r/   rN   rN   y  sP     <<$((*D,,t3;;DADH##%33r0   )r  r9  rL  r,  rC  r   r   r   )4r)  rw   r(   r   torch.nnr   r   r    r   r"   activationsr	   r
   modeling_outputsr   r   r   r   r   r   r   modeling_utilsr   utilsr   r   configuration_mpnetr   
get_loggerr1   loggerr   Moduler%   r_   r   r   r   r   r   r   r   r  r!   r,  r9  rC  r/  rL  rN   __all__r   r0   r/   <module>rm     s       A A & '   . , , 
		H	% i? i i8=bii 8=vF FRRYY 6		  "))  <T299 Tp"))  K
% K
 K
\F
+ F
R")) , L
%9 L
L
^ X
1 X
 X
v =
"6 =
 =
@bii & G
 4 G
 G
T4	r0   