
    qiX                     P   d Z ddlZddlmZmZmZ ddlmZmZm	Z	m
Z
mZmZmZ ddlmZ ddlmZmZ ddlmZ d	d
lmZmZmZmZmZmZmZmZ  G d de      Z G d de      Z G d de      Z  G d de      Z! G d de      Z" G d de      Z# G d de      Z$ G d de      Z%g dZ&y)zPyTorch CamemBERT model.    N)BCEWithLogitsLossCrossEntropyLossMSELoss   ),BaseModelOutputWithPoolingAndCrossAttentions!CausalLMOutputWithCrossAttentionsMaskedLMOutputMultipleChoiceModelOutputQuestionAnsweringModelOutputSequenceClassifierOutputTokenClassifierOutput)Unpack)TransformersKwargsauto_docstring)can_return_tuple   )RobertaForCausalLMRobertaForMaskedLMRobertaForMultipleChoiceRobertaForQuestionAnswering RobertaForSequenceClassificationRobertaForTokenClassificationRobertaModelRobertaPreTrainedModelc                       e Zd ZdZy)CamembertPreTrainedModelrobertaN)__name__
__module____qualname__base_model_prefix     a/opt/pipecat/venv/lib/python3.12/site-packages/transformers/models/camembert/modular_camembert.pyr   r   ,   s    !r#   r   c                       e Zd Zy)CamembertModelN)r   r   r    r"   r#   r$   r&   r&   0   s    r#   r&   c                   t    e Zd ZdddZ fdZee	 	 	 	 	 	 	 	 ddej                  dz  dej                  dz  dej                  dz  d	ej                  dz  d
ej                  dz  dej                  dz  dej                  dz  dej                  dz  de
e   deej                     ez  fd              Z xZS )CamembertForMaskedLMz)roberta.embeddings.word_embeddings.weightzlm_head.bias)zlm_head.decoder.weightzlm_head.decoder.biasc                 L    t         |   |       | `t        |d      | _        y NF)add_pooling_layersuper__init__	camembertr&   r   selfconfig	__class__s     r$   r.   zCamembertForMaskedLM.__init__:   #     N%fFr#   N	input_idsattention_masktoken_type_idsposition_idsinputs_embedsencoder_hidden_statesencoder_attention_masklabelskwargsreturnc	                 t    | j                   |f||||||dd|	}
|
d   }| j                  |      }d}|a|j                  |j                        }t	               } ||j                  d| j                  j                        |j                  d            }t        |||
j                  |
j                        S )a  
        token_type_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0,1]`:

            - 0 corresponds to a *sentence A* token,
            - 1 corresponds to a *sentence B* token.
            This parameter can only be used when the model is initialized with `type_vocab_size` parameter with value
            >= 2. All the value in this tensor should be always < type_vocab_size.

            [What are token type IDs?](../glossary#token-type-ids)
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
            config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are ignored (masked), the
            loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`
        T)r6   r7   r8   r9   r:   r;   return_dictr   Nlosslogitshidden_states
attentions)r   lm_headtodevicer   viewr2   
vocab_sizer	   rE   rF   )r1   r5   r6   r7   r8   r9   r:   r;   r<   r=   outputssequence_outputprediction_scoresmasked_lm_lossloss_fcts                  r$   forwardzCamembertForMaskedLM.forward@   s    : $,,

))%'"7#9

 

 "!* LL9YY0778F')H%&7&<&<RAWAW&XZ`ZeZefhZijN$!//))	
 	
r#   )NNNNNNNN)r   r   r    _tied_weights_keysr.   r   r   torch
LongTensorFloatTensorr   r   tupleTensorr	   rQ   __classcell__r3   s   @r$   r(   r(   4   s   "M .
G  .237260426:>;?*.5
##d*5
 ))D05
 ((4/	5

 &&-5
 ((4/5
  %00475
 !& 1 1D 85
   4'5
 +,5
 
u||	~	-5
  5
r#   r(   c                   *    e Zd Z fdZee	 	 	 	 	 	 ddej                  dz  dej                  dz  dej                  dz  dej                  dz  dej                  dz  dej                  dz  d	e	e
   d
eej                     ez  fd              Z xZS )"CamembertForSequenceClassificationc                 L    t         |   |       | `t        |d      | _        y r*   r,   r0   s     r$   r.   z+CamembertForSequenceClassification.__init__{   r4   r#   Nr5   r6   r7   r8   r9   r<   r=   r>   c           	          | j                   |f||||dd|}|d   }	| j                  |	      }
d}||j                  |
j                        }| j                  j
                  | j                  dk(  rd| j                  _        nl| j                  dkD  rL|j                  t        j                  k(  s|j                  t        j                  k(  rd| j                  _        nd| j                  _        | j                  j
                  dk(  rIt               }| j                  dk(  r& ||
j                         |j                               }n ||
|      }n| j                  j
                  dk(  r=t               } ||
j                  d	| j                        |j                  d	            }n,| j                  j
                  dk(  rt               } ||
|      }t!        ||
|j"                  |j$                  
      S )a  
        token_type_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0,1]`:

            - 0 corresponds to a *sentence A* token,
            - 1 corresponds to a *sentence B* token.
            This parameter can only be used when the model is initialized with `type_vocab_size` parameter with value
            >= 2. All the value in this tensor should be always < type_vocab_size.

            [What are token type IDs?](../glossary#token-type-ids)
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        Tr6   r7   r8   r9   r@   r   N   
regressionsingle_label_classificationmulti_label_classificationrA   rB   )r   
classifierrH   rI   r2   problem_type
num_labelsdtyperS   longintr   squeezer   rJ   r   r   rE   rF   r1   r5   r6   r7   r8   r9   r<   r=   rL   rM   rD   rC   rP   s                r$   rQ   z*CamembertForSequenceClassification.forward   s   6 $,,
))%'
 
 "!*1YYv}}-F{{''/??a'/;DKK,__q(fllejj.HFLL\a\e\eLe/LDKK,/KDKK,{{''<7"9??a'#FNN$4fnn6FGD#FF3D))-JJ+-B @&++b/R))-II,./'!//))	
 	
r#   NNNNNN)r   r   r    r.   r   r   rS   rT   rU   r   r   rV   rW   r   rQ   rX   rY   s   @r$   r[   r[   z   s    G  .237260426*.C
##d*C
 ))D0C
 ((4/	C

 &&-C
 ((4/C
   4'C
 +,C
 
u||	7	7C
  C
r#   r[   c                   *    e Zd Z fdZee	 	 	 	 	 	 ddej                  dz  dej                  dz  dej                  dz  dej                  dz  dej                  dz  dej                  dz  d	e	e
   d
eej                     ez  fd              Z xZS )CamembertForMultipleChoicec                 L    t         |   |       | `t        |d      | _        y r*   r,   r0   s     r$   r.   z#CamembertForMultipleChoice.__init__   r4   r#   Nr5   r7   r6   r<   r8   r9   r=   r>   c           	      "   ||j                   d   n|j                   d   }|!|j                  d|j                  d            nd}	|!|j                  d|j                  d            nd}
|!|j                  d|j                  d            nd}|!|j                  d|j                  d            nd}|1|j                  d|j                  d      |j                  d            nd} | j                  |	f|
|||dd|}|d   }| j	                  |      }| j                  |      }|j                  d|      }d}|.|j                  |j                        }t               } |||      }t        |||j                  |j                        S )a  
        input_ids (`torch.LongTensor` of shape `(batch_size, num_choices, sequence_length)`):
            Indices of input sequence tokens in the vocabulary.

            Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
            [`PreTrainedTokenizer.__call__`] for details.

            [What are input IDs?](../glossary#input-ids)
        token_type_ids (`torch.LongTensor` of shape `(batch_size, num_choices, sequence_length)`, *optional*):
            Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0,1]`:

            - 0 corresponds to a *sentence A* token,
            - 1 corresponds to a *sentence B* token.
            This parameter can only be used when the model is initialized with `type_vocab_size` parameter with value
            >= 2. All the value in this tensor should be always < type_vocab_size.

            [What are token type IDs?](../glossary#token-type-ids)
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the multiple choice classification loss. Indices should be in `[0, ...,
            num_choices-1]` where `num_choices` is the size of the second dimension of the input tensors. (See
            `input_ids` above)
        position_ids (`torch.LongTensor` of shape `(batch_size, num_choices, sequence_length)`, *optional*):
            Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
            config.max_position_embeddings - 1]`.

            [What are position IDs?](../glossary#position-ids)
        inputs_embeds (`torch.FloatTensor` of shape `(batch_size, num_choices, sequence_length, hidden_size)`, *optional*):
            Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This
            is useful if you want more control over how to convert `input_ids` indices into associated vectors than the
            model's internal embedding lookup matrix.
        Nr_   rA   T)r8   r7   r6   r9   r@   rB   )shaperJ   sizer   dropoutrc   rH   rI   r   r
   rE   rF   )r1   r5   r7   r6   r<   r8   r9   r=   num_choicesflat_input_idsflat_position_idsflat_token_type_idsflat_attention_maskflat_inputs_embedsrL   pooled_outputrD   reshaped_logitsrC   rP   s                       r$   rQ   z"CamembertForMultipleChoice.forward   s   V -6,Aiooa(}GZGZ[\G]CLCXINN2,>?^bLXLdL--b,2C2CB2GHjnR`Rln11"n6I6I"6MNrvR`Rln11"n6I6I"6MNrv ( r=#5#5b#9=;M;Mb;QR 	 $,,
*..,
 
  
]3/ ++b+6YY556F')HOV4D("!//))	
 	
r#   rk   )r   r   r    r.   r   r   rS   rT   rU   r   r   rV   rW   r
   rQ   rX   rY   s   @r$   rm   rm      s    G  .22637*.0426P
##d*P
 ((4/P
 ))D0	P

   4'P
 &&-P
 ((4/P
 +,P
 
u||	8	8P
  P
r#   rm   c                   *    e Zd Z fdZee	 	 	 	 	 	 ddej                  dz  dej                  dz  dej                  dz  dej                  dz  dej                  dz  dej                  dz  d	e	e
   d
eej                     ez  fd              Z xZS )CamembertForTokenClassificationc                 L    t         |   |       | `t        |d      | _        y r*   r,   r0   s     r$   r.   z(CamembertForTokenClassification.__init__&  r4   r#   Nr5   r6   r7   r8   r9   r<   r=   r>   c           	      ~    | j                   |f||||dd|}|d   }	| j                  |	      }	| j                  |	      }
d}|W|j                  |
j                        }t               } ||
j                  d| j                        |j                  d            }t        ||
|j                  |j                        S )a-  
        token_type_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0,1]`:

            - 0 corresponds to a *sentence A* token,
            - 1 corresponds to a *sentence B* token.
            This parameter can only be used when the model is initialized with `type_vocab_size` parameter with value
            >= 2. All the value in this tensor should be always < type_vocab_size.

            [What are token type IDs?](../glossary#token-type-ids)
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
        Tr^   r   NrA   rB   )r   rs   rc   rH   rI   r   rJ   re   r   rE   rF   rj   s                r$   rQ   z'CamembertForTokenClassification.forward,  s    2 $,,
))%'
 
 "!*,,71YYv}}-F')HFKKDOO<fkk"oND$!//))	
 	
r#   rk   )r   r   r    r.   r   r   rS   rT   rU   r   r   rV   rW   r   rQ   rX   rY   s   @r$   r}   r}   %  s    G  .237260426*.2
##d*2
 ))D02
 ((4/	2

 &&-2
 ((4/2
   4'2
 +,2
 
u||	4	42
  2
r#   r}   c                   J    e Zd Z fdZee	 	 	 	 	 	 	 ddej                  dz  dej                  dz  dej                  dz  dej                  dz  dej                  dz  dej                  dz  d	ej                  dz  d
e	e
   deej                     ez  fd              Z xZS )CamembertForQuestionAnsweringc                 L    t         |   |       | `t        |d      | _        y r*   r,   r0   s     r$   r.   z&CamembertForQuestionAnswering.__init__d  r4   r#   Nr5   r6   r7   r8   r9   start_positionsend_positionsr=   r>   c           	          | j                   |f||||dd|}	|	d   }
| j                  |
      }|j                  dd      \  }}|j                  d      j	                         }|j                  d      j	                         }d}||t        |j                               dkD  r|j                  d      }t        |j                               dkD  r|j                  d      }|j                  d      }|j                  d|      }|j                  d|      }t        |      } |||      } |||      }||z   d	z  }t        ||||	j                  |	j                  
      S )a[  
        token_type_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0,1]`:

            - 0 corresponds to a *sentence A* token,
            - 1 corresponds to a *sentence B* token.
            This parameter can only be used when the model is initialized with `type_vocab_size` parameter with value
            >= 2. All the value in this tensor should be always < type_vocab_size.

            [What are token type IDs?](../glossary#token-type-ids)
        Tr^   r   r_   rA   )dimN)ignore_indexr   )rC   start_logits
end_logitsrE   rF   )r   
qa_outputssplitri   
contiguouslenrr   clampr   r   rE   rF   )r1   r5   r6   r7   r8   r9   r   r   r=   rL   rM   rD   r   r   
total_lossignored_indexrP   
start_lossend_losss                      r$   rQ   z%CamembertForQuestionAnswering.forwardj  s   0 $,,
))%'
 
 "!*1#)<<r<#: j#++B/::<''+668

&=+D?'')*Q."1"9"9""==%%'(1, - 5 5b 9(--a0M-33A}EO)//=AM']CH!,@J
M:H$x/14J+%!!//))
 	
r#   )NNNNNNN)r   r   r    r.   r   r   rS   rT   rU   r   r   rV   rW   r   rQ   rX   rY   s   @r$   r   r   c  s    G  .2372604263715>
##d*>
 ))D0>
 ((4/	>

 &&->
 ((4/>
 ))D0>
 ''$.>
 +,>
 
u||	;	;>
  >
r#   r   c                        e Zd Z fdZee	 	 	 	 	 	 	 	 	 	 	 	 ddej                  dz  dej                  dz  dej                  dz  dej                  dz  dej                  dz  dej                  dz  d	ej                  dz  d
ej                  dz  de	e	ej                        dz  de
dz  dej                  dz  deej                  z  dee   de	ej                     ez  fd              Z xZS )CamembertForCausalLMc                 L    t         |   |       | `t        |d      | _        y r*   r,   r0   s     r$   r.   zCamembertForCausalLM.__init__  r4   r#   Nr5   r6   r7   r8   r9   r:   r;   r<   past_key_values	use_cachecache_positionlogits_to_keepr=   r>   c                    |d}
 | j                   |f|||||||	|
|dd
|}|j                  }t        |t              rt	        | d      n|}| j                  |dd|ddf         }d}|* | j                  d||| j                  j                  d|}t        |||j                  |j                  |j                  |j                        S )aq  
        token_type_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0,1]`:

            - 0 corresponds to a *sentence A* token,
            - 1 corresponds to a *sentence B* token.
            This parameter can only be used when the model is initialized with `type_vocab_size` parameter with value
            >= 2. All the value in this tensor should be always < type_vocab_size.

            [What are token type IDs?](../glossary#token-type-ids)
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the left-to-right language modeling loss (next word prediction). Indices should be in
            `[-100, 0, ..., config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are
            ignored (masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`

        Example:

        ```python
        >>> from transformers import AutoTokenizer, CamembertForCausalLM, AutoConfig
        >>> import torch

        >>> tokenizer = AutoTokenizer.from_pretrained("almanach/camembert-base")
        >>> config = AutoConfig.from_pretrained("almanach/camembert-base")
        >>> config.is_decoder = True
        >>> model = CamembertForCausalLM.from_pretrained("almanach/camembert-base", config=config)

        >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
        >>> outputs = model(**inputs)

        >>> prediction_logits = outputs.logits
        ```NFT)
r6   r7   r8   r9   r:   r;   r   r   r   r@   )rD   r<   rK   )rC   rD   r   rE   rF   cross_attentionsr"   )r   last_hidden_state
isinstancerh   slicerG   loss_functionr2   rK   r   r   rE   rF   r   )r1   r5   r6   r7   r8   r9   r:   r;   r<   r   r   r   r   r=   rL   rE   slice_indicesrD   rC   s                      r$   rQ   zCamembertForCausalLM.forward  s    b I@LA
))%'"7#9+)A
 A
  118B>SV8W~ot4]kmA}a,?@A%4%%pVFt{{OeOepiopD0#33!//))$55
 	
r#   )NNNNNNNNNNNr   )r   r   r    r.   r   r   rS   rT   rU   rV   boolrW   rh   r   r   r   rQ   rX   rY   s   @r$   r   r     s}   G  .237260426:>;?*.BF!%.2-.Q
##d*Q
 ))D0Q
 ((4/	Q

 &&-Q
 ((4/Q
  %0047Q
 !& 1 1D 8Q
   4'Q
 uU%6%6784?Q
 $;Q
 t+Q
 ell*Q
 +,Q
 
u||	@	@Q
  Q
r#   r   )r   r(   rm   r   r[   r}   r&   r   )'__doc__rS   torch.nnr   r   r   modeling_outputsr   r   r	   r
   r   r   r   processing_utilsr   utilsr   r   utils.genericr   roberta.modeling_robertar   r   r   r   r   r   r   r   r   r&   r(   r[   rm   r}   r   r   __all__r"   r#   r$   <module>r      s      A A   ' 7 -	 	 	"5 "	\ 	C
- C
LL
)I L
^Y
!9 Y
x;
&C ;
|G
$? G
TZ
- Z
z	r#   