
from typing import TYPE_CHECKING

import torch
from torch import nn

from ... import initialization as init
from ...cache_utils import Cache, DynamicCache
from ...configuration_utils import PreTrainedConfig
from ...masking_utils import create_causal_mask
from ...modeling_outputs import BaseModelOutputWithPast
from ...modeling_rope_utils import RopeParameters
from ...modeling_utils import PreTrainedModel
from ...processing_utils import Unpack
from ...utils import TransformersKwargs, logging
from ..llama.modeling_llama import (
    LlamaAttention,
    LlamaForCausalLM,
    LlamaForSequenceClassification,
    LlamaForTokenClassification,
    LlamaMLP,
    LlamaModel,
    LlamaPreTrainedModel,
    LlamaRotaryEmbedding,
)


VOCAB_FILES_NAMES = {"vocab_file": "tokenizer.model"}

SPIECE_UNDERLINE = "▁"

logger = logging.get_logger(__name__)


class GemmaConfig(PreTrainedConfig):
    r"""
    This is the configuration class to store the configuration of a [`GemmaModel`]. It is used to instantiate a Gemma
    model according to the specified arguments, defining the model architecture. Instantiating a configuration with the
    defaults will yield a similar configuration to that of the Gemma-7B.
    e.g. [google/gemma-7b](https://huggingface.co/google/gemma-7b)

    Configuration objects inherit from [`PreTrainedConfig`] and can be used to control the model outputs. Read the
    documentation from [`PreTrainedConfig`] for more information.

    Args:
        vocab_size (`int`, *optional*, defaults to 256000):
            Vocabulary size of the Gemma model. Defines the number of different tokens that can be represented by the
            `input_ids` passed when calling [`GemmaModel`].
        hidden_size (`int`, *optional*, defaults to 3072):
            Dimension of the hidden representations.
        intermediate_size (`int`, *optional*, defaults to 24576):
            Dimension of the MLP representations.
        num_hidden_layers (`int`, *optional*, defaults to 28):
            Number of hidden layers in the Transformer decoder.
        num_attention_heads (`int`, *optional*, defaults to 16):
            Number of attention heads for each attention layer in the Transformer decoder.
        num_key_value_heads (`int`, *optional*, defaults to 16):
            This is the number of key_value heads that should be used to implement Grouped Query Attention. If
            `num_key_value_heads=num_attention_heads`, the model will use Multi Head Attention (MHA), if
            `num_key_value_heads=1` the model will use Multi Query Attention (MQA) otherwise GQA is used. When
            converting a multi-head checkpoint to a GQA checkpoint, each group key and value head should be constructed
            by meanpooling all the original heads within that group. For more details, check out [this
            paper](https://huggingface.co/papers/2305.13245). If it is not specified, it will default to
            `num_attention_heads`. A GQA-style setting is shown in the usage example at the end of this docstring.
        head_dim (`int`, *optional*, defaults to 256):
            The attention head dimension.
        hidden_act (`str` or `function`, *optional*, defaults to `"gelu_pytorch_tanh"`):
            The legacy activation function. It is overwritten by the `hidden_activation`.
        max_position_embeddings (`int`, *optional*, defaults to 8192):
            The maximum sequence length that this model might ever be used with.
        initializer_range (`float`, *optional*, defaults to 0.02):
            The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
        rms_norm_eps (`float`, *optional*, defaults to 1e-06):
            The epsilon used by the rms normalization layers.
        use_cache (`bool`, *optional*, defaults to `True`):
            Whether or not the model should return the last key/values attentions (not used by all models). Only
            relevant if `config.is_decoder=True`.
        pad_token_id (`int`, *optional*, defaults to 0):
            Padding token id.
        eos_token_id (`int`, *optional*, defaults to 1):
            End of stream token id.
        bos_token_id (`int`, *optional*, defaults to 2):
            Beginning of stream token id.
        tie_word_embeddings (`bool`, *optional*, defaults to `True`):
            Whether to tie weight embeddings
        rope_parameters (`RopeParameters`, *optional*):
            Dictionary containing the configuration parameters for the RoPE embeddings. The dictionary should contain
            a value for `rope_theta` and optionally parameters used for scaling in case you want to use RoPE
            with longer `max_position_embeddings`.
        attention_bias (`bool`, *optional*, defaults to `False`):
            Whether to use a bias in the query, key, value and output projection layers during self-attention.
        attention_dropout (`float`, *optional*, defaults to 0.0):
            The dropout ratio for the attention probabilities.
        use_bidirectional_attention (`bool`, *optional*):
            If True, the model will attend to all text tokens instead of using a causal mask.

    ```python
    >>> from transformers import GemmaModel, GemmaConfig
    >>> # Initializing a Gemma gemma-7b style configuration
    >>> configuration = GemmaConfig()
    >>> # Initializing a model from the gemma-7b style configuration
    >>> model = GemmaModel(configuration)
    >>> # Accessing the model configuration
    >>> configuration = model.config
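    >>> # Hypothetical GQA-style variant: 4 key/value heads shared by 16 query heads (illustrative sizes,
    >>> # not an official checkpoint)
    >>> gqa_configuration = GemmaConfig(num_attention_heads=16, num_key_value_heads=4)
    >>> # Hypothetical encoder-style variant that attends bidirectionally instead of using a causal mask
    >>> bidirectional_configuration = GemmaConfig(use_bidirectional_attention=True)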
    ```"""

    model_type = "gemma"
    keys_to_ignore_at_inference = ["past_key_values"]
    base_model_tp_plan = {
        "layers.*.self_attn.q_proj": "colwise",
        "layers.*.self_attn.k_proj": "colwise",
        "layers.*.self_attn.v_proj": "colwise",
        "layers.*.self_attn.o_proj": "rowwise",
        "layers.*.mlp.gate_proj": "colwise",
        "layers.*.mlp.up_proj": "colwise",
        "layers.*.mlp.down_proj": "rowwise",
    }
    base_model_pp_plan = {
        "embed_tokens": (["input_ids"], ["inputs_embeds"]),
        "layers": (["hidden_states", "attention_mask"], ["hidden_states"]),
        "norm": (["hidden_states"], ["hidden_states"]),
    }

    def __init__(
        self,
        vocab_size: int | None = 256000,
        hidden_size: int | None = 3072,
        intermediate_size: int | None = 24576,
        num_hidden_layers: int | None = 28,
        num_attention_heads: int | None = 16,
        num_key_value_heads: int | None = 16,
        head_dim: int | None = 256,
        hidden_act: str | None = "gelu_pytorch_tanh",
        max_position_embeddings: int | None = 8192,
        initializer_range: float | None = 0.02,
        rms_norm_eps: float | None = 1e-6,
        use_cache: bool | None = True,
        pad_token_id: int | None = 0,
        eos_token_id: int | None = 1,
        bos_token_id: int | None = 2,
        tie_word_embeddings: bool | None = True,
        rope_parameters: RopeParameters | dict[str, RopeParameters] | None = None,
        attention_bias: bool | None = False,
        attention_dropout: float | None = 0.0,
        use_bidirectional_attention: bool | None = None,
        **kwargs,
    ):
        self.vocab_size = vocab_size
        self.max_position_embeddings = max_position_embeddings
        self.hidden_size = hidden_size
        self.intermediate_size = intermediate_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
        self.head_dim = head_dim
        self.num_key_value_heads = num_key_value_heads
        self.hidden_act = hidden_act
        self.initializer_range = initializer_range
        self.rms_norm_eps = rms_norm_eps
        self.use_cache = use_cache
        self.attention_bias = attention_bias
        self.attention_dropout = attention_dropout
        self.use_bidirectional_attention = use_bidirectional_attention
        self.rope_parameters = rope_parameters

        super().__init__(
            pad_token_id=pad_token_id,
            bos_token_id=bos_token_id,
            eos_token_id=eos_token_id,
            tie_word_embeddings=tie_word_embeddings,
            **kwargs,
        )


class GemmaRMSNorm(nn.Module):
    def __init__(self, dim: int, eps: float = 1e-6):
        super().__init__()
        self.eps = eps
        self.weight = nn.Parameter(torch.zeros(dim))

    def _norm(self, x):
        return x * torch.rsqrt(x.pow(2).mean(-1, keepdim=True) + self.eps)

    def forward(self, x):
        output = self._norm(x.float())
        # Gemma scales by (1 + weight), so a zero-initialized weight is the identity transform;
        # the product is computed in float32 before casting back to the input dtype.
        output = output * (1.0 + self.weight.float())
        return output.type_as(x)

    def extra_repr(self):
        return f"{tuple(self.weight.shape)}, eps={self.eps}"


class GemmaMLP(LlamaMLP):
    def __init__(self, config):
        super().__init__(config)
        self.gate_proj = nn.Linear(self.hidden_size, self.intermediate_size, bias=False)
        self.up_proj = nn.Linear(self.hidden_size, self.intermediate_size, bias=False)
        self.down_proj = nn.Linear(self.intermediate_size, self.hidden_size, bias=False)


class GemmaRotaryEmbedding(LlamaRotaryEmbedding):
    pass


class GemmaAttention(LlamaAttention):
    """Multi-headed attention from 'Attention Is All You Need' paper"""

    def __init__(self, config: GemmaConfig, layer_idx: int):
        super().__init__(config, layer_idx)
        # Attend bidirectionally (no causal mask) when the config requests it.
        self.is_causal = not getattr(config, "use_bidirectional_attention", False)


class GemmaPreTrainedModel(LlamaPreTrainedModel):
    @torch.no_grad()
    def _init_weights(self, module):
        PreTrainedModel._init_weights(self, module)
        if "RMSNorm" in module.__class__.__name__:
            init.zeros_(module.weight)


class GemmaModel(LlamaModel):
    def forward(
        self,
        input_ids: torch.LongTensor | None = None,
        attention_mask: torch.Tensor | None = None,
        position_ids: torch.LongTensor | None = None,
        past_key_values: Cache | None = None,
        inputs_embeds: torch.FloatTensor | None = None,
        use_cache: bool | None = None,
        cache_position: torch.LongTensor | None = None,
        **kwargs: Unpack[TransformersKwargs],
    ) -> BaseModelOutputWithPast:
        if (input_ids is None) ^ (inputs_embeds is not None):
            raise ValueError("You must specify exactly one of input_ids or inputs_embeds")

        if inputs_embeds is None:
            inputs_embeds = self.embed_tokens(input_ids)

        if use_cache and past_key_values is None:
            past_key_values = DynamicCache(config=self.config)

        if cache_position is None:
            past_seen_tokens = past_key_values.get_seq_length() if past_key_values is not None else 0
            cache_position = torch.arange(
                past_seen_tokens, past_seen_tokens + inputs_embeds.shape[1], device=inputs_embeds.device
            )

        if position_ids is None:
            position_ids = cache_position.unsqueeze(0)

        causal_mask = create_causal_mask(
            config=self.config,
            input_embeds=inputs_embeds,
            attention_mask=attention_mask,
            cache_position=cache_position,
            past_key_values=past_key_values,
            position_ids=position_ids,
        )

        hidden_states = inputs_embeds

        # Rotary position embeddings are computed once and shared across the decoder layers.
        position_embeddings = self.rotary_emb(hidden_states, position_ids)

        # Gemma scales the input embeddings by sqrt(hidden_size); the normalizer is materialized
        # in the hidden-state dtype to match the reference implementation.
        normalizer = torch.tensor(self.config.hidden_size**0.5, dtype=hidden_states.dtype)
        hidden_states = hidden_states * normalizer

        for decoder_layer in self.layers[: self.config.num_hidden_layers]:
            hidden_states = decoder_layer(
                hidden_states,
                attention_mask=causal_mask,
                position_ids=position_ids,
                past_key_values=past_key_values,
                use_cache=use_cache,
                cache_position=cache_position,
                position_embeddings=position_embeddings,
                **kwargs,
            )

        hidden_states = self.norm(hidden_states)
        return BaseModelOutputWithPast(
            last_hidden_state=hidden_states,
            past_key_values=past_key_values if use_cache else None,
        )


class GemmaForCausalLM(LlamaForCausalLM):
    def forward(self, **super_kwargs):
        r"""
        Example:

        ```python
        >>> from transformers import AutoTokenizer, GemmaForCausalLM

        >>> model = GemmaForCausalLM.from_pretrained("google/gemma-7b")
        >>> tokenizer = AutoTokenizer.from_pretrained("google/gemma-7b")

        >>> prompt = "What is your favorite condiment?"
        >>> inputs = tokenizer(prompt, return_tensors="pt")

        >>> # Generate
        >>> generate_ids = model.generate(inputs.input_ids, max_length=30)
        >>> tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
        "What is your favorite condiment?"
        ```"""
        return super().forward(**super_kwargs)


class GemmaForSequenceClassification(LlamaForSequenceClassification):
    pass


class GemmaForTokenClassification(LlamaForTokenClassification):
    pass


__all__ = [
    "GemmaConfig",
    "GemmaModel",
    "GemmaForCausalLM",
    "GemmaForSequenceClassification",
    "GemmaForTokenClassification",
    "GemmaPreTrainedModel",
]