
    qi                     l    d Z ddlmZ ddlmZ ddlmZ  ej                  e      Z	 G d de      Z
dgZy)zPersimmon model configuration   )PreTrainedConfig)RopeParameters)loggingc            &           e Zd ZdZdZdgZ	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddedz  dedz  dedz  dedz  d	edz  d
edz  dedz  dedz  dedz  de	dz  de	dz  de
eee
f   z  dz  de	dz  dedz  dedz  dedz  dedz  dedz  f$ fdZ xZS )PersimmonConfiga  
    This is the configuration class to store the configuration of a [`PersimmonModel`]. It is used to instantiate an
    Persimmon model according to the specified arguments, defining the model architecture. Instantiating a
    configuration with the defaults will yield a similar configuration to that of the
    [adept/persimmon-8b-base](https://huggingface.co/adept/persimmon-8b-base).

    Configuration objects inherit from [`PreTrainedConfig`] and can be used to control the model outputs. Read the
    documentation from [`PreTrainedConfig`] for more information.


    Args:
        vocab_size (`int`, *optional*, defaults to 262144):
            Vocabulary size of the Persimmon model. Defines the number of different tokens that can be represented by
            the `inputs_ids` passed when calling [`PersimmonModel`]
        hidden_size (`int`, *optional*, defaults to 4096):
            Dimension of the hidden representations.
        intermediate_size (`int`, *optional*, defaults to 16384):
            Dimension of the MLP representations.
        num_hidden_layers (`int`, *optional*, defaults to 36):
            Number of hidden layers in the Transformer encoder.
        num_attention_heads (`int`, *optional*, defaults to 64):
            Number of attention heads for each attention layer in the Transformer encoder.
        hidden_act (`str` or `function`, *optional*, defaults to `"relu2"`):
            The non-linear activation function (function or string) in the decoder.
        max_position_embeddings (`int`, *optional*, defaults to 16384):
            The maximum sequence length that this model might ever be used with.
        initializer_range (`float`, *optional*, defaults to 0.02):
            The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
        layer_norm_eps (`float`, *optional*, defaults to 1e-5):
            The epsilon used by the rms normalization layers.
        use_cache (`bool`, *optional*, defaults to `True`):
            Whether or not the model should return the last key/values attentions (not used by all models). Only
            relevant if `config.is_decoder=True`.
        tie_word_embeddings(`bool`, *optional*, defaults to `False`):
            Whether to tie weight embeddings
        rope_parameters (`RopeParameters`, *optional*):
            Dictionary containing the configuration parameters for the RoPE embeddings. The dictionary should contain
            a value for `rope_theta` and optionally parameters used for scaling in case you want to use RoPE
            with longer `max_position_embeddings`.
        qk_layernorm (`bool`, *optional*, default to `True`):
            Whether or not to normalize the Queries and Keys after projecting the hidden states
        hidden_dropout (`float`, *optional*, default to 0.0):
            The dropout ratio after applying the MLP to the hidden states.
        attention_dropout (`float`, *optional*, default to 0.0):
            The dropout ratio after computing the attention scores.

        Example:

    ```python
    >>> from transformers import PersimmonModel, PersimmonConfig

    >>> # Initializing a Persimmon persimmon-7b style configuration
    >>> configuration = PersimmonConfig()
    ```	persimmonpast_key_valuesN
vocab_sizehidden_sizeintermediate_sizenum_hidden_layersnum_attention_heads
hidden_actmax_position_embeddingsinitializer_rangelayer_norm_eps	use_cachetie_word_embeddingsrope_parametersqk_layernormhidden_dropoutattention_dropoutpad_token_idbos_token_ideos_token_idc                 D   || _         || _        || _        || _        || _        || _        || _        || _        |	| _        |
| _	        || _
        || _        || _        || _        |j                  dd       || _        || _        || _        || _        t'        | P  di | y )Npartial_rotary_factorg      ? )r
   r   r   r   r   r   r   r   r   r   r   r   r   r   
setdefaultr   r   r   r   super__init__)selfr
   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   kwargs	__class__s                       g/opt/pipecat/venv/lib/python3.12/site-packages/transformers/models/persimmon/configuration_persimmon.pyr!   zPersimmonConfig.__init__S   s    , %'>$&!2!2#6 $!2,"(,!2.137#6 ((("6"    )i   i    @  $   @   relu2r'   g{Gz?gh㈵>TFNT        r+   N      )__name__
__module____qualname____doc__
model_typekeys_to_ignore_at_inferenceintstrfloatboolr   dictr!   __classcell__)r$   s   @r%   r   r      se   5n J#4"5 "("&(-(**,!(.3*.%)!%+0MQ$('**-#'#$#$'*#$J*# 4Z*# :	*#
 :*# !4Z*# $J*# "%t*# !4<*# d
*# $;*# "D[*# ($sN/B*CCdJ*# Tk*# *#  !4<!*#" Dj#*#$ Dj%*#& Dj'*# *#r&   r   N)r1   configuration_utilsr   modeling_rope_utilsr   utilsr   
get_loggerr.   loggerr   __all__r   r&   r%   <module>r@      sA    $ 3 1  
		H	%e#& e#P 
r&   