
    qiM                         d Z ddlmZ ddlmZ ddlmZ  ej                  e      Z	 G d de      Z
 G d de      Z G d	 d
e      Z G d de      Z G d de      Zg dZy)zBlt model configuration   )PreTrainedConfig)RopeParameters)loggingc                         e Zd ZdZdZdZ	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddedz  dedz  dedz  dedz  d	edz  d
edz  dedz  dedz  dedz  dedz  dedz  de	e
ee	f   z  dz  dedz  dedz  dedz  f fdZ xZS )BltLocalEncoderConfigzB
    Configuration class for the Blt Local Encoder component.
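
    Example (the override values below are illustrative, not a released checkpoint):

    ```python
    >>> from transformers import BltLocalEncoderConfig

    >>> # Unset arguments keep the class defaults
    >>> config = BltLocalEncoderConfig(hidden_size=512, num_attention_heads=8)
    ```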
    """

    model_type = "blt_local_encoder"
    default_theta = 500000.0

    def __init__(
        self,
        vocab_size: int | None = 260,
        cross_attn_all_layers: bool | None = False,
        cross_attn_k: int | None = 2,
        hidden_size_global: int | None = 2048,
        hidden_size: int | None = 1024,
        num_attention_heads: int | None = 16,
        num_key_value_heads: int | None = None,
        num_hidden_layers: int | None = 1,
        rms_norm_eps: float | None = 1e-05,
        dropout: float | None = 0.0,
        max_position_embeddings: int | None = 24576,
        rope_parameters: RopeParameters | dict[str, RopeParameters] | None = None,
        hidden_act: str | None = "silu",
        intermediate_size: int | None = None,
        initializer_range: float | None = 0.02,
        **kwargs,
    ):
        self.vocab_size = vocab_size
        self.cross_attn_all_layers = cross_attn_all_layers
        self.cross_attn_k = cross_attn_k
        self.hidden_size_global = hidden_size_global
        self.hidden_size = hidden_size
        self.num_attention_heads = num_attention_heads
        # Grouped-query attention: fall back to full multi-head attention when unset
        self.num_key_value_heads = num_key_value_heads or num_attention_heads
        self.head_dim = hidden_size // num_attention_heads
        self.intermediate_size = intermediate_size or int(8 * hidden_size / 3)
        self.num_hidden_layers = num_hidden_layers
        self.rms_norm_eps = rms_norm_eps
        self.dropout = dropout
        self.max_position_embeddings = max_position_embeddings
        self.rope_parameters = rope_parameters
        self.hidden_act = hidden_act
        self.initializer_range = initializer_range
        # The byte encoder never ties embeddings; discard any user-provided value
        kwargs.pop("tie_word_embeddings", None)
        super().__init__(**kwargs, tie_word_embeddings=False)


class BltLocalDecoderConfig(PreTrainedConfig):
    r"""
edz  dedz  dedz  dedz  dedz  dedz  de	e
ee	f   z  dz  dedz  dedz  dedz  dedz  dedz  dedz  dedz  f& fdZ xZS )BltLocalDecoderConfigzB
    Configuration class for the Blt Local Decoder component.
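
    Example (the override values below are illustrative, not a released checkpoint):

    ```python
    >>> from transformers import BltLocalDecoderConfig

    >>> # Unset arguments keep the class defaults
    >>> config = BltLocalDecoderConfig(num_hidden_layers=4, cross_attn_all_layers=False)
    ```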
    """

    model_type = "blt_local_decoder"
    default_theta = 500000.0

    def __init__(
        self,
        vocab_size: int | None = 260,
        cross_attn_all_layers: bool | None = True,
        cross_attn_k: int | None = 2,
        hidden_size_global: int | None = 2048,
        hidden_size: int | None = 1024,
        num_attention_heads: int | None = 16,
        num_key_value_heads: int | None = None,
        num_hidden_layers: int | None = 9,
        rms_norm_eps: float | None = 1e-05,
        dropout: float | None = 0.0,
        max_position_embeddings: int | None = 24576,
        rope_parameters: RopeParameters | dict[str, RopeParameters] | None = None,
        hidden_act: str | None = "silu",
        intermediate_size: int | None = None,
        initializer_range: float | None = 0.02,
        pad_token_id: int | None = None,
        bos_token_id: int | None = None,
        eos_token_id: int | None = None,
        tie_word_embeddings: bool | None = False,
        **kwargs,
    ):
        self.vocab_size = vocab_size
        self.cross_attn_all_layers = cross_attn_all_layers
        self.cross_attn_k = cross_attn_k
        self.hidden_size_global = hidden_size_global
        self.hidden_size = hidden_size
        self.num_attention_heads = num_attention_heads
        self.num_key_value_heads = num_key_value_heads or num_attention_heads
        self.head_dim = hidden_size // num_attention_heads
        self.intermediate_size = intermediate_size or int(8 * hidden_size / 3)
        self.num_hidden_layers = num_hidden_layers
        self.rms_norm_eps = rms_norm_eps
        self.dropout = dropout
        self.max_position_embeddings = max_position_embeddings
        self.rope_parameters = rope_parameters
        self.hidden_act = hidden_act
        self.initializer_range = initializer_range
        super().__init__(
            pad_token_id=pad_token_id,
            bos_token_id=bos_token_id,
            eos_token_id=eos_token_id,
            tie_word_embeddings=tie_word_embeddings,
            **kwargs,
        )


class BltGlobalTransformerConfig(PreTrainedConfig):
    r"""
edz  dedz  dee	e
ef   z  dz  de
dz  dedz  dedz  dedz  f fdZ xZS )BltGlobalTransformerConfigzG
    Configuration class for the Blt Global Transformer component.
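
    Example (the override values below are illustrative, not a released checkpoint):

    ```python
    >>> from transformers import BltGlobalTransformerConfig

    >>> # Unset arguments keep the class defaults
    >>> config = BltGlobalTransformerConfig(hidden_size=1024, num_hidden_layers=12)
    ```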
    """

    model_type = "blt_global_transformer"
    default_theta = 500000.0
xs t	        d|z  dz        | _        || _        || _        || _        || _	        |	| _
        || _        d| _        || _        t        | <  di | y rG   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   r    r!   )r"   r   r   r   r   r   r   r   r   r   r   r   r   r#   r$   s                 r%   r!   z#BltGlobalTransformerConfig.__init__   s      '#6 #6#M:M #'::!2!Nc!k/A:M6N!2('>$$!2#( ."6"r&   )r)   r+   N   r-   r.      Nr0   i   r2   F)r4   r5   r6   r7   r8   r9   r   r;   r   r<   r=   r:   r!   r>   r?   s   @r%   rJ   rJ      s     *JM #'*,*.(*%) #.2MQ!'(,*.+0#4Z# !4Z# !4Z	#
 :# dl# # "%t# ($sN/B*CCdJ# $J# :# !4<# "D[# #r&   rJ   c                        e Zd ZdZdZ	 	 	 	 	 	 	 	 	 	 	 	 ddedz  dedz  dedz  dedz  dedz  d	edz  d
edz  dedz  dedz  deee	ef   z  dz  dedz  de
dz  f fdZ xZS )BltPatcherConfiga	  
    Configuration class for the Blt Patcher/Entropy model component.

    Args:
        vocab_size (`int`, *optional*, defaults to 260):
            Vocabulary size of the Blt patcher model. Defines the number of different tokens that can be represented by the
            `input_ids` passed when calling the patcher model.
        hidden_size (`int`, *optional*, defaults to 768):
            Dimension of the hidden representations.
        num_hidden_layers (`int`, *optional*, defaults to 14):
            Number of hidden layers in the Transformer decoder.
        num_attention_heads (`int`, *optional*, defaults to 12):
            Number of attention heads for each attention layer in the Transformer decoder.
        num_key_value_heads (`int`, *optional*):
            This is the number of key_value heads that should be used to implement Grouped Query Attention. If
            `num_key_value_heads=num_attention_heads`, the model will use Multi Head Attention (MHA), if
            `num_key_value_heads=1` the model will use Multi Query Attention (MQA) otherwise GQA is used. When
            converting a multi-head checkpoint to a GQA checkpoint, each group key and value head should be constructed
            by meanpooling all the original heads within that group. For more details, check out [this
            paper](https://huggingface.co/papers/2305.13245). If it is not specified, will default to
            `num_attention_heads`.
        max_position_embeddings (`int`, *optional*, defaults to 8192):
            The maximum sequence length that this model might ever be used with.
        rms_norm_eps (`float`, *optional*, defaults to 1e-05):
            The epsilon used by the rms normalization layers.
        dropout (`float`, *optional*, defaults to 0.0):
            The dropout ratio for the attention probabilities.
        intermediate_size (`int`, *optional*, defaults to 2048):
            Dimension of the MLP representations.
        rope_parameters (`RopeParameters`, *optional*):
            Dictionary containing the configuration parameters for the RoPE embeddings. The dictionary should contain
            a value for `rope_theta` and optionally parameters used for scaling in case you want to use RoPE
            with longer `max_position_embeddings`.
        initializer_range (`float`, *optional*, defaults to 0.02):
            The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
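
    Example (a deliberately small patcher for illustration; released checkpoints use the defaults above):

    ```python
    >>> from transformers import BltPatcherConfig

    >>> config = BltPatcherConfig(hidden_size=384, num_hidden_layers=2)
    >>> config.head_dim  # derived as hidden_size // num_attention_heads
    32
    ```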
    """

    model_type = "blt_patcher"

    def __init__(
        self,
        vocab_size: int | None = 260,
        hidden_size: int | None = 768,
        num_hidden_layers: int | None = 14,
        num_attention_heads: int | None = 12,
        num_key_value_heads: int | None = None,
        max_position_embeddings: int | None = 8192,
        rms_norm_eps: float | None = 1e-05,
        dropout: float | None = 0.0,
        intermediate_size: int | None = 2048,
        rope_parameters: RopeParameters | dict[str, RopeParameters] | None = None,
        initializer_range: float | None = 0.02,
        tie_word_embeddings: bool | None = False,
        **kwargs,
    ):
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
        self.head_dim = hidden_size // num_attention_heads
        self.num_key_value_heads = num_key_value_heads if num_key_value_heads is not None else num_attention_heads
        self.max_position_embeddings = max_position_embeddings
        self.rms_norm_eps = rms_norm_eps
        self.dropout = dropout
        self.hidden_act = "silu"
        self.intermediate_size = intermediate_size or int(8 * self.hidden_size / 3)
        self.rope_parameters = rope_parameters
        self.initializer_range = initializer_range
        super().__init__(tie_word_embeddings=tie_word_embeddings, **kwargs)


class BltConfig(PreTrainedConfig):
    r"""
dZ	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddedz  dedz  d	edz  d
edz  dedz  dedz  dedz  dedz  dedz  dedz  dedz  dedz  dedz  dedz  dedz  dedz  dedz  dedz  dedz  dedz  dedz  deeeef   z  dz  f, fdZ xZS )	BltConfiga<  
    This is the configuration class to store the configuration of a [`BltModel`]. It is used to instantiate a
    Blt model according to the specified arguments, defining the model architecture.

    Configuration objects inherit from [`PreTrainedConfig`] and can be used to control the model outputs. Read the
    documentation from [`PreTrainedConfig`] for more information.

    Args:
        vocab_size (`int`, *optional*, defaults to 260):
            Vocabulary size of the Blt model. Defines the number of different tokens that can be represented by the
            `input_ids` passed when calling [`BltModel`].
        max_position_embeddings (`int`, *optional*, defaults to 4096):
            The maximum sequence length that this model might ever be used with.
        patch_in_forward (`bool`, *optional*, defaults to `True`):
            Whether to perform patching during the forward pass.
        patch_size (`int`, *optional*, defaults to 4):
            Size of the patches used in the patching mechanism.
        patching_mode (`str`, *optional*, defaults to `"entropy"`):
            The mode used for patching, such as entropy-based patching.
        patching_threshold (`float`, *optional*, defaults to 1.34):
            Threshold value used for determining when to apply patches.
        patching_batch_size (`int`, *optional*, defaults to 1):
            Batch size used during the patching process.
        max_patch_length (`int`, *optional*):
            Maximum length of patches that can be generated.
        cross_attn_k (`int`, *optional*, defaults to 2):
            Multiplier for the encoder's cross-attention output: the decoder cross-attends to encoder states of
            size `encoder_config.hidden_size * cross_attn_k`.
        encoder_hash_byte_group_size (`list`, *optional*):
            List of byte group sizes used in the encoder hash function.
        encoder_hash_byte_group_vocab (`int`, *optional*, defaults to 500002):
            Vocabulary size for the encoder hash byte groups.
        encoder_hash_byte_group_nb_functions (`int`, *optional*, defaults to 1):
            Number of hash functions used in the encoder byte grouping.
        patcher_config (`BltPatcherConfig`, *optional*):
            Configuration for the patcher component of the model.
        encoder_config (`BltLocalEncoderConfig`, *optional*):
            Configuration for the local encoder component of the model.
        decoder_config (`BltLocalDecoderConfig`, *optional*):
            Configuration for the local decoder component of the model.
        global_config (`BltGlobalTransformerConfig`, *optional*):
            Configuration for the global transformer component of the model.
        tie_word_embeddings (`bool`, *optional*, defaults to `False`):
            Whether to tie weight embeddings.
        initializer_range (`float`, *optional*, defaults to 0.02):
            The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
        rope_parameters (`RopeParameters`, *optional*):
            Dictionary containing the configuration parameters for the RoPE embeddings. The dictionary should contain
            a value for `rope_theta` and optionally parameters used for scaling in case you want to use RoPE
            with longer `max_position_embeddings`.

    ```python
    >>> from transformers import BltModel, BltConfig

    >>> # Initializing a Blt configuration
    >>> configuration = BltConfig()

    >>> # Initializing a model from the configuration
    >>> model = BltModel(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```
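
    Sub-model configurations may also be passed as plain dicts; missing keys keep their defaults (the sizes
    below are illustrative):

    ```python
    >>> config = BltConfig(
    ...     patcher_config={"hidden_size": 256, "num_hidden_layers": 2},
    ...     encoder_config={"hidden_size": 128},
    ... )
    >>> config.patcher_config.hidden_size
    256
    ```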

    Checkpoint: [facebook/blt](https://huggingface.co/facebook/blt)
    """

    model_type = "blt"
    keys_to_ignore_at_inference = ["past_key_values"]
    default_theta = 500000.0
    sub_configs = {
        "patcher_config": BltPatcherConfig,
        "encoder_config": BltLocalEncoderConfig,
        "decoder_config": BltLocalDecoderConfig,
        "global_config": BltGlobalTransformerConfig,
    }

    def __init__(
        self,
        vocab_size: int | None = 260,
        max_position_embeddings: int | None = 4096,
        patch_in_forward: bool | None = True,
        patch_size: int | None = 4,
        patching_mode: str | None = "entropy",
        patching_threshold: float | None = 1.34,
        patching_batch_size: int | None = 1,
        max_patch_length: int | None = None,
        cross_attn_k: int | None = 2,
        encoder_hash_byte_group_size: list | None = None,
        encoder_hash_byte_group_vocab: int | None = 500002,
        encoder_hash_byte_group_nb_functions: int | None = 1,
        patcher_config: dict | None = None,
        encoder_config: dict | None = None,
        decoder_config: dict | None = None,
        global_config: dict | None = None,
        tie_word_embeddings: bool | None = False,
        pad_token_id: int | None = None,
        bos_token_id: int | None = None,
        eos_token_id: int | None = None,
        initializer_range: float | None = 0.02,
        rope_parameters: RopeParameters | dict[str, RopeParameters] | None = None,
        **kwargs,
    ):
        self.vocab_size = vocab_size
        self.max_position_embeddings = max_position_embeddings
        self.patch_in_forward = patch_in_forward
        self.patch_size = patch_size
        self.patching_mode = patching_mode
        self.patching_threshold = patching_threshold
        self.patching_batch_size = patching_batch_size
        self.max_patch_length = max_patch_length
        self.patching_device = kwargs.get("patching_device", "cuda")
        self.realtime_patching = kwargs.get("realtime_patching", True)
        self.patching_threshold_add = kwargs.get("patching_threshold_add")
        self.monotonicity = kwargs.get("monotonicity", False)
        self.cross_attn_k = cross_attn_k
        self.encoder_hash_byte_group_size = encoder_hash_byte_group_size or [3, 4, 5, 6, 7, 8]
        self.encoder_hash_byte_group_vocab = encoder_hash_byte_group_vocab
        self.encoder_hash_byte_group_nb_functions = encoder_hash_byte_group_nb_functions

        if patcher_config is None:
            self.patcher_config = BltPatcherConfig(initializer_range=initializer_range)
            logger.info("patcher_config is None, using default Blt patcher config")
        elif isinstance(patcher_config, dict):
            patcher_config.setdefault("initializer_range", initializer_range)
            self.patcher_config = BltPatcherConfig(**patcher_config)
        elif isinstance(patcher_config, BltPatcherConfig):
            self.patcher_config = patcher_config

        if encoder_config is None:
            self.encoder_config = BltLocalEncoderConfig(initializer_range=initializer_range)
            logger.info("encoder_config is None, using default Blt encoder config")
        elif isinstance(encoder_config, dict):
            encoder_config.setdefault("initializer_range", initializer_range)
            self.encoder_config = BltLocalEncoderConfig(**encoder_config)
        elif isinstance(encoder_config, BltLocalEncoderConfig):
            self.encoder_config = encoder_config

        if decoder_config is None:
            self.decoder_config = BltLocalDecoderConfig(initializer_range=initializer_range)
            logger.info("decoder_config is None, using default Blt decoder config")
        elif isinstance(decoder_config, dict):
            decoder_config.setdefault("initializer_range", initializer_range)
            self.decoder_config = BltLocalDecoderConfig(**decoder_config)
        elif isinstance(decoder_config, BltLocalDecoderConfig):
            self.decoder_config = decoder_config

        if global_config is None:
            self.global_config = BltGlobalTransformerConfig(initializer_range=initializer_range)
            logger.info("global_config is None, using default Blt global config")
        elif isinstance(global_config, dict):
            global_config.setdefault("initializer_range", initializer_range)
            self.global_config = BltGlobalTransformerConfig(**global_config)
        elif isinstance(global_config, BltGlobalTransformerConfig):
            self.global_config = global_config

        # The decoder cross-attends to encoder states projected to hidden_size * cross_attn_k;
        # record that size only when it differs from the decoder's own hidden size.
        encoder_cross_output_size = self.encoder_config.hidden_size * self.cross_attn_k
        self.decoder_config.encoder_cross_output_size = (
            encoder_cross_output_size if encoder_cross_output_size != self.decoder_config.hidden_size else None
        )

        self.initializer_range = initializer_range
        self.rope_parameters = rope_parameters
        super().__init__(
            pad_token_id=pad_token_id,
            bos_token_id=bos_token_id,
            eos_token_id=eos_token_id,
            tie_word_embeddings=tie_word_embeddings,
            **kwargs,
        )


__all__ = [
    "BltConfig",
    "BltPatcherConfig",
    "BltLocalEncoderConfig",
    "BltLocalDecoderConfig",
    "BltGlobalTransformerConfig",
]