
    qi*                     <    d dl mZ ddlmZmZ  G d de      ZdgZy)   )PreTrainedConfig   )CONFIG_MAPPING
AutoConfigc                   J     e Zd ZdZdZeedZdgZ	 	 	 	 	 	 	 	 	 d fd	Z xZ	S )Glm46VConfiga  
    This is the configuration class to store the configuration of a [`Glm4vModel`]. It is used to instantiate a
    GLM-4.6V model according to the specified arguments, defining the model architecture. Instantiating a
    configuration with the defaults will yield a similar configuration to that of
    GLM-4.1V-9B-Thinking [zai-org/GLM-4.1V-9B-Thinking](https://huggingface.co/zai-org/GLM-4.1V-9B-Thinking).

    Configuration objects inherit from [`PreTrainedConfig`] and can be used to control the model outputs. Read the
    documentation from [`PreTrainedConfig`] for more information.

    Args:
        text_config (`Union[PreTrainedConfig, dict]`, *optional*, defaults to `Glm4vTextConfig`):
            The config object or dictionary of the text backbone.
        vision_config (`Union[PreTrainedConfig, dict]`,  *optional*, defaults to `Glm4vVisionConfig`):
            The config object or dictionary of the vision backbone.
        image_token_id (`int`, *optional*, defaults to 151343):
            The image token index to encode the image prompt.
        video_token_id (`int`, *optional*, defaults to 151344):
            The video token index to encode the image prompt.
        image_start_token_id (`int`, *optional*, defaults to 151339):
            The image start token index to encode the start of image.
        image_end_token_id (`int`, *optional*, defaults to 151340):
            The image end token index to encode the end of image.
        video_start_token_id (`int`, *optional*, defaults to 151361):
            The video start token index to encode the start of video.
        video_end_token_id (`int`, *optional*, defaults to 151362):
            The video end token index to encode the end of video.
        tie_word_embeddings (`bool`, *optional*, defaults to `False`):
            Whether to tie weight embeddings

    ```python
    >>> from transformers import Glm46VForConditionalGeneration, Glm46VConfig

    >>> # Initializing a GLM-4.6V style configuration
    >>> configuration = Glm46VConfig()

    >>> # Initializing a model from the GLM-4.6V style configuration
    >>> model = Glm4vForConditionalGeneration(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```glm46v)text_configvision_configpast_key_valuesc
                    t        |t              r,|j                  dd      |d<   t        |d      di || _        n|t        d          | _        t        |t              r,|j                  dd      |d<   t        |d      di || _        n|t        d          | _        || _        || _        || _        || _	        || _
        || _        |	| _        t        | 8  di |
 y )N
model_typeglm4v_vision
glm4v_text )
isinstancedictgetr   r   r
   image_token_idvideo_token_idvideo_start_token_idvideo_end_token_idimage_start_token_idimage_end_token_idtie_word_embeddingssuper__init__)selfr
   r   r   r   r   r   r   r   r   kwargs	__class__s              a/opt/pipecat/venv/lib/python3.12/site-packages/transformers/models/glm46v/configuration_glm46v.pyr   zGlm46VConfig.__init__I   s     mT**7*;*;L.*YM,'!/l0K!L!]}!]D"!/!?!ADk4((3l(SK%-k,.GHW;WD -l;=D,,$8!"4$8!"4#6 "6"    )	NNi/O i0O i+O i,O iAO iBO F)
__name__
__module____qualname____doc__r   r   sub_configskeys_to_ignore_at_inferencer   __classcell__)r    s   @r!   r   r      sJ    (T J",zJK#4"5 #!#!!!# !#r"   r   N)configuration_utilsr   autor   r   r   __all__r   r"   r!   <module>r-      s'   , 4 -P## P#f 
r"   