
import torch

from ...cache_utils import Cache
from ...modeling_flash_attention_utils import FlashAttentionKwargs
from ...modeling_layers import GradientCheckpointingLayer
from ...modeling_outputs import CausalLMOutputWithPast
from ...processing_utils import Unpack
from ...utils import TransformersKwargs, logging
from ..glm.modeling_glm import (
    GlmAttention,
    GlmForCausalLM,
    GlmForSequenceClassification,
    GlmForTokenClassification,
)
from ..phi3.modeling_phi3 import Phi3MLP
from .configuration_glm4 import Glm4Config
from .modeling_glm4 import Glm4RMSNorm


logger = logging.get_logger(__name__)

_CHECKPOINT_FOR_DOC = "THUDM/GLM-4-9B-0414"
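

# NOTE: this is the modular (source-of-truth) file for GLM-4. The shipped
# modeling_glm4.py is auto-generated from it by the modular converter
# (utils/modular_model_converter.py in the transformers repo), which expands
# the thin subclasses below into full, standalone class definitions.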


class Glm4MLP(Phi3MLP):
    pass


class Glm4DecoderLayer(GradientCheckpointingLayer):
    def __init__(self, config: Glm4Config, layer_idx: int):
        super().__init__()
        self.hidden_size = config.hidden_size
        self.self_attn = Glm4Attention(config=config, layer_idx=layer_idx)
        self.mlp = Glm4MLP(config)
        self.input_layernorm = Glm4RMSNorm(config.hidden_size, eps=config.rms_norm_eps)
        self.post_attention_layernorm = Glm4RMSNorm(config.hidden_size, eps=config.rms_norm_eps)
        self.post_self_attn_layernorm = Glm4RMSNorm(config.hidden_size, eps=config.rms_norm_eps)
        self.post_mlp_layernorm = Glm4RMSNorm(config.hidden_size, eps=config.rms_norm_eps)

    def forward(
        self,
        hidden_states: torch.Tensor,
        attention_mask: torch.Tensor | None = None,
        position_ids: torch.LongTensor | None = None,
        past_key_values: Cache | None = None,
        use_cache: bool | None = False,
        cache_position: torch.LongTensor | None = None,
        position_embeddings: tuple[torch.Tensor, torch.Tensor] | None = None,
        **kwargs: Unpack[FlashAttentionKwargs],
    ) -> tuple[torch.FloatTensor, tuple[torch.FloatTensor, torch.FloatTensor] | None]:
        residual = hidden_states
        hidden_states = self.input_layernorm(hidden_states)

        # Self Attention
        hidden_states, _ = self.self_attn(
            hidden_states=hidden_states,
            attention_mask=attention_mask,
            position_ids=position_ids,
            past_key_values=past_key_values,
            use_cache=use_cache,
            cache_position=cache_position,
            position_embeddings=position_embeddings,
            **kwargs,
        )
        hidden_states = self.post_self_attn_layernorm(hidden_states)
        hidden_states = residual + hidden_states

        # Fully Connected
        residual = hidden_states
        hidden_states = self.post_attention_layernorm(hidden_states)
        hidden_states = self.mlp(hidden_states)
        hidden_states = self.post_mlp_layernorm(hidden_states)
        hidden_states = residual + hidden_states

        return hidden_states


class Glm4Attention(GlmAttention):
    pass


class Glm4ForCausalLM(GlmForCausalLM):
    def forward(
        self,
        **super_kwargs: Unpack[TransformersKwargs],
    ) -> tuple | CausalLMOutputWithPast:
        r"""
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
            config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored
            (masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`.

        Example:

        ```python
        >>> from transformers import AutoTokenizer, Glm4ForCausalLM

        >>> model = Glm4ForCausalLM.from_pretrained("THUDM/GLM-4-9B-0414")
        >>> tokenizer = AutoTokenizer.from_pretrained("THUDM/GLM-4-9B-0414")

        >>> prompt = "Hey, are you conscious? Can you talk to me?"
        >>> inputs = tokenizer(prompt, return_tensors="pt")

        >>> # Generate
        >>> generate_ids = model.generate(inputs.input_ids, max_length=30)
        >>> tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
        "Hey, are you conscious? Can you talk to me?\nI'm not conscious, but I can talk to you."
        ```"""
        return super().forward(**super_kwargs)


class Glm4ForSequenceClassification(GlmForSequenceClassification):
    pass


class Glm4ForTokenClassification(GlmForTokenClassification):
    pass


__all__ = [
    "Glm4PreTrainedModel",
    "Glm4Model",
    "Glm4ForCausalLM",
    "Glm4ForSequenceClassification",
    "Glm4ForTokenClassification",
]
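

# Notes on the definitions above:
#
# * Glm4PreTrainedModel and Glm4Model are not defined in this file; the
#   modular converter derives them from their Glm counterparts when it
#   generates modeling_glm4.py, which is why they can still appear in __all__.
#
# * Glm4DecoderLayer uses "sandwich" normalization, i.e. two extra RMSNorms
#   relative to a standard pre-norm block:
#
#       x = x + post_self_attn_layernorm(self_attn(input_layernorm(x)))
#       x = x + post_mlp_layernorm(mlp(post_attention_layernorm(x)))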