
from typing import Optional

import torch
import torch.nn as nn

from ...utils import logging
from ..llama.modeling_llama import (
    LlamaAttention,
    LlamaForCausalLM,
    LlamaForSequenceClassification,
    LlamaForTokenClassification,
    LlamaRotaryEmbedding,
)
from ..phi3.modeling_phi3 import Phi3MLP
from .configuration_glm import GlmConfig


logger = logging.get_logger(__name__)

_CHECKPOINT_FOR_DOC = "THUDM/glm-4-9b"


class GlmMLP(Phi3MLP):
    pass

class GlmRotaryEmbedding(LlamaRotaryEmbedding):
    @staticmethod
    def compute_default_rope_parameters(
        config: GlmConfig | None = None,
        device: Optional["torch.device"] = None,
        seq_len: int | None = None,
    ) -> tuple["torch.Tensor", float]:
        """
        Computes the inverse frequencies according to the original RoPE implementation
        Args:
            config ([`~transformers.PreTrainedConfig`]):
                The model configuration.
            device (`torch.device`):
                The device to use for initialization of the inverse frequencies.
            seq_len (`int`, *optional*):
                The current sequence length. Unused for this type of RoPE.
        Returns:
            Tuple of (`torch.Tensor`, `float`), containing the inverse frequencies for the RoPE embeddings and the
            post-processing scaling factor applied to the computed cos/sin (unused in this type of RoPE).
        """
        base = config.rope_parameters["rope_theta"]
        partial_rotary_factor = config.rope_parameters.get("partial_rotary_factor", 1.0)
        head_dim = getattr(config, "head_dim", None) or config.hidden_size // config.num_attention_heads
        dim = int(head_dim * partial_rotary_factor)

        attention_factor = 1.0  # Unused in this type of RoPE

        # Compute the inverse frequencies: inv_freq[i] = 1 / base^(2i / dim)
        inv_freq = 1.0 / (base ** (torch.arange(0, dim, 2, dtype=torch.int64).to(device=device, dtype=torch.float) / dim))
        return inv_freq, attention_factor

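# Illustration only (not part of the original module): a minimal standalone sketch of
# the inverse-frequency formula used above. The dim and theta values are made-up
# examples, not taken from a real GLM config. Each channel pair at even index 2i
# rotates at its own speed 1 / theta^(2i / dim).
def _example_inv_freq(dim: int = 64, theta: float = 10000.0) -> torch.Tensor:
    return 1.0 / (theta ** (torch.arange(0, dim, 2, dtype=torch.int64).float() / dim))
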
~u$	%	* *r   r   c                 |    | ddddf   }| ddddf   }t        j                  | |fd      j                  d      S )	z*Rotates half the hidden dims of the input..r   Nr   r   r0   )r*   stackflatten)xx1x2s      r   rotate_halfr?   K   sJ    	
319B	
319B;;Ryb)11"55r   c                    |j                  |      }|j                  |      }|dd|j                  d   dz  f   j                  dd      }|dd|j                  d   dz  f   j                  dd      }|j                  d   }| dd|f   | d|df   }}|dd|f   |d|df   }	}||z  t        |      |z  z   }
||z  t        |      |z  z   }t	        j
                  |
|gd      }
t	        j
                  ||	gd      }|
|fS )a  Applies Rotary Position Embedding to the query and key tensors.

    Args:
        q (`torch.Tensor`): The query tensor.
        k (`torch.Tensor`): The key tensor.
        cos (`torch.Tensor`): The cosine part of the rotary embedding.
        sin (`torch.Tensor`): The sine part of the rotary embedding.
        unsqueeze_dim (`int`, *optional*, defaults to 1):
            The 'unsqueeze_dim' argument specifies the dimension along which to unsqueeze cos[position_ids] and
            sin[position_ids] so that they can be properly broadcasted to the dimensions of q and k. For example, note
            that cos[position_ids] and sin[position_ids] have the shape [batch_size, seq_len, head_dim]. Then, if q and
            k have the shape [batch_size, heads, seq_len, head_dim], then setting unsqueeze_dim=1 makes
            cos[position_ids] and sin[position_ids] broadcastable to the shapes of q and k. Similarly, if q and k have
            the shape [batch_size, seq_len, heads, head_dim], then set unsqueeze_dim=2.
    Returns:
        `tuple(torch.Tensor)` comprising of the query and key tensors rotated using the Rotary Position Embedding.
    .Nr7   r   r8   )	unsqueezeshaperepeat_interleaver?   r*   cat)qkcossinunsqueeze_dim
rotary_dimq_rotq_passk_rotk_passq_embedk_embeds               r   apply_rotary_pos_embrQ   R   sD   $ --
&C
--
&C c'SYYr]a'''
(
:
:1"
:
EC
c'SYYr]a'''
(
:
:1"
:
EC 2Jc;J;&'3
+;)<6Ec;J;&'3
+;)<6E s{{51C78Gs{{51C78G ii&)r2Gii&)r2GGr   c                   0     e Zd Zddededz  f fdZ xZS )GlmAttentionNr   	layer_idxc                     t         |   ||       t        j                  |j                  | j
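# Illustration only (not part of the original module): a shape sketch for
# apply_rotary_pos_emb with made-up dimensions. cos/sin normally come from
# GlmRotaryEmbedding; random tensors stand in for them here.
def _example_apply_rotary_pos_emb():
    batch, heads, seq_len, head_dim = 2, 4, 8, 64
    q = torch.randn(batch, heads, seq_len, head_dim)
    k = torch.randn(batch, heads, seq_len, head_dim)
    # cos/sin have shape [batch, seq_len, rotary_dim]; with unsqueeze_dim=1 they are
    # broadcast over the heads dimension. rotary_dim == head_dim // 2 here, as in a
    # partial-rotary setup where only the first half of each head is rotated.
    cos = torch.randn(batch, seq_len, head_dim // 2)
    sin = torch.randn(batch, seq_len, head_dim // 2)
    q_embed, k_embed = apply_rotary_pos_emb(q, k, cos, sin, unsqueeze_dim=1)
    # Output shapes match the inputs, and the non-rotary half passes through unchanged
    assert q_embed.shape == q.shape and k_embed.shape == k.shape
    assert torch.equal(q_embed[..., head_dim // 2 :], q[..., head_dim // 2 :])
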
class GlmAttention(LlamaAttention):
    def __init__(self, config: GlmConfig, layer_idx: int | None = None):
        super().__init__(config, layer_idx)
        self.o_proj = nn.Linear(config.num_attention_heads * self.head_dim, config.hidden_size, bias=False)


class GlmForCausalLM(LlamaForCausalLM):
    pass


class GlmForSequenceClassification(LlamaForSequenceClassification):
    pass


class GlmForTokenClassification(LlamaForTokenClassification):
    pass


__all__ = [
    "GlmPreTrainedModel",  # noqa: F822
    "GlmModel",  # noqa: F822
    "GlmForCausalLM",
    "GlmForSequenceClassification",
    "GlmForTokenClassification",
]

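# Illustration only (not part of the original module): GlmAttention above re-creates
# o_proj so the output projection carries no bias term regardless of the attention-bias
# setting, mapping num_attention_heads * head_dim back to hidden_size. The dimensions
# below are made-up example values, not taken from a real GLM config.
def _example_o_proj_shape():
    num_attention_heads, head_dim, hidden_size = 32, 128, 4096
    o_proj = nn.Linear(num_attention_heads * head_dim, hidden_size, bias=False)
    # nn.Linear stores its weight as [out_features, in_features]
    assert o_proj.weight.shape == (hidden_size, num_attention_heads * head_dim)
    assert o_proj.bias is None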