
    qiZN                     V   d dl mZ d dlmZ d dlZd dlmc mZ d dlmZ ddl	m
Z
 ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZmZ ddlmZmZ ddlmZ ddlmZ ddlmZmZm Z m!Z!m"Z"m#Z#m$Z$ ddl%m&Z&  e       r	d dl'm(Z(m)Z) nd\  Z(Z)e(e)fZ* e+e*      Z, ejZ                  e.      Z/ G d de"      Z0 G d de      Z1 G d dejd                        Z3 G d d      Z4 G d de      Z5 G d d ejd                        Z6 G d! d"e      Z7 G d# d$e!      Z8 G d% d&e       Z9 G d' d(e      Z:g d)Z;y)*    )Callable)AnyN)nn   )create_causal_mask)GradientCheckpointingLayer)BaseModelOutputWithPast)ALL_ATTENTION_FUNCTIONS)Unpack)TransformersKwargslogging)is_causal_conv1d_availableis_torchdynamo_compiling   )apply_mask_to_padding_states)Gemma2RotaryEmbedding)LlamaAttentionLlamaForCausalLM
LlamaModelLlamaPreTrainedModelLlamaRMSNormapply_rotary_pos_embeager_attention_forward   )
Lfm2Config)causal_conv1d_fncausal_conv1d_updateNNc                       e Zd Zy)Lfm2RMSNormN__name__
__module____qualname__     W/opt/pipecat/venv/lib/python3.12/site-packages/transformers/models/lfm2/modular_lfm2.pyr    r    7       r&   r    c                       e Zd Zy)Lfm2RotaryEmbeddingNr!   r%   r&   r'   r*   r*   ;   r(   r&   r*   c                   *     e Zd Zdef fdZd Z xZS )Lfm2MLPconfigc                    t         |           |j                  }|j                  rat	        d|z  dz        }|j
                  Dt	        |j
                  |z        }|j                  ||j                  z   dz
  |j                  z  z  }t        j                  |j                  |d      | _
        t        j                  |j                  |d      | _        t        j                  ||j                  d      | _        y )Nr   r   r   Fbias)super__init__intermediate_sizeblock_auto_adjust_ff_dimintblock_ffn_dim_multiplierblock_multiple_ofr   Linearhidden_sizew1w3w2)selfr-   r3   	__class__s      r'   r2   zLfm2MLP.__init__@   s    "44** #A(9$9A$= >..:$'(G(GJ[([$\!$*$<$<&)A)AAAE&JbJbb%! ))F..0AN))F..0AN))-v/A/ANr&   c                     | j                  t        j                  | j                  |            | j	                  |      z        S N)r<   Fsilur:   r;   )r=   xs     r'   forwardzLfm2MLP.forwardO   s/    wwqvvdggaj)DGGAJ677r&   )r"   r#   r$   r   r2   rD   __classcell__r>   s   @r'   r,   r,   ?   s    Oz O8r&   r,   c                      e Zd ZdZdZdZdZdZej                  dfde
dedej                  dej                  ez  dz  fdZ	 dd	ej                   d
ej                   dedeeef   dz  deej                   ej                   f   f
dZdej*                  fdZddedz  defdZdej                   dedeeef   fdZdefdZdefdZd Zy)Lfm2HybridConvCachea  
    Attention and conv cache for Lfm2.

    It stores the Key and Value states as a list of tensors, one for each layer.
    Attention layer cache shape: `[batch_size, num_heads, seq_len, head_dim]`.
    Conv layer cache shape: `[batch_size, hidden_size, L_cache-1]`.
    NFr-   max_batch_sizedtypedevicec                    g | _         g | _        || _        |j                  | _        | j                  j	                  d      | _        |j                  | _        || _        g | _        |t        j                  |      nd }t        |j                        D ]  }t        j                  | j                  |j                  | j                  | j                  |      }| j                  j                  |       | j                   j                  t        j                   g              | j                  j                  t        j                   g               y )Nfull_attention)rJ   rK   )	key_cachevalue_cacherI   layer_typesindexfirst_attention_layerconv_L_cache_dtype
conv_cachetorchrK   rangenum_hidden_layerszerosr9   appendtensor)r=   r-   rI   rJ   rK   _
conv_states          r'   r2   zLfm2HybridConvCache.__init__b   s    ,!--%)%5%5%;%;<L%M""//.0)/);f%v//0 
	6A##""!!kkJ OO"":.NN!!%,,r"23##ELL$45
	6r&   
key_statesvalue_states	layer_idxcache_kwargsreturnc                    | j                   |   j                         dk(  r|| j                   |<   || j                  |<   nft        j                  | j                   |   |gd      | j                   |<   t        j                  | j                  |   |gd      | j                  |<   | j                   |   | j                  |   fS )a  
        Updates the cache with the new `key_states` and `value_states` for the layer `layer_idx`.

        Parameters:
            key_states (`torch.Tensor`):
                The new key states to cache.
            value_states (`torch.Tensor`):
                The new value states to cache.
            layer_idx (`int`):
                The index of the layer to cache the states for.
            cache_kwargs (`Dict[str, Any]`, `optional`):
                Additional arguments for the cache subclass. No additional arguments are used in `DynamicCache`.

        Return:
            A tuple containing the updated key and value states.
        r   dim)rN   numelrO   rV   cat)r=   r^   r_   r`   ra   s        r'   updatezLfm2HybridConvCache.update   s    0 >>)$**,1(2DNN9%*6DY'(-		4>>)3Lj2Y_a(bDNN9%*/))T5E5Ei5PR^4_eg*hDY'~~i($*:*:9*EEEr&   beam_idxc                    t        t        | j                              D ]:  }| j                  |   j                         r| j                  |   j                  }| j                  |   j                  d|j                  |            | j                  |<   | j                  |   j                  }| j                  |   j                  d|j                  |            | j                  |<   | j                  |   j                         s| j                  |   j                  }| j                  |   j                  d|j                  |            | j                  |<   = y)zDReorders the cache for beam search, given the selected beam indices.r   N)	rW   lenrN   rg   rK   index_selecttorO   rU   )r=   rj   r`   rK   s       r'   reorder_cachez!Lfm2HybridConvCache.reorder_cache   s#   s4>>23 		mI~~i(..0	299,0NN9,E,R,RSTV^VaVabhVi,jy))))4;;.2.>.>y.I.V.VWXZbZeZeflZm.n  +y)//13::-1__Y-G-T-TUVX`XcXcdjXk-l	*		mr&   c                     | j                   |   dk7  r| j                  n|}t        | j                        |k  s | j                  |   j	                         dk(  ry| j                  |   j
                  d   S )zYReturns the sequence length of the cached states. A layer index can be optionally passed.rM   r   rd   )rP   rR   rl   rN   rg   shaper=   r`   s     r'   get_seq_lengthz"Lfm2HybridConvCache.get_seq_length   sm     372B2B92MQa2aD..gp	t~~)+t~~i/H/N/N/PTU/U~~i(..r22r&   cache_positionc                 V    d}|j                   d   }| j                         }||z   }||fS )aB  
        Return a tuple (kv_length, kv_offset) corresponding to the length and offset that will be returned for
        the given layer at `layer_idx`.
        The masks are then prepared according to the given lengths (kv_length, kv_offset) and patterns (i.e. sliding_window, chunk_size),
        for each layer.
        r   )rq   rs   )r=   rt   r`   full_mask_kv_offsetquery_lengthpast_seen_tokens	kv_lengths          r'   get_mask_sizesz"Lfm2HybridConvCache.get_mask_sizes   s@      %++A...0 #33	---r&   
max_lengthc                    |dk  r| j                         t        |      z
  }| j                         |k  ryt        t        | j                              D ]l  }| j                  |   j                         s!| j                  |   dd|ddf   | j                  |<   | j                  |   dd|ddf   | j                  |<   n y)z"Crop the cache to the given lengthr   N.)rs   absrW   rl   rN   rg   rO   )r=   r{   idxs      r'   cropzLfm2HybridConvCache.crop   s    >,,.Z@J J.T^^,- 	SC~~c"((*&*nnS&9#{
{A:M&Ns#(,(8(8(=c;J;PQ>Q(R  %	Sr&   c                 ,    t        | j                        S r@   )rl   rN   )r=   s    r'   __len__zLfm2HybridConvCache.__len__   s    4>>""r&   c                     t        t        | j                              D ]  }| j                  |   j                          ! y r@   )rW   rl   rU   zero_rr   s     r'   resetzLfm2HybridConvCache.reset   s4    s4??34 	/IOOI&,,.	/r&   r@   )r   )r"   r#   r$   __doc__rI   is_compileablerN   rO   rV   float32r   r5   rJ   rK   strr2   Tensordictr   tupleri   
LongTensorro   rs   rz   r   r   r   r%   r&   r'   rH   rH   S   sB    NNIK #]],066 6 {{	6
 s"T)6F /3FLLF llF 	F
 38nt+F 
u||U\\)	*FBme&6&6 m3d
 33 3.U\\ .c .eTWY\T\o .Ss S# #/r&   rH   c                       e Zd Zdedef fdZ	 	 ddej                  deej                  ej                  f   dej                  dz  de	dz  d	ej                  dz  d
eej                  ej                  dz  f   fdZ xZS )Lfm2Attentionr-   r`   c                    t         |   ||       t        j                  |j                  |j
                  | j                  z  d      | _        t        j                  |j                  |j                  | j                  z  d      | _	        t        j                  |j                  |j                  | j                  z  d      | _
        t        j                  |j
                  | j                  z  |j                  d      | _        t        | j                  |j                        | _        t        | j                  |j                        | _        | `| `y )NFr/   eps)r1   r2   r   r8   r9   num_attention_headshead_dimq_projnum_key_value_headsk_projv_projout_projr    norm_epsq_layernormk_layernormo_projattention_dropoutr=   r-   r`   r>   s      r'   r2   zLfm2Attention.__init__   s    +ii 2 2F4N4NQUQ^Q^4^ejkii 2 2F4N4NQUQ^Q^4^ejkii 2 2F4N4NQUQ^Q^4^ejk		&"<"<t}}"LfN`N`glm&t}}&//J&t}}&//JK"r&   Nhidden_statesposition_embeddingsattention_maskpast_key_valuesrt   rb   c                    |j                   d d }g |d| j                  }| j                   | j                  |      j                  |       j                  dd      }	| j                   | j                  |      j                  |       j                  dd      }
 | j                  |      j                  | j                  dd      }|\  }}t        |	|
||      \  }	}
|'|||d}|j                  |
|| j                  |      \  }
}t        j                  | j                  j                  t               } || |	|
||fd| j"                  d|\  }} |j$                  g |d j'                         }| j)                  |      }||fS )Nr   r   )sincosrt   g        )dropoutscaling)rq   r   r   r   view	transposer   r   r   r   ri   r`   r
   get_interfacer-   _attn_implementationr   r   reshape
contiguousr   )r=   r   r   r   r   rt   kwargsinput_shapehidden_shapequery_statesr^   r_   r   r   ra   attention_interfaceattn_outputattn_weightsoutputs                      r'   rD   zLfm2Attention.forward   s    $))#2.88b8$--8''(GM(B(G(G(VWaabcefg%%&Edkk-&@&E&E|&TU__`acde
6t{{=166EOOPQSTU&S#7jRUWZ#[ j&#&snUL'6'='=j,X\XfXfht'u$J(?(M(MKK,,.E)
 %8	%
 LL	%
 	%
!\ *k));;;;FFH{+|##r&   r   )r"   r#   r$   r   r5   r2   rV   r   r   rH   r   rD   rE   rF   s   @r'   r   r      s    	#z 	#c 	#  7;26'$||'$ #5<<#=>'$ t+	'$
 -t3'$ ((4/'$ 
u||U\\D00	1'$r&   r   c            
       r    e Zd Zdedef fdZ	 	 	 ddej                  dedz  dej                  dz  dej                  dz  fd	Z
	 	 	 ddej                  dedz  dej                  dz  dej                  dz  fd
Z	 	 	 ddej                  dedz  dej                  dz  dej                  dz  fdZ xZS )Lfm2ShortConvr-   r`   c           	      2   t         |           || _        || _        |j                  | _        |j                  | _        t        j                  |j                  |j                  | j
                  |j                  | j                  | j
                  dz
        | _        t        j                  |j                  d|j                  z  | j                        | _        t        j                  |j                  |j                  | j                        | _        y )Nr   )in_channelsout_channelskernel_sizegroupsr0   paddingr   r/   )r1   r2   r-   r`   rS   L_cache	conv_biasr0   r   Conv1dr9   convr8   in_projr   r   s      r'   r2   zLfm2ShortConv.__init__  s    
 	"**$$	II**++%%LL1$
	 yy!3!3Q9K9K5KRVR[R[\		&"4"4f6H6HtyyYr&   NrC   r   rt   r   c                    t        ||      }| j                  |      j                  dd      }|j                  dd      \  }}}||z  }| j                  j
                  j                  | j                  j
                  j                  d      | j                  j
                  j                  d            }	|c|d   dkD  r[t        |j                  d      |j                  | j                     |	| j                  j                  d       }
|
j                  d      }
n|dt        j                  j!                  || j"                  |j$                  d   z
  df      }|j                  | j                     j'                  |       t)        ||	| j                  j                  d       }
||
z  }| j+                  |j                  dd      j-                               }|S )Nr   rd   r   re   r   r   )
activation)r   r   r   chunkr   weightr   sizer   squeezerU   r`   r0   	unsqueezer   
functionalpadr   rq   copy_r   r   r   )r=   rC   r   rt   r   BCxBCBxconv_weightsconv_outr]   ys                r'   cuda_kernels_forwardz"Lfm2ShortConv.cuda_kernels_forward&  s    )N;ll1o''B/))A2)&1aUyy'',,TYY-=-=-B-B1-EtyyGWGWG\G\]^G_`&>!+<q+@+

2**4>>:		H  ))"-H*]]..rDLL288B<4OQR3ST
**4>>:@@L'L$))..UYZHLMM!++b"-88:;r&   c                    |j                   d   }t        ||      }| j                  |      j                  dd      }|j	                  dd      \  }}}||z  }	|5|d   dkD  r,|j
                  | j                     }
|j                  d| j                  dz
        }|
j                  dd      }
|	j                  |
j                  |
j                        |
d d d d |f<   |j
                  | j                     j                  |
       t        j                  |
j                  |	j                        | j                   j"                  d d dd d f   z  d      }| j$                  r|| j                   j$                  z  }|j'                  d      }n~|dt(        j*                  j-                  |	| j                  |	j                   d   z
  df      }
|j
                  | j                     j                  |
       | j!                  |	      d	d |f   }||z  }|j                  dd      j/                         }| j1                  |      }|S )
Nr   r   rd   r   re   r   )shiftsdims)rK   rJ   .)rq   r   r   r   r   rU   r`   clampr   rollrn   rK   rJ   r   rV   sumr   r   r0   r   r   r   r   r   r   )r=   rC   r   rt   r   seqlenr   r   r   r   r]   r   r   s                r'   slow_forwardzLfm2ShortConv.slow_forwardH  s    (N;ll1o''B/))A2)&1aU&>!+<q+@(33DNNCJ+11!T\\A5EFN#<J/1uuJ<M<MU_UeUeu/fJq!^+,&&t~~6<<ZHyyryy!9DII<L<LQPQSTW<U!U[]^HyyDIINN*))"-H*]]..rDLL288B<4OQR3ST
**4>>:@@Lyy}S'6'\2HLKKB**,MM!r&   r   c                     t         r6d|j                  j                  v rt               s| j	                  ||||      S | j                  ||||      S )Ncuda)is_fast_path_availablerK   typer   r   r   )r=   r   r   rt   r   s        r'   rD   zLfm2ShortConv.forwardn  sP     "f0D0D0I0I&IRjRl,,]O^]kll  Q_``r&   )NNN)r"   r#   r$   r   r5   r2   rV   r   rH   r   r   r   rD   rE   rF   s   @r'   r   r     s   ZZ Z2 7;26.2 <<  -t3  ((4/	 
 t+ J 7;26.2$<<$ -t3$ ((4/	$
 t+$R 7;26.2	a||	a -t3	a ((4/		a
 t+	ar&   r   c                       e Zd Zdedef fdZ	 	 	 	 	 ddej                  deej                  ej                  f   dz  dej                  dz  dej                  dz  d	e
dz  d
ej                  dz  dej                  fdZ xZS )Lfm2DecoderLayerr-   r`   c                 f   t         |           |j                  |   dk(  | _        | j                  rt	        ||      | _        nt        ||      | _        t        |      | _	        t        |j                  |j                        | _        t        |j                  |j                        | _        y )NrM   r   )r1   r2   rP   is_attention_layerr   	self_attnr   r   r,   feed_forwardr    r9   r   operator_normffn_normr   s      r'   r2   zLfm2DecoderLayer.__init__{  s    "("4"4Y"?CS"S""*69=DN%fi8DI#FO(););Q#F$6$6FOOLr&   Nr   r   r   position_idsr   rt   rb   c           
         |}| j                   r, | j                  d| j                  |      |||||d|\  }}	n$| j                  | j                  |      |||      }||z   }|| j	                  | j                  |            z   }|S )N)r   r   r   r   r   rt   )r   r   rt   r   r%   )r   r   r   r   r   r   )
r=   r   r   r   r   r   rt   r   residualr\   s
             r'   rD   zLfm2DecoderLayer.forward  s     !""-t~~  "00?$7-) /-   M1 !II"00? /--	 & M &0%(9(9$--:V(WWr&   )NNNNN)r"   r#   r$   r   r5   r2   rV   r   r   r   rH   rD   rE   rF   s   @r'   r   r   z  s    
Mz 
Mc 
M IM.2046:26|| #5<<#=>E t+	
 &&- -t3 ((4/ 
r&   r   c                       e Zd ZdZy)Lfm2PreTrainedModelFN)r"   r#   r$   _can_compile_fullgraphr%   r&   r'   r   r     s    "r&   r   c                        e Zd Zdef fdZ	 	 	 	 	 	 	 ddej                  dz  dej                  dz  dej                  dz  dedz  dej                  dz  d	e
dz  d
ej                  dz  dee   defdZ xZS )	Lfm2Modelr-   c                 t    t         |   |       t        |j                  |j                        | _        | `y )Nr   )r1   r2   r    r9   r   embedding_normnorm)r=   r-   r>   s     r'   r2   zLfm2Model.__init__  s.     )&*<*<&//RIr&   N	input_idsr   r   r   inputs_embeds	use_cachert   r   rb   c           
         |d u |d uz  rt        d      || j                  |      }|r>|<|j                  d   }	t        | j                  |	| j
                  | j                        }|F||j                         nd}
t        j                  |
|
|j                  d   z   |j                        }||j                  d      }t        | j                  |||||      }|j                  d   dk7  r|nd }|}| j                  ||      }| j                  d | j                  j                   D ]!  }|j                  r|n|} ||f|||||d|}# | j!                  |      }t#        ||	      S )
Nz:You must specify exactly one of input_ids or inputs_embedsr   )r-   rI   rJ   rK   r   )rK   )r-   r   r   rt   r   r   )r   )r   r   r   r   rt   )last_hidden_stater   )
ValueErrorembed_tokensrq   rH   r-   rJ   rK   rs   rV   aranger   r   
rotary_emblayersrX   r   r   r	   )r=   r   r   r   r   r   r   rt   r   
batch_sizerx   causal_masklinear_attentionr   r   decoder_layer
layer_masks                    r'   rD   zLfm2Model.forward  s    -t";<YZZ  --i8M0&,,Q/J1{{:TZZX\XcXcO !CRC^==?de"\\ "2]5H5H5K"KTaThThN )33A6L(;;'))+%
 .;-@-@-Cq-H>d%"oom,oW "[[)H4;;+H+HI 
	M(5(H(HN^J))$7) /- M
	 ++M:&++
 	
r&   )NNNNNNN)r"   r#   r$   r   r2   rV   r   r   rH   FloatTensorboolr   r   r	   rD   rE   rF   s   @r'   r   r     s    z  .2.2046:26!%26@
##d*@
 t+@
 &&-	@

 -t3@
 ((4/@
 $;@
 ((4/@
 +,@
 
!@
r&   r   c                       e Zd Zy)Lfm2ForCausalLMNr!   r%   r&   r'   r  r    r(   r&   r  )r  r   r   )<collections.abcr   typingr   rV   torch.nn.functionalr   r   rA   masking_utilsr   modeling_layersr   modeling_outputsr	   modeling_utilsr
   processing_utilsr   utilsr   r   utils.import_utilsr   r   bamba.modeling_bambar   gemma2.modeling_gemma2r   llama.modeling_llamar   r   r   r   r   r   r   configuration_lfm2r   causal_conv1dr   r   kernel_modulesallr   
get_loggerr"   loggerr    r*   Moduler,   rH   r   r   r   r   r   r  __all__r%   r&   r'   <module>r     s%   %      / 9 7 5 & 0 V ? :   + DD-7** #$89^,  
		H	%	, 		/ 	8bii 8(C/ C/L3$N 3$lhaBII haV,1 ,^#. #F

 F
R	& 	 Br&   