
    qiu	                     $   d dl Z ddlmZ de j                  dede j                  fdZ	 	 dde j                  j                  d	e j                  d
e j                  de j                  de j                  dz  dededz  de	e j                  df   fdZ
y)    N   )PagedAttentionCachehidden_statesn_repreturnc                     | j                   \  }}}}|dk(  r| S | dddddddddf   j                  |||||      } | j                  |||z  ||      S )z
    This is the equivalent of torch.repeat_interleave(x, dim=1, repeats=n_rep). The hidden states go from (batch,
    num_key_value_heads, seqlen, head_dim) to (batch, num_attention_heads, seqlen, head_dim)
       N)shapeexpandreshape)r   r   batchnum_key_value_headsslenhead_dims         V/opt/pipecat/venv/lib/python3.12/site-packages/transformers/integrations/sdpa_paged.py	repeat_kvr      so    
 2?1D1D.Ehz!!Qa"23::5BUW\^bdlmM  (;e(CT8TT    modulequerykeyvalueattention_maskdropoutscalingc           	      |   |j                  dd       }|k|j                  ||| j                  |d   |d         \  }}|j                  dd      j	                  d      }|j                  dd      j	                  d      }t        | d      r,t        || j                        }t        || j                        }|}	|j                         }|j                         }|j                         }t        j                  j                  j                  ||||	||d	      }
|
j                  dd
      j                         }
|
d fS )Ncache
read_indexwrite_index)
key_statesvalue_states	layer_idxr   r   r   r	   num_key_value_groupsF)	attn_mask	dropout_pscale	is_causalr   )popupdater!   	transpose	unsqueezehasattrr   r"   
contiguoustorchnn
functionalscaled_dot_product_attention)r   r   r   r   r   r   r   kwargsr   causal_maskattn_outputs              r   sdpa_attention_paged_forwardr4      sG    )/

7D(AE\\&&l+}- " 

U mmAq!++A.1%//2 v-.V889%!<!<= !K E
..
CE((%%BB C 	K ''1-88:Kr   )g        N)r-   $generation.continuous_batching.cacher   Tensorintr   r.   Modulefloattupler4    r   r   <module>r<      s     F	UU\\ 	U# 	U%,, 	U$  0HHOO0<<0 
0 <<	0
 LL4'0 0 T\0 5<<0r   