
    qi                        d dl Z ddlmZmZmZ ddlmZ  ej                  e      Z	 edd      Z
 edd      Z e       Z e       Zd	e j                  d
ede j                  fdZde j                  dz  de j                  defdZ	 	 	 dde j&                  j(                  de j                  de j                  de j                  de j                  dz  dededz  dedz  dee j                  df   fdZy)    N   )is_torch_npu_availableis_torch_xpu_availablelogging)is_torch_greater_or_equalz2.5T)
accept_devz2.8hidden_statesn_repreturnc                     | j                   \  }}}}|dk(  r| S | dddddddddf   j                  |||||      } | j                  |||z  ||      S )z
    This is the equivalent of torch.repeat_interleave(x, dim=1, repeats=n_rep). The hidden states go from (batch,
    num_key_value_heads, seqlen, head_dim) to (batch, num_attention_heads, seqlen, head_dim)
       N)shapeexpandreshape)r	   r
   batchnum_key_value_headsslenhead_dims         Z/opt/pipecat/venv/lib/python3.12/site-packages/transformers/integrations/sdpa_attention.py	repeat_kvr      so    
 2?1D1D.Ehz!!Qa"23::5BUW\^bdlmM  (;e(CT8TT    attention_maskkeyc                 2    t         rt        S t        xr | d u S )N)_is_torch_xpu_available#_is_torch_greater_or_equal_than_2_8#_is_torch_greater_or_equal_than_2_5)r   r   s     r   use_gqa_in_sdpar      s     22.I>T3IIr   modulequeryvaluedropoutscaling	is_causalc                 <   |j                  dd      rt        j                  d       i }	t        | d      r=t	        ||      s-t        || j                        }t        || j                        }nddi}	||nt        | dd      }|j                  d   d	kD  xr |d u xr |}t        j                  j                         r*t        |t        j                        r|j                         }t        r[|Y|j                   t        j"                  k7  r<t        j$                  |j#                               j'                  |j(                        }t        j*                  j,                  j.                  |||f||||d
|	}
|
j1                  d	d      j3                         }
|
d fS )Noutput_attentionsFz`sdpa` attention does not support `output_attentions=True`. Please set your attention to `eager` if you want any of these features.num_key_value_groups
enable_gqaTr$   r   r   )	attn_mask	dropout_pscaler$   )getloggerwarning_oncehasattrr   r   r'   getattrr   torchjit
is_tracing
isinstanceTensoritem_is_torch_npu_availabledtypeboollogical_nottodevicenn
functionalscaled_dot_product_attention	transpose
contiguous)r   r    r   r!   r   r"   r#   r$   kwargssdpa_kwargsattn_outputs              r   sdpa_attention_forwardrE   (   s    zz%u-W	
 Kv-.~s3C!<!<=CeV%@%@AE'.K '2	UY8ZI A"K~'=K)I yy*Y"ENN$	
 %.*>*>%***L"..~/B/B/DEHHVN((%%BB	 !	 	K ''1-88:Kr   )g        NN)r1   utilsr   r   r   utils.import_utilsr   
get_logger__name__r-   r   r   r   r7   r5   intr   r9   r   r=   ModulefloattuplerE    r   r   <module>rO      sJ    K K : 
		H	% '@RV&W #&?RV&W #02 02 	UU\\ 	U# 	U%,, 	U	JELL4$7 	Jell 	Jt 	J$  !@HHOO@<<@ 
@ <<	@
 LL4'@ @ T\@ d{@ 5<<@r   