
    qil                        d dl mZ d dlZd dlmZ ddlmZmZ ddlmZm	Z	 ddl
mZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZmZmZ ddlmZ ddlmZ ddlmZmZmZm Z m!Z!m"Z"m#Z#m$Z$m%Z%m&Z& ddl'm(Z(  ejR                  e*      Z+ G d de"      Z, G d de      Z- G d de      Z. G d de$      Z/ G d de#      Z0 G d de      Z1 G d d e!      Z2 G d! d"e       Z3 G d# d$ee/      Z4g d%Z5y)&    )CallableN)nn   )CacheDynamicCache)create_causal_mask!create_sliding_window_causal_mask)FlashAttentionKwargs)GenericForQuestionAnswering)BaseModelOutputWithPast)ALL_ATTENTION_FUNCTIONS)Unpack)TransformersKwargsauto_docstringlogging)merge_with_config_defaults)capture_outputs   )
LlamaAttentionLlamaDecoderLayerLlamaForCausalLMLlamaForSequenceClassificationLlamaForTokenClassificationLlamaMLP
LlamaModelLlamaPreTrainedModelapply_rotary_pos_embeager_attention_forward   )MistralConfigc                        e Zd Z fdZ xZS )
MistralMLPc                 J   t         |   |       t        j                  | j                  | j
                  d      | _        t        j                  | j                  | j
                  d      | _        t        j                  | j
                  | j                  d      | _        y )NFbias)	super__init__r   Linearhidden_sizeintermediate_size	gate_projup_proj	down_proj)selfconfig	__class__s     ]/opt/pipecat/venv/lib/python3.12/site-packages/transformers/models/mistral/modular_mistral.pyr'   zMistralMLP.__init__%   ss     4#3#3T5K5KRWXyy!1!143I3IPUV4#9#94;K;KRWX    )__name__
__module____qualname__r'   __classcell__r0   s   @r1   r"   r"   $   s    Y Yr2   r"   c                       e Zd Zdedef fdZ	 	 ddej                  deej                  ej                  f   dej                  dz  de	dz  d	ej                  dz  d
ee   deej                  ej                  dz  f   fdZ xZS )MistralAttentionr/   	layer_idxc                 p   t         |   ||       t        |dd       xs |j                  |j                  z  | _        t        j                  |j                  |j                  | j
                  z  d      | _        t        j                  |j                  |j                  | j
                  z  d      | _
        t        j                  |j                  |j                  | j
                  z  d      | _        t        j                  |j                  | j
                  z  |j                  d      | _        y )Nhead_dimFr$   )r&   r'   getattrr)   num_attention_headsr<   r   r(   q_projnum_key_value_headsk_projv_projo_projr.   r/   r:   r0   s      r1   r'   zMistralAttention.__init__-   s    +
D9mV=O=OSYSmSm=mii 2 2F4N4NQUQ^Q^4^ejkii 2 2F4N4NQUQ^Q^4^ejkii 2 2F4N4NQUQ^Q^4^ejkii : :T]] JFL^L^ejkr2   Nhidden_statesposition_embeddingsattention_maskpast_key_valuescache_positionkwargsreturnc           
      D   |j                   d d }g |d| j                  }| j                  |      j                  |      j	                  dd      }	| j                  |      j                  |      j	                  dd      }
| j                  |      j                  |      j	                  dd      }|\  }}t        |	|
||      \  }	}
|'|||d}|j                  |
|| j                  |      \  }
}t        j                  | j                  j                  t              } || |	|
||f| j                  sdn| j                   | j"                  t%        | j                  dd       d|\  }} |j&                  g |d j)                         }| j+                  |      }||fS )Nr   r   )sincosrI   g        sliding_window)dropoutscalingrP   )shaper<   r?   view	transposerA   rB   r   updater:   r   get_interfacer/   _attn_implementationr   trainingattention_dropoutrR   r=   reshape
contiguousrC   )r.   rE   rF   rG   rH   rI   rJ   input_shapehidden_shapequery_states
key_statesvalue_statesrO   rN   cache_kwargsattention_interfaceattn_outputattn_weightss                     r1   forwardzMistralAttention.forward5   s    $))#2.88b8$--8{{=166|DNNqRST[[/44\BLLQPQR
{{=166|DNNqRST&S#7jRUWZ#[ j&#&snUL'6'='=j,X\XfXfht'u$J(?(M(MKK,,.E)
 %8
%
  $}}C$2H2HLL"4;;0@$G
%
 
%
!\ *k));;;;FFHkk+.L((r2   )NN)r3   r4   r5   r    intr'   torchTensortupler   
LongTensorr   r
   rf   r6   r7   s   @r1   r9   r9   ,   s    l} l l )-26*)||*) #5<<#=>*) t+	*)
 *) ((4/*) -.*) 
u||U\\D00	1*)r2   r9   c                   (     e Zd Zdedef fdZ xZS )MistralDecoderLayerr/   r:   c                 j    t         |   ||       t        ||      | _        t	        |      | _        y )N)r/   r:   )r&   r'   r9   	self_attnr"   mlprD   s      r1   r'   zMistralDecoderLayer.__init__c   s,    +)9Mf%r2   )r3   r4   r5   r    rg   r'   r6   r7   s   @r1   rm   rm   b   s    &} & & &r2   rm   c                       e Zd ZeedZy)MistralPreTrainedModel)rE   
attentionsN)r3   r4   r5   rm   r9   _can_record_outputs r2   r1   rr   rr   i   s    ,&r2   rr   c                       e Zd Zeee	 	 	 	 	 	 	 ddej                  dz  dej                  dz  dej                  dz  de	dz  dej                  dz  dedz  dej                  dz  d	ee   d
efd                     Zy)MistralModelN	input_idsrG   position_idsrH   inputs_embeds	use_cacherI   rJ   rK   c                    |d u |d uz  rt        d      || j                  |      }|r|t        | j                        }|F||j	                         nd}	t        j                  |	|	|j                  d   z   |j                        }||j                  d      }| j                  j                  t        nt        }
 |
| j                  |||||      }|}| j                  ||      }| j                  d | j                  j                   D ]  } ||f||||||d|} | j!                  |      }t#        ||r|	      S d 	      S )
Nz:You must specify exactly one of input_ids or inputs_embeds)r/   r   r   )device)r/   rz   rG   rI   rH   ry   )ry   )rG   ry   rH   r{   rI   rF   )last_hidden_staterH   )
ValueErrorembed_tokensr   r/   get_seq_lengthrh   arangerS   r}   	unsqueezerP   r   r	   
rotary_emblayersnum_hidden_layersnormr   )r.   rx   rG   ry   rH   rz   r{   rI   rJ   past_seen_tokensmask_functioncausal_maskrE   rF   decoder_layers                  r1   rf   zMistralModel.forwardq   s    -t";<YZZ  --i8M0*$++>O!CRC^==?de"\\ "2]5H5H5K"KTaThThN )33A6L.2kk.H.H.P*Vw#;;'))+%
 &"oom,oW![[)H4;;+H+HI 
	M)	*) /#-$7	 	M
	 		-0&+/8O
 	
>B
 	
r2   )NNNNNNN)r3   r4   r5   r   r   r   rh   rk   ri   r   FloatTensorboolr   r   r   rf   ru   r2   r1   rw   rw   p   s     .2.204(,26!%269
##d*9
 t+9
 &&-	9

 9
 ((4/9
 $;9
 ((4/9
 +,9
 
!9
    9
r2   rw   c                       e Zd Zy)MistralForCausalLMNr3   r4   r5   ru   r2   r1   r   r          r2   r   c                       e Zd Zy)MistralForTokenClassificationNr   ru   r2   r1   r   r      r   r2   r   c                       e Zd Zy) MistralForSequenceClassificationNr   ru   r2   r1   r   r      r   r2   r   c                       e Zd Zy)MistralForQuestionAnsweringNr   ru   r2   r1   r   r      s    r2   r   )r   r   rw   rr   r   r   )6collections.abcr   rh   r   cache_utilsr   r   masking_utilsr   r	   modeling_flash_attention_utilsr
   modeling_layersr   modeling_outputsr   modeling_utilsr   processing_utilsr   utilsr   r   r   utils.genericr   utils.output_capturingr   llama.modeling_llamar   r   r   r   r   r   r   r   r   r   configuration_mistralr    
get_loggerr3   loggerr"   r9   rm   rr   rw   r   r   r   r   __all__ru   r2   r1   <module>r      s    $   . R B 8 5 & @ @ 7 5   1 
		H	%Y Y3)~ 3)l&+ &1 =
: =
@	) 		$? 		'E 	 \"=?U [r2   