
    qi3%                        d Z ddlmZ ddlZddlmZ ddlmZ ddlmZm	Z	 ddl
mZmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZmZ ddlmZ ddlmZ ddlmZmZmZm Z m!Z!m"Z"m#Z#m$Z$ ddl%m&Z&  ejN                  e(      Z) G d dejT                        Z+ G d de      Z, G d de      Z- G d de"      Z. G d de      Z/ G d de       Z0 G d  d!e!      Z1g d"Z2y)#zPyTorch Starcoder2 model.    )CallableN)nn   )ACT2FN)CacheDynamicCache)create_causal_mask!create_sliding_window_causal_mask)FlashAttentionKwargs)BaseModelOutputWithPast)ALL_ATTENTION_FUNCTIONS)Unpack)TransformersKwargslogging)merge_with_config_defaults)capture_outputs   )MistralAttentionMistralDecoderLayerMistralForCausalLM MistralForSequenceClassificationMistralForTokenClassificationMistralModelapply_rotary_pos_embeager_attention_forward   )Starcoder2Configc                   h     e Zd Zdef fdZdeej                     dz  dej                  fdZ xZ	S )Starcoder2MLPconfigc                 P   t         |           |j                  }t        j                  ||j
                  |j                        | _        t        j                  |j
                  ||j                        | _        t        |j                     | _        |j                  | _        y )Nbias)super__init__hidden_sizer   Linearintermediate_sizeuse_biasc_fcc_projr   
hidden_actactresidual_dropout)selfr    	embed_dim	__class__s      c/opt/pipecat/venv/lib/python3.12/site-packages/transformers/models/starcoder2/modular_starcoder2.pyr%   zStarcoder2MLP.__init__5   su    &&	IIi)A)AX	ii 8 8)&//Z&++, & 7 7    hidden_statesNreturnc                     | j                  |      }| j                  |      }| j                  |      }t        j                  j                  || j                  | j                        }|S )Nptraining)r*   r-   r+   r   
functionaldropoutr.   r9   )r/   r4   s     r2   forwardzStarcoder2MLP.forward=   sZ    		-0/M2--mt?T?T_c_l_l-mr3   )
__name__
__module____qualname__r   r%   tupletorchFloatTensorr<   __classcell__r1   s   @r2   r   r   4   s9    8/ 8U5+<+<%=%D IZIZ r3   r   c                   :    e Zd Zddededz  f fdZ	 	 ddej                  deej                  ej                  f   dej                  dz  de	dz  d	ej                  dz  d
ee   deej                  ej                  dz  eej                     dz  f   fdZ xZS )Starcoder2AttentionNr    	layer_idxc                    t         |   ||       |j                  | _        t        j                  |j
                  |j                  | j                  z  |j                        | _	        t        j                  |j
                  |j                  | j                  z  |j                        | _        t        j                  |j
                  |j                  | j                  z  |j                        | _        t        j                  |j                  | j                  z  |j
                  |j                        | _        y )Nr    rG   r"   )r$   r%   r.   r   r'   r&   num_attention_headshead_dimr)   q_projnum_key_value_headsk_projv_projo_projr/   r    rG   r1   s      r2   r%   zStarcoder2Attention.__init__F   s    )< & 7 7ii 2 2F4N4NQUQ^Q^4^eketetuii 2 2F4N4NQUQ^Q^4^eketetuii 2 2F4N4NQUQ^Q^4^eketetuii : :T]] JFL^L^eketetur3   r4   position_embeddingsattention_maskpast_key_valuescache_positionkwargsr5   c           
         |j                   d d }g |d| j                  }| j                  |      j                  |      j	                  dd      }	| j                  |      j                  |      j	                  dd      }
| j                  |      j                  |      j	                  dd      }|\  }}t        |	|
||      \  }	}
|'|||d}|j                  |
|| j                  |      \  }
}t        j                  | j                  j                  t              } || |	|
||f| j                  sdn| j                   | j"                  t%        | j                  dd       d|\  }} |j&                  g |d j)                         }| j+                  |      }t,        j.                  j1                  || j2                  | j                        }||fS )	Nr   r   )sincosrU   g        sliding_window)r;   scalingr[   r7   )shaperK   rL   view	transposerN   rO   r   updaterG   r   get_interfacer    _attn_implementationr   r9   attention_dropoutr\   getattrreshape
contiguousrP   r   r:   r;   r.   )r/   r4   rR   rS   rT   rU   rV   input_shapehidden_shapequery_states
key_statesvalue_statesrZ   rY   cache_kwargsattention_interfaceattn_outputattn_weightss                     r2   r<   zStarcoder2Attention.forwardN   s    $))#2.88b8$--8{{=166|DNNqRST[[/44\BLLQPQR
{{=166|DNNqRST&S#7jRUWZ#[ j&#&snUL'6'='=j,X\XfXfht'u$J(?(M(MKK,,.E)
 %8
%
  $}}C$2H2HLL"4;;0@$G
%
 
%
!\ *k));;;;FFHkk+.mm++4004== , 
 L((r3   )N)NN)r=   r>   r?   r   intr%   rA   Tensorr@   r   
LongTensorr   r   r<   rC   rD   s   @r2   rF   rF   E   s    v/ vC$J v )-26.)||.) #5<<#=>.) t+	.)
 .) ((4/.) -..) 
u||U\\D0%2E2LL	M.)r3   rF   c                   (     e Zd Zdedef fdZ xZS )Starcoder2DecoderLayerr    rG   c                 *   t         |   ||       t        ||      | _        t	        |      | _        t        j                  |j                  |j                        | _
        t        j                  |j                  |j                        | _        y )NrI   eps)r$   r%   rF   	self_attnr   mlpr   	LayerNormr&   norm_epsiloninput_layernormpost_attention_layernormrQ   s      r2   r%   zStarcoder2DecoderLayer.__init__   sj    +,FiP (!||F,>,>FDWDWX(*V5G5GVM`M`(a%r3   )r=   r>   r?   r   rp   r%   rC   rD   s   @r2   rt   rt      s     b/ bC b br3   rt   c                       e Zd Zdef fdZee	 	 	 	 	 	 	 ddej                  dz  dej                  dz  dej                  dz  de
dz  dej                  dz  d	edz  d
ej                  dz  dee   deez  fd              Z xZS )Starcoder2Modelr    c           	      :   t         |   |       t        j                  t	        |j
                        D cg c]  }t        ||       c}      | _        t        j                  |j                  |j                        | _        |j                  | _        y c c}w )Nrv   )r$   r%   r   
ModuleListrangenum_hidden_layersrt   layersrz   r&   r{   normembedding_dropoutrQ   s      r2   r%   zStarcoder2Model.__init__   su     mmHMfNfNfHgh9#FI6h
 LL!3!39L9LM	!'!9!9 is   BN	input_idsrS   position_idsrT   inputs_embeds	use_cacherU   rV   r5   c                    |d u |d uz  rt        d      || j                  |      }|r|t        | j                        }|F||j	                         nd}	t        j                  |	|	|j                  d   z   |j                        }||j                  d      }| j                  j                  t        nt        }
 |
| j                  |||||      }|}t        j                  j                  || j                   | j"                        }| j%                  ||      }| j&                  d | j                  j(                   D ]  } ||f||||||d	|} | j+                  |      }t-        ||r|
      S d 
      S )Nz:You must specify exactly one of input_ids or inputs_embeds)r    r   r   )device)r    r   rS   rU   rT   r   r7   )r   )rS   r   rT   r   rU   rR   )last_hidden_staterT   )
ValueErrorembed_tokensr   r    get_seq_lengthrA   aranger]   r   	unsqueezer[   r	   r
   r   r:   r;   r   r9   
rotary_embr   r   r   r   )r/   r   rS   r   rT   r   r   rU   rV   past_seen_tokensmask_functioncausal_maskr4   rR   decoder_layers                  r2   r<   zStarcoder2Model.forward   s    -t";<YZZ  --i8M0*$++>O!CRC^==?de"\\ "2]5H5H5K"KTaThThN )33A6L.2kk.H.H.P*Vw#;;'))+%
 &--T33dmm . 
 #oom,oW![[)H4;;+H+HI 
	M)	*) /#-$7	 	M
	 		-0&+/8O
 	
>B
 	
r3   )NNNNNNN)r=   r>   r?   r   r%   r   r   rA   rr   rq   r   rB   boolr   r   r@   r   r<   rC   rD   s   @r2   r   r      s    :/ :   .2.204(,26!%26>
##d*>
 t+>
 &&-	>

 >
 ((4/>
 $;>
 ((4/>
 +,>
 
(	(>
   >
r3   r   c                       e Zd Zy)Starcoder2ForCausalLMNr=   r>   r?    r3   r2   r   r          r3   r   c                       e Zd Zy)#Starcoder2ForSequenceClassificationNr   r   r3   r2   r   r      r   r3   r   c                       e Zd Zy) Starcoder2ForTokenClassificationNr   r   r3   r2   r   r      r   r3   r   )r   r   Starcoder2PreTrainedModelr   r   )3__doc__collections.abcr   rA   r   activationsr   cache_utilsr   r   masking_utilsr	   r
   modeling_flash_attention_utilsr   modeling_outputsr   modeling_utilsr   processing_utilsr   utilsr   r   utils.genericr   utils.output_capturingr   mistral.modeling_mistralr   r   r   r   r   r   r   r   configuration_starcoder2r   
get_loggerr=   loggerModuler   rF   rt   r   r   r   r   __all__r   r3   r2   <module>r      s   &   $   ! . R B 7 5 & 0 7 5	 	 	 7 
		H	%BII "7)* 7)tb0 bI
l I
X	. 		*J 		'D 	r3   