
    qiF                        d dl mZ d dlZd dlmZ ddlmZ ddlmZm	Z	 ddl
mZ ddlmZmZ dd	lmZmZmZmZ dd
lmZ ddlmZmZmZ ddlmZ ddlmZmZ ddlm Z m!Z!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z'm(Z( ddl)m*Z*m+Z+m,Z,m-Z-m.Z.m/Z/ ddl0m1Z1m2Z2  ejf                  e4      Z5 G d de!      Z6d Z7d Z8 G d dejr                        Z: G d de'      Z; G d de(      Z< G d de       Z= G d d e#      Z> G d! d"e%      Z? G d# d$e$      Z@ G d% d&e"      ZA G d' d(e&      ZBe G d) d*e             ZC G d+ d,eC      ZD G d- d.ejr                        ZE G d/ d0ejr                        ZF G d1 d2ejr                        ZGee G d3 d4e                    ZH G d5 d6ejr                        ZI G d7 d8ejr                        ZJ G d9 d:e.      ZK G d; d<e/      ZL G d= d>e,      ZM G d? d@e*      ZN G dA dBe+      ZO G dC dDe-      ZP G dE dFeP      ZQ G dG dHePe      ZRg dIZSy)J    )	dataclassN)nn   )initialization)CacheDynamicCache)GenerationMixin)create_bidirectional_maskcreate_causal_mask)BaseModelOutputWithPast,BaseModelOutputWithPoolingAndCrossAttentionsCausalLMOutputWithPastModelOutput)PreTrainedModel)auto_docstringcan_return_tuplelogging)merge_with_config_defaults)OutputRecordercapture_outputs   )	EsmAttentionEsmEmbeddings
EsmEncoderEsmIntermediateEsmLayer	EsmOutput	EsmPoolerEsmSelfAttentionEsmSelfOutput)LlamaAttentionLlamaDecoderLayerLlamaMLPLlamaPreTrainedModelLlamaRMSNormLlamaRotaryEmbedding   )EvollaConfigSaProtConfigc                        e Zd Z fdZ xZS )EvollaSaProtEmbeddingsc                 2    t         |   |       d | _        y N)super__init__position_idsselfconfig	__class__s     [/opt/pipecat/venv/lib/python3.12/site-packages/transformers/models/evolla/modular_evolla.pyr/   zEvollaSaProtEmbeddings.__init__@   s          )__name__
__module____qualname__r/   __classcell__r4   s   @r5   r+   r+   ?   s    ! !r6   r+   c                 b    | j                  dd      \  }}t        j                  | |fd      S )Nr   dim)chunktorchcat)xx1x2s      r5   rotate_half_esmrF   F   s/    WWQBWFB99rc2YB''r6   c                     |d d d d d | j                   d   d d f   }|d d d d d | j                   d   d d f   }| |z  t        |       |z  z   S )N)shaperF   )rC   cossins      r5   apply_rotary_pos_emb_esmrL   K   sY    
aMaggbkM1$
%C
aMaggbkM1$
%CG*S011r6   c                        e Zd ZU dZej
                  ed<   def fdZd
dZ	dej
                  dej
                  de
ej
                  ej
                  f   fd	Z xZS )EvollaSaProtRotaryEmbeddingz
    Rotary position embeddings based on those in
    [RoFormer](https://huggingface.co/docs/transformers/model_doc/roformer). Query and keys are transformed by rotation
    matrices which depend on their relative positions.
    inv_freqr?   c                     t         |           || _        ddt        j                  d|dt        j
                        j                         |z  z  z  }| j                  d|       d | _        d | _	        d | _
        y )N      ?'  r   r   dtyperO   )r.   r/   r?   rA   arangeint64floatregister_buffer_seq_len_cached_cos_cached_sin_cached)r2   r?   rO   r4   s      r5   r/   z$EvollaSaProtRotaryEmbedding.__init__[   sn    %ELLC%++$N$T$T$VY\$\]^Z2#r6   c                 t   |j                   |   }|| j                  k7  s#| j                  j                  |j                  k7  r|| _        t	        j
                  |j                   |   |j                        j                  | j                        }t	        j                  || j                        }t	        j                  ||fd      j                  |j                        }|j                         d d d d d d f   | _        |j                         d d d d d d f   | _        | j                  | j                  fS )Ndevicer=   r>   )rI   rY   rZ   r^   rA   rU   type_asrO   outerrB   torJ   rK   r[   )r2   rC   seq_dimensionseq_lentfreqsembs          r5   _update_cos_sin_tablesz2EvollaSaProtRotaryEmbedding._update_cos_sin_tablesf   s    ''-( d***d.>.>.E.E.Q#*D QWW]3AHHEMMdmm\AKK4==1E))UEN366qxx@C"wwytQ)9:D"wwytQ)9:D!1!111r6   qkreturnc                 .   | j                  |d      \  | _        | _        t        || j                  | j                        j	                  |j
                        t        || j                  | j                        j	                  |j
                        fS )NrH   )rb   rS   )rg   rZ   r[   rL   ra   rT   )r2   rh   ri   s      r5   forwardz#EvollaSaProtRotaryEmbedding.forwardv   s    -1-H-HZ\-H-]*$* %Q(8(8$:J:JKNNUVU\U\N]$Q(8(8$:J:JKNNUVU\U\N]
 	
r6   )r   )r7   r8   r9   __doc__rA   Tensor__annotations__intr/   rg   tuplerl   r:   r;   s   @r5   rN   rN   R   sY     ll	 C 	 2 
 
%,, 
5u||A[;\ 
r6   rN   c                       e Zd ZddZy)EvollaSaProtSelfAttentionNc                    t         j                  j                  |        || _        |j                  |j
                  z  dk7  r2t        |d      s&t        d|j                   d|j
                   d      |j
                  | _        t        |j                  |j
                  z        | _	        | j
                  | j                  z  | _
        t        j                  |j                  | j                        | _        t        j                  |j                  | j                        | _        t        j                  |j                  | j                        | _        |j                  | _        d | _        |xs t%        |dd      | _        | j&                  dk(  rt)        | j                  	      | _        |j*                  | _        || _        d
| _        | j*                  xr | | _        y )Nr   embedding_sizezThe hidden size (z6) is not a multiple of the number of attention heads ()position_embedding_typeabsoluterotaryr>   rQ   )r   Moduler/   r3   hidden_sizenum_attention_headshasattr
ValueErrorrp   attention_head_sizeall_head_sizeLinearquerykeyvalueattention_probs_dropout_probdropoutrotary_embeddingsgetattrrw   rN   
is_decoder	layer_idxscaling	is_causal)r2   r3   rw   r   is_cross_attentions        r5   r/   z"EvollaSaProtSelfAttention.__init__   s   
		4  : ::a?PVXhHi#F$6$6#7 8 445Q8 
 $*#=#= #&v'9'9F<V<V'V#W !558P8PPYYv1143E3EF
99V//1C1CDYYv1143E3EF
::!%'> (
'-zC
$ ''83%@TE]E]%^D" ++"C1C-Cr6   )NNF)r7   r8   r9   r/    r6   r5   rs   rs      s    Dr6   rs   c                       e Zd Zy)EvollaSaProtSelfOutputNr7   r8   r9   r   r6   r5   r   r          r6   r   c                       e Zd Zy)EvollaSaProtAttentionNr   r   r6   r5   r   r      r   r6   r   c                       e Zd Zy)EvollaSaProtIntermediateNr   r   r6   r5   r   r      r   r6   r   c                       e Zd Zy)EvollaSaProtOutputNr   r   r6   r5   r   r      r   r6   r   c                       e Zd Zy)EvollaSaProtLayerNr   r   r6   r5   r   r      r   r6   r   c                       e Zd Zy)EvollaSaProtEncoderNr   r   r6   r5   r   r      r   r6   r   c                       e Zd Zy)EvollaSaProtPoolerNr   r   r6   r5   r   r      r   r6   r   c                   t     e Zd ZU eed<   dgZdZdZdZdZ	e
 eedd      g eedd      gdZ fd	Z xZS )
EvollaSaProtPreTrainedModelr3   r   Tr'   	attention)index
layer_namecrossattention)hidden_states
attentionscross_attentionsc                 *   t         |   |       t        |t              rsddt	        j
                  d|j                  dt        j                        j                         |j                  z  z  z  }t        j                  |j                  |       y y )NrQ   rR   r   r   rS   )r.   _init_weights
isinstancerN   rA   rU   r?   rV   rW   initcopy_rO   )r2   modulerO   r4   s      r5   r   z)EvollaSaProtPreTrainedModel._init_weights   sm    f%f9:eQ

AU[[(Y(_(_(adjdndn(nopHJJv1 ;r6   )r7   r8   r9   r)   ro   _no_split_modules_supports_flash_attn_supports_sdpa_supports_flex_attn_supports_attention_backendr   r   rs   _can_record_outputsr   r:   r;   s   @r5   r   r      sd    ,-N"& +%&?qU`ab4AJZ[
2 2r6   r   c            
            e Zd Zdef fdZd Zd Zee	 d
de	j                  dz  de	j                  dz  dee	j                     ez  fd	              Z xZS )EvollaSaProtProteinEncoderr3   c                     t         |   |       t        |      | _        t	        |      | _        | j                          y r-   )r.   r/   r+   
embeddingsr   encoder	post_initr1   s     r5   r/   z#EvollaSaProtProteinEncoder.__init__   s2     08*62r6   c                 .    | j                   j                  S r-   r   word_embeddingsr2   s    r5   get_input_embeddingsz/EvollaSaProtProteinEncoder.get_input_embeddings   s    ...r6   c                 &    || j                   _        y r-   r   r2   r   s     r5   set_input_embeddingsz/EvollaSaProtProteinEncoder.set_input_embeddings   s    */'r6   N	input_idsattention_maskrj   c                 ^   |j                         }|\  }}|j                  }|t        j                  ||f|      }| j	                  ||      }t        | j                  ||      } | j                  |fd|i|}	|	d   }
t        |
|	j                  |	j                  |	j                        S )Nr]   r   r   )r3   inputs_embedsr   r   r   )last_hidden_stater   r   r   )sizer^   rA   onesr   r
   r3   r   r   r   r   r   )r2   r   r   kwargsinput_shape
batch_size
seq_lengthr^   r   encoder_outputssequence_outputs              r5   rl   z"EvollaSaProtProteinEncoder.forward   s      nn&!,
J!!!"ZZ*j)A6RN)N[2;;')
 '$,,}^^^W]^)!,;-)77&11,==	
 	
r6   r-   )r7   r8   r9   r)   r/   r   r   r   r   rA   rn   rq   r   rl   r:   r;   s   @r5   r   r      sw    | /0   /3
<<$&
 t+

 
u||	K	K
   
r6   r   c                   &     e Zd Zd fd	Zd Z xZS )!EvollaSequenceCompressorAttentionc                 j   t         |           |dz  | _        || _        ||z  }t	        j
                  |      | _        t	        j
                  |      | _        t	        j                  ||d      | _	        t	        j                  ||dz  d      | _
        t	        j                  ||d      | _        y )N      Fbiasr   )r.   r/   scaleheadsr   	LayerNorm
norm_medianorm_latentsr   to_qto_kvto_out)r2   r?   dim_headr   	inner_dimr4   s        r5   r/   z*EvollaSequenceCompressorAttention.__init__  s    t^

u$	,,s+LL-IIc959	YYsIM>
ii	3U;r6   c                 F   | j                  |      }| j                  |      }| j                  }| j                  |      }t	        j
                  ||fd      }| j                  |      j                  dd      \  }}|j                  |j                  d      |j                  d      |d      j                  dddd      }|j                  |j                  d      |j                  d      |d      j                  dddd      }|j                  |j                  d      |j                  d      |d      j                  dddd      }|| j                  z  }t	        j                  ||j                  dd            }	|	|	j                  dd	      j                         z
  }	|	j                   \  }
}}}t	        j"                  ||      j%                  |j&                        }|d
d
d
d
d
d
f   }|d
d
d
d
d
d
f   }||z  }|	j)                  d|z
  j+                         d      }	|	j-                  d      }t	        j                  ||      }|j                  dddd      }|j/                  |j                  d      |j                  d      d      }| j1                  |      S )z
        Args:
            x (torch.Tensor): image features
                shape (b, n1, D)
            latent (torch.Tensor): latent features
                shape (b, n2, D);  n2: num of latent tokens
        rH   r>   r   r=   r   r'   r   Tr?   keepdimNg     )r   r   r   r   rA   rB   r   r@   viewr   permuter   matmul	transposeamaxdetachrI   r   ra   r^   masked_fillboolsoftmaxreshaper   )r2   rC   latentsmaskhrh   kv_inputri   vsimbsnhskdokdr   mask_expones_expattnouts                      r5   rl   z)EvollaSequenceCompressorAttention.forward  sB    OOA##G,JJIIg99a\r2zz(#))2 * 
1 FF166!9affQiB/771aCFF166!9affQiB/771aCFF166!9affQiB/771aC

N ll1akk"b12CHHTH299;;99BSzz"c"%%dkk24q()aD()("ooq4xoo/6{{r{"ll4#kk!Q1% kk#((1+sxx{B7{{3r6   )@      r7   r8   r9   r/   rl   r:   r;   s   @r5   r   r     s    <) r6   r   c                   &     e Zd Zd fd	Zd Z xZS )EvollaFeedForwardc                    t         |           t        ||z        }t        j                  |      | _        t        j                  ||d      | _        t        j                         | _	        t        j                  ||d      | _
        y NFr   )r.   r/   rp   r   r   normr   fc1GELU
activationfc2)r2   r?   multr   r4   s       r5   r/   zEvollaFeedForward.__init__>  s`    d
O	LL%	99S)%8'')99Y%8r6   c           	      ~    | j                  | j                  | j                  | j                  |                        S r-   )r  r  r  r  )r2   rC   s     r5   rl   zEvollaFeedForward.forwardG  s+    xx1(>?@@r6   )   r   r;   s   @r5   r   r   =  s    9Ar6   r   c                   *     e Zd Zdef fdZd Z xZS )!EvollaSequenceCompressorResamplerr3   c           
         t         |           |j                  j                  }|j                  | _        t        j                  t        j                  | j
                  |      d      | _
        t        j                  g       | _        t        |j                        D ]g  }| j                  j                  t        j                  t!        ||j"                  |j$                        t'        ||j(                        g             i t        j*                  |j                        | _        t        j.                  ||j                        | _        y )NT)requires_grad)r?   r   r   )r?   r  )r.   r/   protein_encoder_configr{   resampler_num_latentsnum_latentsr   	ParameterrA   randnr   
ModuleListlayersrangeresampler_depthappendr   resampler_dim_headresampler_headsr   resampler_ff_multr   r  r   protein_projector)r2   r3   protein_repr_dim_r4   s       r5   r/   z*EvollaSequenceCompressorResampler.__init__L  s   !88DD!77||EKK0@0@BR$ScghmmB'v--. 
	AKK9 06;T;T\b\r\r *.>VE]E]^		
	 LL!3!34	!#+;V=O=O!Pr6   c                 j   |j                   d   }|j                   \  }}t        j                  || j                        j	                  |j
                        }t        j                  ||fd      }t        j                  |      j	                  | j                  j
                        }| j                  d    |j                  ddd      z  }|j	                  |j                        }| j                  D ]  \  }	}
 |	|||      |z   } |
|      |z   } | j                  |      }| j                  |      S )Nr   r'   r>   r=   )rI   rA   r   r  ra   r^   rB   r   r   rT   r  r  r  )r2   embedsr   br   r  latent_maskr   r   r   fftransformed_features               r5   rl   z)EvollaSequenceCompressorResampler.forwarda  s   LLO

AjjT%5%5699$++Fyy$,!4 zz!} 3 34,,t$tyyQ'::**V\\* 	,HD"67D1G;GkG+G	, #44W=yy,--r6   )r7   r8   r9   r(   r/   rl   r:   r;   s   @r5   r  r  K  s    Q| Q*.r6   r  c                       e Zd ZU dZej
                  dz  ed<   dZej
                  dz  ed<   dZe	ej
                  df   dz  ed<   dZ
e	ej
                  df   dz  ed<   y)EvollaProteinEncoderModelOutputNsequence_compressor_outputr   .r   r   )r7   r8   r9   r&  rA   FloatTensorro   r   r   rq   r   r   r6   r5   r%  r%  u  so     <@ 1 1D 8?26u((4/6:>M5**C/047>7;Je'',-4;r6   r%  c                   f     e Zd Zdef fdZedej                  dej                  fd       Z	 xZ
S )EvollaProteinEncoderr3   c                 z    t         |           t        |j                        | _        t        |      | _        y )Nr3   )r.   r/   r   r  modelr  sequence_compressor_resamplerr1   s     r5   r/   zEvollaProteinEncoder.__init__  s.    /v7T7TU
-NV\-]*r6   r   r   c                     | j                  ||      }|j                  }| j                  ||      }t        ||j                        S )Nr   )r&  r   )r,  r   r-  r%  )r2   r   r   r   protein_outputprotein_embedssequence_reprs          r5   rl   zEvollaProteinEncoder.forward  sJ    iW'99::>>Z.'4,>>
 	
r6   )r7   r8   r9   r(   r/   r   rA   
LongTensorr'  rl   r:   r;   s   @r5   r)  r)  ~  s?    ^| ^
 
!1!1 
5CTCT 
 
r6   r)  c                   b     e Zd Z	 	 	 ddedz  dedz  dedz  f fdZd Z	 	 	 	 	 	 	 d	dZ xZS )
#EvollaSequenceAlignerCrossAttentionNprotein_encoder_dimstructure_encoder_dimmsa_encoder_dimc                    t         |           |j                  | _        |j                  | _        | j                  dz  | _        t        | j                  | j                  z        | _        | j                  | j                  z  | _        |j                  }|j                  }|j                  }t        j                  | j                  | j                        | _        |Kt        j                  || j                        | _        t        j                  || j                        | _        nd | _        d | _        |Kt        j                  || j                        | _        t        j                  || j                        | _        nd | _        d | _        |Kt        j                  || j                        | _        t        j                  || j                        | _        nd | _        d | _        t)        | j                        | _        t        j,                  |      | _        t        j                  | j                  | j                  |      | _        t3        | j                  |      | _        t        j6                  t9        j:                  dg            | _        t        j6                  t9        j:                  dg            | _        y )Nr   r           ) r.   r/   r{   r|   r   rp   r   r   $aligner_attention_probs_dropout_probaligner_enable_biasaligner_ffn_multr   r   r   key_proteinvalue_proteinkey_structurevalue_structurekey_msa	value_msaEvollaRMSNormattention_normDropoutr   out_projr   r"  r  rA   tensorgate_attentiongate_ffw)	r2   r3   r5  r6  r7  r   enable_biasffn_multr4   s	           r5   r/   z,EvollaSequenceAlignerCrossAttention.__init__  s    	!--#)#=#= --t3
#&t'7'7$:R:R'R#S !558P8PP'-'R'R$00**YYt//1C1CD
*!yy)<d>P>PQD!#+>@R@R!SD#D!%D ,!#+@$BTBT!UD#%99-BDDVDV#WD !%D#'D &99_d6H6HIDLYY8J8JKDNDL!DN+D,<,<=zz">?		$"2"2D4D4D;W#D$4$4h? ll5<<+>?U\\3%%89r6   c	                    |||g}	|	D 
cg c]  }
|
|
	 }	}
|	st        d      t        j                  |	d      }	| j                  |      }| j	                  |      }| j
                  @| j                  4|j                  |      }| j                  |      }| j                  |      }nd}d}| j                  @| j                  4|j                  |      }| j                  |      }| j                  |      }nd}d}| j                  @| j                  4|j                  |      }| j                  |      }| j                  |      }nd}d}|||g}|D 
cg c]  }
|
|
	 }}
t        j                  |d      }|||g}|D 
cg c]  }
|
|
	 }}
t        j                  |d      }|j                         dd | j                  | j                  fz   } |j                  | j!                  dddd      }|j                         dd | j                  | j                  fz   } |j                  | j!                  dddd      }|j                         dd | j                  | j                  fz   } |j                  | j!                  dddd      }|| j"                  z  }|Mt        j$                  |j                  d      |j                  d            j                  |j&                        }|ddddddf   |	ddddddf   z  }t        j(                  ||j+                  dd	            }||j-                  dd
      j/                         z
  }|j1                  d|z
  j3                         t        j4                  |j6                        j8                        } t;        j<                  d      |      }t        j(                  ||      }|j!                  dddd      j?                         }|j                         dd	 | j@                  fz   } |j                  | }| jC                  |      }|S c c}
w c c}
w c c}
w )z
        query_states: text
        key_value_states: protein
        query_states: [bs, query_seq_len, dim]
        key_value_states: [bs, kv_seq_len, dim]
        query_attn_mask: [bs, query_seq_len]
        kv_attn_mask: [bs, kv_seq_len]
        Nz=At least one modality should be provided for cross attention.r'   r>   r=   r   r   r   rH   Tr   )"r~   rA   rB   rD  r   r=  r>  ra   r?  r@  rA  rB  r   r|   r   r   r   r   r   r^   r   r   r   r   r   r   finforT   minr   Softmax
contiguousr   rF  )r2   query_statesprotein_key_value_statesstructure_key_value_statesmsa_key_value_statesquery_attn_maskprotein_kv_attn_maskstructure_kv_attn_maskmsa_kv_attn_maskkv_attn_maskr  query_layerkey_layer_proteinvalue_layer_proteinkey_layer_structurevalue_layer_structurekey_layer_msavalue_layer_msa	key_layervalue_layernew_query_layer_shapenew_key_layer_shapenew_value_layer_shaper   attn_weightsattention_scoresattention_probscontext_layernew_context_layer_shapes                                r5   cross_attentionz3EvollaSequenceAlignerCrossAttention.cross_attention  si   * -.DFVW#/Aa1=AA\]]yy15)),7 jj-'D,>,>,J'?'B'B<'P$ $ 0 01I J"&"4"45M"N $"&)d.B.B.N)C)F)F|)T&"&"4"45O"P$($8$89S$T!"&$(!<<#(B#7#:#:<#H  LL)=>M"nn-ABO M"O&(;]K	 );1Q]Q;	;IIiQ/	*,A?S"-?Qq??ii3 + 0 0 23B 7$$$$;
 !
 'k&&(=>FFq!QPQR'nn.s3$$$$7
 
 #INN$78@@Aq!L	 + 0 0 23B 7$$$$;
 !
 'k&&(=>FFq!QPQR!DJJ. "#jj):):1)=|?P?PQR?STWWXdXkXklO(D!T)9:\!TSWYZJZ=[[||K1D1DR1LM#l&7&7B&7&M&T&T&VV'33%%'\5G5G)H)L)L
 -"**,-=> _kB%--aAq9DDF"/"4"4"6s";t?Q?Q>S"S***,CDm4q BL < @s"   P5P5P:P:P?P?c           
      ^   |z|j                   \  }}}|jt        j                  ||      j                  |	j                        |	j                  ||f      j                  z  j                  |j                        }nd }|z|j                   \  }}}|jt        j                  ||      j                  |	j                        |
j                  ||f      j                  z  j                  |j                        }nd }|z|j                   \  }}}|jt        j                  ||      j                  |	j                        |j                  ||f      j                  z  j                  |j                        }nd }|}||j                         s$||j                         s||j                         rz|}| j                  ||||||||      }t        j                  | j                        |z  }||z   }|}| j                  |      t        j                  | j                        z  }||z   }|S )N)r   )rQ  rR  rS  rT  rU  rV  rW  rX  )rI   rA   r   ra   r^   expandTanyrk  tanhrH  r"  rI  )r2   rQ  protein_kv_statesstructure_kv_statesmsa_kv_statesrU  rV  rW  rX  protein_batch_maskstructure_batch_maskmsa_batch_maskpast_key_valuesr   protein_kv_seq_lenr?   structure_kv_seq_lenmsa_kv_seq_lenr   residuals                       r5   rl   z+EvollaSequenceAlignerCrossAttention.forward4  sL    (*;*A*A'B"C#+JJr#5699:L:S:ST(//6H"5M/NPPQ"&--. %
 $( *,?,E,E)B$c%-JJr#78;;<N<U<UV*118Lb7Q1RTTU"(//0 '
 &*"$&3&9&9#B'JJr>2556H6O6OP$++."1E+FHHI"]))* !
  $$ */C/G/G/I#/4J4N4N4P).>.B.B.D$H 00*):+>%2 /%9'=!1 1 	M "JJt':':;mKM$}4M$H GGM2UZZ5NNM$}4Mr6   )NNN)NNNNNNN)r7   r8   r9   rp   r/   rk  rl   r:   r;   s   @r5   r4  r4    sb     +/,0&*1: !4Z1:  #Tz	1:
 t1:fnn "#!Gr6   r4  c                       e Zd Zy)rC  Nr   r   r6   r5   rC  rC  ~  r   r6   rC  c                       e Zd Zy)EvollaRotaryEmbeddingNr   r   r6   r5   r~  r~    r   r6   r~  c                       e Zd Zy)	EvollaMLPNr   r   r6   r5   r  r    r   r6   r  c                       e Zd Zy)EvollaAttentionNr   r   r6   r5   r  r    r   r6   r  c                       e Zd Zdedef fdZ	 	 	 	 	 	 	 	 	 	 	 	 	 ddej                  deej                  ej                  f   dz  dej                  dz  dej                  dz  d	e
dz  d
edz  dej                  dz  dej                  dz  dej                  dz  dej                  dz  dej                  dz  dej                  dz  dej                  dz  dej                  dz  fdZ xZS )EvollaDecoderLayerr3   r   c                     t         |   ||       |dz   t        |j                  |j                  z  d      z  dk(  rt        ||j                        | _        y y )Nr'   r   )r5  )r.   r/   maxnum_hidden_layersaligner_num_add_layersr4  r{   adapterr2   r3   r   r4   s      r5   r/   zEvollaDecoderLayer.__init__  s[    +MS!9!9V=Z=Z!Z\]^^bcc>$*$6$6DL dr6   Nr   position_embeddingsr   r0   rw  	use_cachecache_positionrq  rr  rs  rt  ru  rv  rU  c                    |}| j                  |      } | j                  d|||||||d|\  }}||z   }|}| j                  |      }| j                  |      }||z   }t	        | d      r| j                  |||	|
||||      }|S )N)r   r   r0   rw  r  r  r  r  )rQ  rq  rr  rs  rU  rt  ru  rv  r   )input_layernorm	self_attnpost_attention_layernormmlpr}   r  )r2   r   r  r   r0   rw  r  r  rq  rr  rs  rt  ru  rv  rU  r   r{  r  s                     r5   rl   zEvollaDecoderLayer.forward  s    $ !,,]; *4>> 	
')%+) 3	
 	
q !=0 !55mD/ =04# LL*"3$7+ /#5%9- ) 	M r6   )NNNNFNNNNNNNN)r7   r8   r9   r(   rp   r/   rA   rn   rq   r2  r   r   rl   r:   r;   s   @r5   r  r    sQ   |   IM.204(,!&261537-12648.2/35||5 #5<<#=>E5 t+	5
 &&-5 5 $;5 ((4/5 !<<$.5 #\\D05 ||d*5 "LL4/5 $llT15 t+5 ,5r6   r  c                   N    e Zd ZdZdZdZg dZ ej                         d        Z	y)EvollaPreTrainedModelF)r  r  r4  c                    | j                   j                  }t        j                  | |       t	        |t
              rht        j                  |j                         t        j                  |j                         t        j                  |j                  j                         y t	        |t              r#t        j                  |j                  d|       y y )Nr9  )meanstd)r3   initializer_ranger   r   r   r4  r   zeros_rH  rI  ones_rD  weightr  normal_r   )r2   r   r  s      r5   r   z#EvollaPreTrainedModel._init_weights  s    kk++%%dF3fABKK--.KK(JJv,,334 ABLLcs; Cr6   N)
r7   r8   r9   r   r   r   r   rA   no_gradr   r   r6   r5   r  r    s8     "' U]]_< <r6   r  c            !           e Zd Zdef fdZd Zd Zeee		 	 	 	 	 	 	 	 	 	 	 	 	 dde
j                  dz  de
j                  dz  de
j                  dz  d	edz  d
e
j                  dz  dedz  de
j                  dz  de
j                  dz  de
j                  dz  de
j                  dz  de
j                  dz  de
j                  dz  de
j                  dz  deez  fd                     Z xZS )EvollaModelr3   c           	      F   t         |   |       |j                  | _        |j                  | _        t        j                  | j                  |j                  | j                        | _        t        |      | _
        t        j                  t        |j                        D cg c]  }t        ||       c}      | _        t!        |j                  |j"                        | _        t'        |dd      | _        t+        |      | _        | j/                          y c c}w )Nr+  )r3   r   )epsgradient_checkpointingF)r.   r/   pad_token_idpadding_idx
vocab_sizer   	Embeddingr{   embed_tokensr)  protein_encoderr  r  r  r  r  rC  rms_norm_epsr  r   r  r~  
rotary_embr   r  s      r5   r/   zEvollaModel.__init__  s     !.. ++LL&:L:LdN^N^_36Bmm "'v'?'?!@
 	 #!'
 "&"4"4&:M:MN	&-f6NPU&V#/v>s   $Dc                     | j                   S r-   r  r   s    r5   r   z EvollaModel.get_input_embeddings  s       r6   c                     || _         y r-   r  r   s     r5   r   z EvollaModel.set_input_embeddings  s
    !r6   Nr   r   r0   rw  r   r  r  protein_input_idsprotein_attention_maskstructure_feats	msa_featsru  rv  rj   c                    |du |duz  rt        d      || j                  |      }|r|t        | j                        }|F||j	                         nd}t        j                  |||j                  d   z   |j                        }||j                  d      }d}d}|^|	\| j                  ||	      }|j                  }t        j                  |j                  d   |j                  t
        j                        }t        | j                  ||||	      }|}| j                  ||
      }| j                   D ]  } ||f|||||||
||||||d|} | j#                  |      }t%        ||      }|S )a;  
        protein_input_ids (torch.LongTensor):
            The input IDs for the protein sequence in structure-aware tokens. Should be of shape `(batch_size, protein_seq_length)` and type `torch.LongTensor`.
        protein_attention_mask (torch.Tensor):
            The attention mask for the protein sequence. Should be of shape `(batch_size, protein_seq_length)` and type `torch.Tensor`.
        structure_feats (torch.FloatTensor):
            The input IDs for purely structure-based features. Should be of shape `(batch_size, structure_seq_length, structure_feat_dim)` and type `torch.FloatTensor`. Dummy input for now.
        msa_feats (torch.FloatTensor):
            The input IDs for purely MSA-based features. Should be of shape `(batch_size, msa_seq_length, msa_feat_dim)` and type `torch.FloatTensor`. Dummy input for now.
        structure_batch_mask (torch.Tensor):
            The batch mask to decide which protein sequences are purely structure-based. Should be of shape `(batch_size)` and type `torch.Tensor`. Should be paired with `structure_feats`. Dummpy input for now.
        msa_batch_mask (torch.Tensor):
            The batch mask to decide which protein sequences are purely MSA-based. Should be of shape `(batch_size)` and type `torch.Tensor`. Should be paired with `msa_feats`. Dummpy input for now.
        Nz:You must specify exactly one of input_ids or inputs_embedsr+  r   r'   r]   r   )r^   rT   )r3   r   r   r  rw  )r0   )r   r0   rw  r  r  rq  rr  rs  rt  ru  rv  rU  r  )r   rw  )r~   r  r   r3   get_seq_lengthrA   rU   rI   r^   	unsqueezer  r&  r   r   r   r  r  r  r   )r2   r   r   r0   rw  r   r  r  r  r  r  r  ru  rv  r   past_seen_tokensprotein_featsrt  protein_outputscausal_maskr   r  decoder_layeroutputs                           r5   rl   zEvollaModel.forward  s   D -t";<YZZ  --i8M0*$++>O!CRC^==?de"\\ "2]5H5H5K"KTaThThN )33A6L!(-C-O"22+5 3 O ,FFM!&!''*(//jj" );;'))+
 &"oom,oW![[ 	M)*) /#-"/$3'#5%9- .$7 M	& 		-0(++
 r6   )NNNNNNNNNNNNN)r7   r8   r9   r(   r/   r   r   r   r   r   rA   r2  rn   r   r'  r   rq   r   rl   r:   r;   s   @r5   r  r    s   | *!"  .2.204(,26!%26596:48.248.2d##d*d t+d &&-	d
 d ((4/d $;d ((4/d !++d2d !&t 3d **T1d $$t+d $llT1d t+d  
(	(!d    dr6   r  c                   4    e Zd Z fdZd Zd Zee	 	 	 	 	 	 	 	 ddej                  dz  dej                  dz  dej                  dz  dej                  dz  d	ej                  dz  d
ej                  dz  dedz  deej                  z  fd              Z xZS )EvollaForProteinText2Textc                     t         |   |       t        |      | _        |j                  | _        t        j                  |j                  | j                  d      | _        | j                          y r  )
r.   r/   r  r,  r  r   r   r{   lm_headr   r1   s     r5   r/   z"EvollaForProteinText2Text.__init__l  sQ      (
 ++yy!3!3T__5Qr6   c                 6    | j                   j                         S r-   )r,  r   r   s    r5   r   z.EvollaForProteinText2Text.get_input_embeddingst  s    zz..00r6   c                 8    | j                   j                  |      S r-   )r,  r   r   s     r5   r   z.EvollaForProteinText2Text.set_input_embeddingsw  s    zz..u55r6   Nr   r   r   labelsr  r  r  logits_to_keepc	           
      h    | j                   d||||||d|	}
|
j                  }t        |t              rt	        | d      n|}| j                  |dd|ddf         }d}|  | j                  d||| j                  d|	}t        |||
j                  |
j                  |
j                        }|S )a,  
        protein_input_ids (torch.LongTensor):
            The input IDs for the protein sequence. Should be of shape `(batch_size, protein_seq_length)` and type `torch.LongTensor`.
        protein_attention_mask (torch.Tensor):
            The attention mask for the protein sequence. Should be of shape `(batch_size, protein_seq_length)` and type `torch.Tensor`.

        Example:

        ```python
        >>> from transformers import EvollaProcessor, EvollaForProteinText2Text
        >>> model = EvollaForProteinText2Text.from_pretrained("westlake/Evolla-10B-hf")
        >>> processor = EvollaProcessor.from_pretrained("westlake/Evolla-10B-hf")

        >>> protein_information = {
            "aa_seq": "your amino acid sequence",
            "foldseek": "your foldseek sequence",
        }
        >>> question = "What is the function of this protein?"
        >>> message = [
            {"role": "system", "content": "You are an AI expert that can answer any questions about protein."},
            {"role": "user", "content": question},
        ]

        >>> inputs = processor(proteins=[protein_information], messages_list=[message], return_tensors="pt", padding="longest")
        >>> outputs = model.generate(**inputs)

        >>> print(processor.batch_decode(outputs, skip_special_tokens=True))
        ```)r   r   r   r  r  r  N)logitsr  r  )lossr  rw  r   r   r   )r,  r   r   rp   slicer  loss_functionr  r   rw  r   r   )r2   r   r   r   r  r  r  r  r  r   outputsr   slice_indicesr  r  
lm_outputss                   r5   rl   z!EvollaForProteinText2Text.forwardz  s    T ,64:: ,
)'/#9,
 ,
  118B>SV8W~ot4]kmA}a,?@A%4%%iVFtibhiD+#33!//))

 r6   )NNNNNNNr   )r7   r8   r9   r/   r   r   r   r   rA   r2  rn   r'  r   rp   rl   r:   r;   s   @r5   r  r  k  s    16  .2.226*.596:!%-.B##d*B t+B ((4/	B
   4'B !++d2B !&t 3B $;B ell*B  Br6   r  )r  r  r  )Tdataclassesr   rA   r    r   r   cache_utilsr   r   
generationr	   masking_utilsr
   r   modeling_outputsr   r   r   r   modeling_utilsr   utilsr   r   r   utils.genericr   utils.output_capturingr   r   esm.modeling_esmr   r   r   r   r   r   r   r   r    llama.modeling_llamar!   r"   r#   r$   r%   r&   configuration_evollar(   r)   
get_loggerr7   loggerr+   rF   rL   rz   rN   rs   r   r   r   r   r   r   r   r   r   r   r   r  r%  r)  r4  rC  r~  r  r  r  r  r  r  __all__r   r6   r5   <module>r     s   "   & . ) J  . 
 8 E
 
 
  = 
		H	%!] !(
2*
")) *
ZD 0 DD	] 		L 		 		 		 		* 		 	 2/ 2 2.+
!< +
\7 		 7 tA		 A'.		 '.T <k <  <
299 
$k")) k\	L 		0 		 		n 	>* >B<0 <,C' CLS 5 Sl Pr6   