
    qi3                     R   d dl Z d dlmZ d dlmZ d dlmZ d dlZd dlmZ ddl	m
Z ddlmZ dd	lmZmZ dd
lmZ ddlmZmZmZ ddlmZmZ ddlmZ ddlmZmZmZm Z m!Z! ddl"m#Z#m$Z$ ddl%m&Z&m'Z' ddl(m)Z) ddl*m+Z+m,Z,m-Z- ddl.m/Z/m0Z0 ddl1m2Z2m3Z3 ddl4m5Z5m6Z6 d Z7 G d dejp                        Z9d Z:d Z; G d dejp                        Z<	 	 dadejp                  dejz                  d ejz                  d!ejz                  d"ejz                  dz  d#e>dz  d$e>d%e)e+   fd&Z? G d' d(ejp                        Z@ G d) d*ejp                        ZA G d+ d,ejp                        ZBd- ZC G d. d/ejp                        ZD G d0 d1ejp                        ZE G d2 d3e      ZF G d4 d5ejp                        ZG G d6 d7ejp                        ZHe, G d8 d9e'             ZI G d: d;eI      ZJ G d< d=ejp                        ZK G d> d?ejp                        ZL G d@ dAejp                        ZMee, G dB dCe!                    ZN G dD dEejp                        ZO G dF dGejp                        ZP edH       G dI dJejp                               ZQ G dK dLejp                        ZR G dM dNejp                        ZSdO ZT edP      dbdQ       ZUdRejz                  dSeVdTejz                  fdUZW eeU       G dV dWejp                               ZX G dX dYe      ZYe, G dZ d[e'             ZZ G d\ d]eZ      Z[ G d^ d_eZe      Z\g d`Z]y)c    N)Callable)	dataclass)Optional)nn   )initialization)ACT2FN)CacheDynamicCache)GenerationMixin)use_kernel_forward_from_hubuse_kernel_func_from_hubuse_kernelized_func)create_bidirectional_maskcreate_causal_mask)GradientCheckpointingLayer)"BaseModelOutputWithCrossAttentionsBaseModelOutputWithPast,BaseModelOutputWithPoolingAndCrossAttentionsCausalLMOutputWithPastModelOutput)ROPE_INIT_FUNCTIONSdynamic_rope_update)ALL_ATTENTION_FUNCTIONSPreTrainedModel)Unpack)TransformersKwargsauto_docstringcan_return_tuple)maybe_autocastmerge_with_config_defaults)OutputRecordercapture_outputs   )EvollaConfigSaProtConfigc                     | j                  |      j                         }t        j                  |d      j	                  |      |z  }|j                         |z   S )a  
    Replace non-padding symbols with their position numbers. Position numbers begin at padding_idx+1. Padding symbols
    are ignored. This is modified from fairseq's `utils.make_positions`.

    Args:
        x: torch.Tensor x:

    Returns: torch.Tensor
    r$   dim)neinttorchcumsumtype_aslong)	input_idspadding_idxmaskincremental_indicess       \/opt/pipecat/venv/lib/python3.12/site-packages/transformers/models/evolla/modeling_evolla.py"create_position_ids_from_input_idsr5   4   sP     <<$((*D,,t3;;DADH##%33    c                   8     e Zd ZdZ fdZ	 	 	 	 ddZd Z xZS )EvollaSaProtEmbeddingszV
    Same as BertEmbeddings with a tiny tweak for positional embeddings indexing.
    c                    t         |           t        j                  |j                  |j
                  |j                        | _        |j                  r1t        j                  |j
                  |j                        | _        nd | _        t        j                  |j                        | _        t        |dd      | _        | j#                  dt%        j&                  |j(                        j+                  d      d       |j                  | _        | j                   dk(  r;t        j                  |j(                  |j
                  | j,                        | _        |j0                  | _        |j2                  | _        d | _        y )	N)r1   epsposition_embedding_typeabsoluteposition_ids)r$   F
persistent)super__init__r   	Embedding
vocab_sizehidden_sizepad_token_idword_embeddingsemb_layer_norm_before	LayerNormlayer_norm_eps
layer_normDropouthidden_dropout_probdropoutgetattrr<   register_bufferr,   arangemax_position_embeddingsexpandr1   position_embeddingstoken_dropoutmask_token_idr>   selfconfig	__class__s     r4   rC   zEvollaSaProtEmbeddings.__init__I   s2   !||F,=,=v?Q?Q_e_r_rs'' ll6+=+=6CXCXYDO"DOzz&"<"<='.v7PR\']$ELL)G)GHOOPWXej 	 	
 "..'':5')||..0B0BPTP`P`(D$ $11#11 r6   c                    |*|t        || j                        }n| j                  |      }|| j                  |      }|}| j                  r||j                  || j                  k(  j                  d      d      }d}||j                  d      n|j                  d   }|| j                  k(  j                  d      j                         |z  }|d|z
  z  d|z
  d d d d f   z  j                  |j                        }| j                  dk(  r| j                  |      }	||	z   }| j                  | j                  |      }|-||j                  d      z  j                  |j                        }|S )Nr?           gQ?r$   r=   )r5   r1   &create_position_ids_from_inputs_embedsrH   rV   masked_fillrW   	unsqueezesumshapefloattodtyper<   rU   rL   )
rY   r0   attention_maskr>   inputs_embeds
embeddingsmask_ratio_trainsrc_lengthsmask_ratio_observedrU   s
             r4   forwardzEvollaSaProtEmbeddings.forwardb   s    $A)TM]M]^#JJ=Y  00;M #
 )"7#//d>P>P1P0[0[\^0_adeJ)4B4N.,,R0T]TcTcdeTfK#,0B0B#B"G"G"K"Q"Q"SVa"a$,<(<=EXAXZ[]acgZg@hhll  J '':5"&":":<"H#&99J??&4J%$~'?'?'CCGG
HXHXYJ r6   c                    |j                         dd }|d   }t        j                  | j                  dz   || j                  z   dz   t        j                  |j
                        }|j                  d      j                  |      S )z
        We are provided embeddings directly. We cannot infer which are padded so just generate sequential position ids.

        Args:
            inputs_embeds: torch.Tensor

        Returns: torch.Tensor
        Nr?   r$   )re   devicer   )sizer,   rR   r1   r/   rn   r`   rT   )rY   rg   input_shapesequence_lengthr>   s        r4   r^   z=EvollaSaProtEmbeddings.create_position_ids_from_inputs_embeds   s     $((*3B/%a.||q /D4D4D"Dq"HPUPZPZcpcwcw
 %%a(//<<r6   NNNN)__name__
__module____qualname____doc__rC   rl   r^   __classcell__r[   s   @r4   r8   r8   D   s&    !6 /b=r6   r8   c                 b    | j                  dd      \  }}t        j                  | |fd      S )N   r?   r(   )chunkr,   catxx1x2s      r4   rotate_half_esmr      s/    WWQBWFB99rc2YB''r6   c                     |d d d d d | j                   d   d d f   }|d d d d d | j                   d   d d f   }| |z  t        |       |z  z   S )N)rb   r   )r~   cossins      r4   apply_rotary_pos_emb_esmr      sY    
aMaggbkM1$
%C
aMaggbkM1$
%CG*S011r6   c                        e Zd ZU dZej
                  ed<   def fdZd
dZ	dej
                  dej
                  de
ej
                  ej
                  f   fd	Z xZS )EvollaSaProtRotaryEmbeddingz
    Rotary position embeddings based on those in
    [RoFormer](https://huggingface.co/docs/transformers/model_doc/roformer). Query and keys are transformed by rotation
    matrices which depend on their relative positions.
    inv_freqr)   c                     t         |           || _        ddt        j                  d|dt        j
                        j                         |z  z  z  }| j                  d|       d | _        d | _	        d | _
        y )N      ?'  r   rz   re   r   )rB   rC   r)   r,   rR   int64rc   rQ   _seq_len_cached_cos_cached_sin_cached)rY   r)   r   r[   s      r4   rC   z$EvollaSaProtRotaryEmbedding.__init__   sn    %ELLC%++$N$T$T$VY\$\]^Z2#r6   c                 t   |j                   |   }|| j                  k7  s#| j                  j                  |j                  k7  r|| _        t	        j
                  |j                   |   |j                        j                  | j                        }t	        j                  || j                        }t	        j                  ||fd      j                  |j                        }|j                         d d d d d d f   | _        |j                         d d d d d d f   | _        | j                  | j                  fS )Nrn   r?   r(   )rb   r   r   rn   r,   rR   r.   r   outerr|   rd   r   r   r   )rY   r~   seq_dimensionseq_lentfreqsembs          r4   _update_cos_sin_tablesz2EvollaSaProtRotaryEmbedding._update_cos_sin_tables   s    ''-( d***d.>.>.E.E.Q#*D QWW]3AHHEMMdmm\AKK4==1E))UEN366qxx@C"wwytQ)9:D"wwytQ)9:D!1!111r6   qkreturnc                 .   | j                  |d      \  | _        | _        t        || j                  | j                        j	                  |j
                        t        || j                  | j                        j	                  |j
                        fS )Nr   )r   r   )r   r   r   r   rd   re   )rY   r   r   s      r4   rl   z#EvollaSaProtRotaryEmbedding.forward   s    -1-H-HZ\-H-]*$* %Q(8(8$:J:JKNNUVU\U\N]$Q(8(8$:J:JKNNUVU\U\N]
 	
r6   )rz   )rs   rt   ru   rv   r,   Tensor__annotations__r+   rC   r   tuplerl   rw   rx   s   @r4   r   r      sY     ll	 C 	 2 
 
%,, 
5u||A[;\ 
r6   r   modulequerykeyvaluerf   scalingrO   kwargsc                    ||j                  d      dz  }t        j                  ||j                  dd            |z  }|||z   }t        j
                  j                  |d      }t        j
                  j                  ||| j                        }t        j                  ||      }	|	j                  dd      j                         }	|	|fS )Nr?         rz   r   r(   )ptrainingr$   )
ro   r,   matmul	transposer   
functionalsoftmaxrO   r   
contiguous)
r   r   r   r   rf   r   rO   r   attn_weightsattn_outputs
             r4   eager_attention_forwardr      s     **R.D( <<s}}Q':;gEL!#n4==((2(>L==((6??([L,,|U3K''1-88:K$$r6   c                        e Zd Zd
 fd	Z	 	 	 ddej
                  dej                  dz  dej                  dz  dej                  dz  dee   de	ej
                     fd	Z
 xZS )EvollaSaProtSelfAttentionNc                    t         |           || _        |j                  |j                  z  dk7  r2t        |d      s&t        d|j                   d|j                   d      |j                  | _        t        |j                  |j                  z        | _        | j                  | j                  z  | _	        t        j                  |j                  | j                        | _        t        j                  |j                  | j                        | _        t        j                  |j                  | j                        | _        |j                  | _        d | _        |xs t%        |dd      | _        | j&                  dk(  rt)        | j                  	      | _        |j*                  | _        || _        d
| _        | j*                  xr | | _        y )Nr   embedding_sizezThe hidden size (z6) is not a multiple of the number of attention heads ()r<   r=   rotaryr(   r   )rB   rC   rZ   rF   num_attention_headshasattr
ValueErrorr+   attention_head_sizeall_head_sizer   Linearr   r   r   attention_probs_dropout_probrO   rotary_embeddingsrP   r<   r   
is_decoder	layer_idxr   	is_causal)rY   rZ   r<   r   is_cross_attentionr[   s        r4   rC   z"EvollaSaProtSelfAttention.__init__   s    : ::a?PVXhHi#F$6$6#7 8 445Q8 
 $*#=#= #&v'9'9F<V<V'V#W !558P8PPYYv1143E3EF
99V//1C1CDYYv1143E3EF
::!%'> (
'-zC
$ ''83%@TE]E]%^D" ++"C1C-Cr6   hidden_statesrf   encoder_hidden_statesencoder_attention_maskr   r   c                    |j                   d d \  }}||d| j                  f}| j                  |      j                  |      j	                  dd      }	|d u}
|
r|n|}|
r|n|}| j                  |      j                  |      j	                  dd      }| j                  |      j                  |      j	                  dd      }|	| j                  dz  z  }	| j                  dk(  r| j                  |	|      \  }	}t        j                  | j                  j                  t              } || |	|||f| j                  sdn| j                  | j                   d|\  }}|j#                  ||d      j%                         }||fS )Nr?   r$   rz   r   r   r]   rO   r   )rb   r   r   viewr   r   r   r<   r   r   get_interfacerZ   _attn_implementationr   r   rO   r   reshaper   )rY   r   rf   r   r   r   
batch_size
seq_lengthhidden_shapequery_layerr   current_states	key_layervalue_layerattention_interfacer   r   s                    r4   rl   z!EvollaSaProtSelfAttention.forward  s    "/!4!4Sb!9
J"JD4L4LMjj/44\BLLQPQR2$>2D.-3E/>HH^,11,?II!QO	jj055lCMMaQRS "D$<$<d$BB''83%)%;%;K%S"K(?(M(MKK,,.E)
 %8	%
  $}}C$,,LL	%
 	%
!\ "))*j"EPPRL((r6   )NNFNNN)rs   rt   ru   rC   r,   r   FloatTensorr   r   r   rl   rw   rx   s   @r4   r   r      s    DF 48:>;?,)||,) ))D0,)  %0047	,)
 !& 1 1D 8,) +,,) 
u||	,)r6   r   c                   $     e Zd Z fdZd Z xZS )EvollaSaProtSelfOutputc                     t         |           t        j                  |j                  |j                        | _        t        j                  |j                        | _        y N)	rB   rC   r   r   rF   denserM   rN   rO   rX   s     r4   rC   zEvollaSaProtSelfOutput.__init__K  sB    YYv1163E3EF
zz&"<"<=r6   c                 T    | j                  |      }| j                  |      }||z   }|S r   r   rO   rY   r   input_tensors      r4   rl   zEvollaSaProtSelfOutput.forwardP  .    

=1]3%4r6   rs   rt   ru   rC   rl   rw   rx   s   @r4   r   r   J      >
r6   r   c                   :     e Zd Zd fd	Z	 	 	 ddee   fdZ xZS )EvollaSaProtAttentionc                     t         |           t        |||      | _        t	        |      | _        t        j                  |j                  |j                        | _        y )N)r   r   r:   )
rB   rC   r   rY   r   outputr   rJ   rF   rK   )rY   rZ   r   r   r[   s       r4   rC   zEvollaSaProtAttention.__init__X  sI    -f	^pq	,V4f&8&8f>S>STr6   r   c                     | j                  |      } | j                  |f|||d|\  }}| j                  ||      }|S )Nrf   r   r   )rJ   rY   r   )	rY   r   rf   r   r   r   hidden_states_lnr   _s	            r4   rl   zEvollaSaProtAttention.forward_  sZ      >>-8"
)"7#9	

 
Q kk+}=r6   )NFr   )rs   rt   ru   rC   r   r   rl   rw   rx   s   @r4   r   r   W  s)    U "# +,r6   r   c                 j    | dz  dt        j                  | t        j                  d      z        z   z  S )zz
    This is the gelu implementation from the original EVOLLA_SA_PROT repo. Using F.gelu yields subtly wrong results.
    g      ?r   g       @)r,   erfmathsqrt)r~   s    r4   gelur   s  s.     s7cEIIa$))C.&899::r6   c                   V     e Zd Z fdZdej
                  dej
                  fdZ xZS )EvollaSaProtIntermediatec                     t         |           t        j                  |j                  |j
                        | _        y r   )rB   rC   r   r   rF   intermediate_sizer   rX   s     r4   rC   z!EvollaSaProtIntermediate.__init__{  s,    YYv1163K3KL
r6   r   r   c                 >    | j                  |      }t        |      }|S r   )r   r   )rY   r   s     r4   rl   z EvollaSaProtIntermediate.forward  s     

=1]+r6   rs   rt   ru   rC   r,   r   rl   rw   rx   s   @r4   r   r   z  s$    MU\\ ell r6   r   c                   $     e Zd Z fdZd Z xZS )EvollaSaProtOutputc                     t         |           t        j                  |j                  |j
                        | _        t        j                  |j                        | _	        y r   )
rB   rC   r   r   r   rF   r   rM   rN   rO   rX   s     r4   rC   zEvollaSaProtOutput.__init__  sB    YYv779K9KL
zz&"<"<=r6   c                 T    | j                  |      }| j                  |      }||z   }|S r   r   r   s      r4   rl   zEvollaSaProtOutput.forward  r   r6   r   rx   s   @r4   r   r     r   r6   r   c                   >     e Zd Z fdZ	 	 	 ddee   fdZd Z xZS )EvollaSaProtLayerc                    t         |           |j                  | _        d| _        t	        |      | _        |j                  | _        |j                  | _        | j                  r,| j                  st        |  d      t	        |d      | _	        t        |      | _        t        |      | _        t        j                  |j                   |j"                        | _        y )Nr$   z> should be used as a decoder model if cross attention is addedT)r   r:   )rB   rC   chunk_size_feed_forwardseq_len_dimr   	attentionr   add_cross_attentionRuntimeErrorcrossattentionr   intermediater   r   r   rJ   rF   rK   rX   s     r4   rC   zEvollaSaProtLayer.__init__  s    '-'E'E$.v6 ++#)#=#= ##??"dV+i#jkk"7SW"XD4V<(0f&8&8f>S>STr6   r   c                      | j                   |fd|i|}| j                  r4|2t        | d      st        d|  d       | j                  |f|||d|}| j                  |      }|S )Nrf   r  z'If `encoder_hidden_states` are passed, z` has to be instantiated with cross-attention layers by setting `config.add_cross_attention=True`r   )r  r   r   AttributeErrorr  feed_forward_chunk)rY   r   rf   r   r   r   attention_outputlayer_outputs           r4   rl   zEvollaSaProtLayer.forward  s     *4>>
)
 
 ??4@4!12$=dV D` ` 
  3t22  -&;'=	 
   ../?@r6   c                 n    | j                  |      }| j                  |      }| j                  ||      }|S r   )rJ   r  r   )rY   r	  attention_output_lnintermediate_outputr
  s        r4   r  z$EvollaSaProtLayer.feed_forward_chunk  s<    "nn-=>"//0CD{{#68HIr6   r   )	rs   rt   ru   rC   r   r   rl   r  rw   rx   s   @r4   r   r     s/    U$ "# +,@r6   r   c                   B     e Zd Z fdZe	 	 	 ddee   fd       Z xZS )EvollaSaProtEncoderc                 0   t         |           || _        t        j                  t        |j                        D cg c]  }t        |       c}      | _        t        j                  |j                  |j                        | _        d| _        y c c}w )Nr:   F)rB   rC   rZ   r   
ModuleListrangenum_hidden_layersr   layerrJ   rF   rK   emb_layer_norm_aftergradient_checkpointing)rY   rZ   r   r[   s      r4   rC   zEvollaSaProtEncoder.__init__  sn    ]]uVMeMeGf#g!$5f$=#gh
$&LL1C1CI^I^$_!&+# $hs   Br   c                     t        | j                        D ]  \  }} ||f|||d|} | j                  r| j                  |      }t        |      S )Nr   )last_hidden_state)	enumerater  r  r   )rY   r   rf   r   r   r   ilayer_modules           r4   rl   zEvollaSaProtEncoder.forward  sk      )4 	OA|(-&;'=	
 M	 $$ 55mDM1MRRr6   r   )	rs   rt   ru   rC   r   r   r   rl   rw   rx   s   @r4   r  r    s:    ,  "#S +,S Sr6   r  c                   V     e Zd Z fdZdej
                  dej
                  fdZ xZS )EvollaSaProtPoolerc                     t         |           t        j                  |j                  |j                        | _        t        j                         | _        y r   )rB   rC   r   r   rF   r   Tanh
activationrX   s     r4   rC   zEvollaSaProtPooler.__init__  s9    YYv1163E3EF
'')r6   r   r   c                 \    |d d df   }| j                  |      }| j                  |      }|S )Nr   )r   r   )rY   r   first_token_tensorpooled_outputs       r4   rl   zEvollaSaProtPooler.forward  s6     +1a40

#566r6   r   rx   s   @r4   r  r    s#    $
U\\ ell r6   r  c                   t     e Zd ZU eed<   dgZdZdZdZdZ	e
 eedd      g eedd      gdZ fd	Z xZS )
EvollaSaProtPreTrainedModelrZ   r   Tr$   r  )index
layer_namer  )r   
attentionscross_attentionsc                 *   t         |   |       t        |t              rsddt	        j
                  d|j                  dt        j                        j                         |j                  z  z  z  }t        j                  |j                  |       y y )Nr   r   r   rz   r   )rB   _init_weights
isinstancer   r,   rR   r)   r   rc   initcopy_r   )rY   r   r   r[   s      r4   r+  z)EvollaSaProtPreTrainedModel._init_weights	  sm    f%f9:eQ

AU[[(Y(_(_(adjdndn(nopHJJv1 ;r6   )rs   rt   ru   r&   r   _no_split_modules_supports_flash_attn_supports_sdpa_supports_flex_attn_supports_attention_backendr   r"   r   _can_record_outputsr+  rw   rx   s   @r4   r%  r%    sd    ,-N"& +%&?qU`ab4AJZ[
2 2r6   r%  c            
            e Zd Zdef fdZd Zd Zee	 d
de	j                  dz  de	j                  dz  dee	j                     ez  fd	              Z xZS )EvollaSaProtProteinEncoderrZ   c                     t         |   |       t        |      | _        t	        |      | _        | j                          y r   )rB   rC   r8   rh   r  encoder	post_initrX   s     r4   rC   z#EvollaSaProtProteinEncoder.__init__  s2     08*62r6   c                 .    | j                   j                  S r   rh   rH   rY   s    r4   get_input_embeddingsz/EvollaSaProtProteinEncoder.get_input_embeddings  s    ...r6   c                 &    || j                   _        y r   r;  rY   r   s     r4   set_input_embeddingsz/EvollaSaProtProteinEncoder.set_input_embeddings  s    */'r6   Nr0   rf   r   c                 ^   |j                         }|\  }}|j                  }|t        j                  ||f|      }| j	                  ||      }t        | j                  ||      } | j                  |fd|i|}	|	d   }
t        |
|	j                  |	j                  |	j                        S )Nr   r0   rf   )rZ   rg   rf   rf   r   )r  r   r(  r)  )ro   rn   r,   onesrh   r   rZ   r8  r   r   r(  r)  )rY   r0   rf   r   rp   r   r   rn   rg   encoder_outputssequence_outputs              r4   rl   z"EvollaSaProtProteinEncoder.forward  s      nn&!,
J!!!"ZZ*j)A6RN)N[2;;')
 '$,,}^^^W]^)!,;-)77&11,==	
 	
r6   r   )rs   rt   ru   r&   rC   r=  r@  r!   r#   r,   r   r   r   rl   rw   rx   s   @r4   r6  r6    sw    | /0   /3
<<$&
 t+

 
u||	K	K
   
r6   r6  c                   &     e Zd Zd fd	Zd Z xZS )!EvollaSequenceCompressorAttentionc                 j   t         |           |dz  | _        || _        ||z  }t	        j
                  |      | _        t	        j
                  |      | _        t	        j                  ||d      | _	        t	        j                  ||dz  d      | _
        t	        j                  ||d      | _        y )Nr   Fbiasrz   )rB   rC   scaleheadsr   rJ   
norm_medianorm_latentsr   to_qto_kvto_out)rY   r)   dim_headrL  	inner_dimr[   s        r4   rC   z*EvollaSequenceCompressorAttention.__init__?  s    t^

u$	,,s+LL-IIc959	YYsIM>
ii	3U;r6   c                 F   | j                  |      }| j                  |      }| j                  }| j                  |      }t	        j
                  ||fd      }| j                  |      j                  dd      \  }}|j                  |j                  d      |j                  d      |d      j                  dddd      }|j                  |j                  d      |j                  d      |d      j                  dddd      }|j                  |j                  d      |j                  d      |d      j                  dddd      }|| j                  z  }t	        j                  ||j                  dd            }	|	|	j                  dd	      j                         z
  }	|	j                   \  }
}}}t	        j"                  ||      j%                  |j&                        }|d
d
d
d
d
d
f   }|d
d
d
d
d
d
f   }||z  }|	j)                  d|z
  j+                         d      }	|	j-                  d      }t	        j                  ||      }|j                  dddd      }|j/                  |j                  d      |j                  d      d      }| j1                  |      S )z
        Args:
            x (torch.Tensor): image features
                shape (b, n1, D)
            latent (torch.Tensor): latent features
                shape (b, n2, D);  n2: num of latent tokens
        r   r(   rz   r?   r   r$   r   Tr)   keepdimNg     )rM  rN  rL  rO  r,   r|   rP  r{   r   ro   permuterK  r   r   amaxdetachrb   rC  rd   rn   r_   boolr   r   rQ  )rY   r~   latentsr2   hr   kv_inputr   vsimbsnhskdokdrC  mask_expones_expattnouts                      r4   rl   z)EvollaSequenceCompressorAttention.forwardL  sB    OOA##G,JJIIg99a\r2zz(#))2 * 
1 FF166!9affQiB/771aCFF166!9affQiB/771aCFF166!9affQiB/771aC

N ll1akk"b12CHHTH299;;99BSzz"c"%%dkk24q()aD()("ooq4xoo/6{{r{"ll4#kk!Q1% kk#((1+sxx{B7{{3r6   )@      r   rx   s   @r4   rG  rG  >  s    <) r6   rG  c                   &     e Zd Zd fd	Zd Z xZS )EvollaFeedForwardc                    t         |           t        ||z        }t        j                  |      | _        t        j                  ||d      | _        t        j                         | _	        t        j                  ||d      | _
        y NFrI  )rB   rC   r+   r   rJ   normr   fc1GELUr   fc2)rY   r)   multrS  r[   s       r4   rC   zEvollaFeedForward.__init__y  s`    d
O	LL%	99S)%8'')99Y%8r6   c           	      ~    | j                  | j                  | j                  | j                  |                        S r   )rq  r   ro  rn  )rY   r~   s     r4   rl   zEvollaFeedForward.forward  s+    xx1(>?@@r6   )   r   rx   s   @r4   rk  rk  x  s    9Ar6   rk  c                   *     e Zd Zdef fdZd Z xZS )!EvollaSequenceCompressorResamplerrZ   c           
         t         |           |j                  j                  }|j                  | _        t        j                  t        j                  | j
                  |      d      | _
        t        j                  g       | _        t        |j                        D ]g  }| j                  j                  t        j                  t!        ||j"                  |j$                        t'        ||j(                        g             i t        j*                  |j                        | _        t        j.                  ||j                        | _        y )NT)requires_grad)r)   rR  rL  )r)   rr  )rB   rC   protein_encoder_configrF   resampler_num_latentsnum_latentsr   	Parameterr,   randnr[  r  layersr  resampler_depthappendrG  resampler_dim_headresampler_headsrk  resampler_ff_multrJ   rn  r   protein_projector)rY   rZ   protein_repr_dimr   r[   s       r4   rC   z*EvollaSequenceCompressorResampler.__init__  s   !88DD!77||EKK0@0@BR$ScghmmB'v--. 
	AKK9 06;T;T\b\r\r *.>VE]E]^		
	 LL!3!34	!#+;V=O=O!Pr6   c                 j   |j                   d   }|j                   \  }}t        j                  || j                        j	                  |j
                        }t        j                  ||fd      }t        j                  |      j	                  | j                  j
                        }| j                  d    |j                  ddd      z  }|j	                  |j                        }| j                  D ]  \  }	}
 |	|||      |z   } |
|      |z   } | j                  |      }| j                  |      S )Nr   r$   r(   r?   )rb   r,   rC  r{  rd   rn   r|   r[  r   re   r~  r  rn  )rY   embedsr2   br`  r   latent_maskrC  r[  rf  fftransformed_features               r4   rl   z)EvollaSequenceCompressorResampler.forward  s   LLO

AjjT%5%5699$++Fyy$,!4 zz!} 3 34,,t$tyyQ'::**V\\* 	,HD"67D1G;GkG+G	, #44W=yy,--r6   )rs   rt   ru   r%   rC   rl   rw   rx   s   @r4   rv  rv    s    Q| Q*.r6   rv  c                       e Zd ZU dZej
                  dz  ed<   dZej
                  dz  ed<   dZe	ej
                  df   dz  ed<   dZ
e	ej
                  df   dz  ed<   y)EvollaProteinEncoderModelOutputNsequence_compressor_outputr  .r   r(  )rs   rt   ru   r  r,   r   r   r  r   r   r(   r6   r4   r  r    so     <@ 1 1D 8?26u((4/6:>M5**C/047>7;Je'',-4;r6   r  c                   f     e Zd Zdef fdZedej                  dej                  fd       Z	 xZ
S )EvollaProteinEncoderrZ   c                 z    t         |           t        |j                        | _        t        |      | _        y )NrZ   )rB   rC   r6  ry  modelrv  sequence_compressor_resamplerrX   s     r4   rC   zEvollaProteinEncoder.__init__  s.    /v7T7TU
-NV\-]*r6   r0   rf   c                     | j                  ||      }|j                  }| j                  ||      }t        ||j                        S )NrB  )r  r  )r  r  r  r  )rY   r0   rf   r   protein_outputprotein_embedssequence_reprs          r4   rl   zEvollaProteinEncoder.forward  sJ    iW'99::>>Z.'4,>>
 	
r6   )rs   rt   ru   r%   rC   r   r,   
LongTensorr   rl   rw   rx   s   @r4   r  r    s?    ^| ^
 
!1!1 
5CTCT 
 
r6   r  c                   b     e Zd Z	 	 	 ddedz  dedz  dedz  f fdZd Z	 	 	 	 	 	 	 d	dZ xZS )
#EvollaSequenceAlignerCrossAttentionNprotein_encoder_dimstructure_encoder_dimmsa_encoder_dimc                    t         |           |j                  | _        |j                  | _        | j                  dz  | _        t        | j                  | j                  z        | _        | j                  | j                  z  | _        |j                  }|j                  }|j                  }t        j                  | j                  | j                        | _        |Kt        j                  || j                        | _        t        j                  || j                        | _        nd | _        d | _        |Kt        j                  || j                        | _        t        j                  || j                        | _        nd | _        d | _        |Kt        j                  || j                        | _        t        j                  || j                        | _        nd | _        d | _        t)        | j                        | _        t        j,                  |      | _        t        j                  | j                  | j                  |      | _        t3        | j                  |      | _        t        j6                  t9        j:                  dg            | _        t        j6                  t9        j:                  dg            | _        y )Nr   rI  r]   ) rB   rC   rF   r   rK  r+   r   r   $aligner_attention_probs_dropout_probaligner_enable_biasaligner_ffn_multr   r   r   key_proteinvalue_proteinkey_structurevalue_structurekey_msa	value_msaEvollaRMSNormattention_normrM   rO   out_projrk  r  r|  r,   tensorgate_attentiongate_ffw)	rY   rZ   r  r  r  r   enable_biasffn_multr[   s	           r4   rC   z,EvollaSequenceAlignerCrossAttention.__init__  s    	!--#)#=#= --t3
#&t'7'7$:R:R'R#S !558P8PP'-'R'R$00**YYt//1C1CD
*!yy)<d>P>PQD!#+>@R@R!SD#D!%D ,!#+@$BTBT!UD#%99-BDDVDV#WD !%D#'D &99_d6H6HIDLYY8J8JKDNDL!DN+D,<,<=zz">?		$"2"2D4D4D;W#D$4$4h? ll5<<+>?U\\3%%89r6   c	                    |||g}	|	D 
cg c]  }
|
|
	 }	}
|	st        d      t        j                  |	d      }	| j                  |      }| j	                  |      }| j
                  @| j                  4|j                  |      }| j                  |      }| j                  |      }nd}d}| j                  @| j                  4|j                  |      }| j                  |      }| j                  |      }nd}d}| j                  @| j                  4|j                  |      }| j                  |      }| j                  |      }nd}d}|||g}|D 
cg c]  }
|
|
	 }}
t        j                  |d      }|||g}|D 
cg c]  }
|
|
	 }}
t        j                  |d      }|j                         dd | j                  | j                  fz   } |j                  | j!                  dddd      }|j                         dd | j                  | j                  fz   } |j                  | j!                  dddd      }|j                         dd | j                  | j                  fz   } |j                  | j!                  dddd      }|| j"                  z  }|Mt        j$                  |j                  d      |j                  d            j                  |j&                        }|ddddddf   |	ddddddf   z  }t        j(                  ||j+                  dd	            }||j-                  dd
      j/                         z
  }|j1                  d|z
  j3                         t        j4                  |j6                        j8                        } t;        j<                  d      |      }t        j(                  ||      }|j!                  dddd      j?                         }|j                         dd	 | j@                  fz   } |j                  | }| jC                  |      }|S c c}
w c c}
w c c}
w )z
        query_states: text
        key_value_states: protein
        query_states: [bs, query_seq_len, dim]
        key_value_states: [bs, kv_seq_len, dim]
        query_attn_mask: [bs, query_seq_len]
        kv_attn_mask: [bs, kv_seq_len]
        Nz=At least one modality should be provided for cross attention.r$   r(   r?   r   rz   r   r   TrU  )"r   r,   r|   r  r   r  r  rd   r  r  r  r  ro   r   r   r   rW  rK  rC  rn   r   r   rX  rY  r_   rZ  finfore   minr   Softmaxr   r   r  )rY   query_statesprotein_key_value_statesstructure_key_value_statesmsa_key_value_statesquery_attn_maskprotein_kv_attn_maskstructure_kv_attn_maskmsa_kv_attn_maskkv_attn_maskr   r   key_layer_proteinvalue_layer_proteinkey_layer_structurevalue_layer_structurekey_layer_msavalue_layer_msar   r   new_query_layer_shapenew_key_layer_shapenew_value_layer_shaperf   r   attention_scoresattention_probscontext_layernew_context_layer_shapes                                r4   cross_attentionz3EvollaSequenceAlignerCrossAttention.cross_attention  si   * -.DFVW#/Aa1=AA\]]yy15)),7 jj-'D,>,>,J'?'B'B<'P$ $ 0 01I J"&"4"45M"N $"&)d.B.B.N)C)F)F|)T&"&"4"45O"P$($8$89S$T!"&$(!<<#(B#7#:#:<#H  LL)=>M"nn-ABO M"O&(;]K	 );1Q]Q;	;IIiQ/	*,A?S"-?Qq??ii3 + 0 0 23B 7$$$$;
 !
 'k&&(=>FFq!QPQR'nn.s3$$$$7
 
 #INN$78@@Aq!L	 + 0 0 23B 7$$$$;
 !
 'k&&(=>FFq!QPQR!DJJ. "#jj):):1)=|?P?PQR?STWWXdXkXklO(D!T)9:\!TSWYZJZ=[[||K1D1DR1LM#l&7&7B&7&M&T&T&VV'33%%'\5G5G)H)L)L
 -"**,-=> _kB%--aAq9DDF"/"4"4"6s";t?Q?Q>S"S***,CDm4q BL < @s"   P5P5P:P:P?P?c           
      ^   |z|j                   \  }}}|jt        j                  ||      j                  |	j                        |	j                  ||f      j                  z  j                  |j                        }nd }|z|j                   \  }}}|jt        j                  ||      j                  |	j                        |
j                  ||f      j                  z  j                  |j                        }nd }|z|j                   \  }}}|jt        j                  ||      j                  |	j                        |j                  ||f      j                  z  j                  |j                        }nd }|}||j                         s$||j                         s||j                         rz|}| j                  ||||||||      }t        j                  | j                        |z  }||z   }|}| j                  |      t        j                  | j                        z  }||z   }|S )N)ro   )r  r  r  r  r  r  r  r  )rb   r,   rC  rd   rn   rT   Tanyr  tanhr  r  r  )rY   r  protein_kv_statesstructure_kv_statesmsa_kv_statesr  r  r  r  protein_batch_maskstructure_batch_maskmsa_batch_maskpast_key_valuesr`  protein_kv_seq_lenr)   structure_kv_seq_lenmsa_kv_seq_lenr   residuals                       r4   rl   z+EvollaSequenceAlignerCrossAttention.forwardo  sL    (*;*A*A'B"C#+JJr#5699:L:S:ST(//6H"5M/NPPQ"&--. %
 $( *,?,E,E)B$c%-JJr#78;;<N<U<UV*118Lb7Q1RTTU"(//0 '
 &*"$&3&9&9#B'JJr>2556H6O6OP$++."1E+FHHI"]))* !
  $$ */C/G/G/I#/4J4N4N4P).>.B.B.D$H 00*):+>%2 /%9'=!1 1 	M "JJt':':;mKM$}4M$H GGM2UZZ5NNM$}4Mr6   r   )NNNNNNN)rs   rt   ru   r+   rC   r  rl   rw   rx   s   @r4   r  r    sb     +/,0&*1: !4Z1:  #Tz	1:
 t1:fnn "#!Gr6   r  RMSNormc                   h     e Zd Zddeddf fdZdej                  dej                  fdZd Z xZ	S )	r  r;   r   Nc                     t         |           t        j                  t	        j
                  |            | _        || _        y)z<
        EvollaRMSNorm is equivalent to T5LayerNorm
        N)rB   rC   r   r|  r,   rC  weightvariance_epsilon)rY   rF   r;   r[   s      r4   rC   zEvollaRMSNorm.__init__  s1     	ll5::k#:; #r6   r   c                 "   |j                   }|j                  t        j                        }|j	                  d      j                  dd      }|t        j                  || j                  z         z  }| j                  |j                  |      z  S )Nrz   r?   T)rV  )	re   rd   r,   float32powmeanrsqrtr  r  )rY   r   input_dtypevariances       r4   rl   zEvollaRMSNorm.forward  sy    #))%((7 $$Q',,R,>%Ht?T?T4T(UU{{]--k:::r6   c                 ^    t        | j                  j                         d| j                   S )Nz, eps=)r   r  rb   r  r<  s    r4   
extra_reprzEvollaRMSNorm.extra_repr  s*    ))*+6$2G2G1HIIr6   )gư>)
rs   rt   ru   rc   rC   r,   r   rl   r  rw   rx   s   @r4   r  r    s7    $ $$ $;U\\ ;ell ;Jr6   r  c                        e Zd ZU ej                  ed<   ddef fdZe	 	 	 ddedz  de	d   de
dz  ded	ef   fd
       Z ej                         ed               Z xZS )EvollaRotaryEmbeddingr   NrZ   c                    t         |           |j                  | _        |j                  | _        || _        | j
                  j                  d   | _        | j                  }| j                  dk7  rt        | j                     } || j
                  |      \  }| _
        | j                  d|d       | j                  d|j                         d       y )N	rope_typedefaultr   Fr@   original_inv_freq)rB   rC   rS   max_seq_len_cachedoriginal_max_seq_lenrZ   rope_parametersr  compute_default_rope_parametersr   attention_scalingrQ   clone)rY   rZ   rn   rope_init_fnr   r[   s        r4   rC   zEvollaRotaryEmbedding.__init__  s    "("@"@$*$B$B!44[A!%!E!E>>Y&.t~~>L+7V+L($(ZeD0(..2BuUr6   rn   ztorch.devicer   r   ztorch.Tensorc                    | j                   d   }t        | dd      xs | j                  | j                  z  }d}d|t	        j
                  d|dt        j                        j                  |t        j                        |z  z  z  }||fS )	a  
        Computes the inverse frequencies according to the original RoPE implementation
        Args:
            config ([`~transformers.PreTrainedConfig`]):
                The model configuration.
            device (`torch.device`):
                The device to use for initialization of the inverse frequencies.
            seq_len (`int`, *optional*):
                The current sequence length. Unused for this type of RoPE.
        Returns:
            Tuple of (`torch.Tensor`, `float`), containing the inverse frequencies for the RoPE embeddings and the
            post-processing scaling factor applied to the computed cos/sin (unused in this type of RoPE).
        
rope_thetahead_dimNr   r   rz   r   rn   re   )	r  rP   rF   r   r,   rR   r   rd   rc   )rZ   rn   r   baser)   attention_factorr   s          r4   r  z5EvollaRotaryEmbedding.compute_default_rope_parameters  s    & %%l3fj$/c63E3EIcIc3c U\\!S!5;;?BB&X]XcXcBdgjjk
 )))r6   c                 N   | j                   d d d d f   j                         j                  |j                  d   dd      j	                  |j
                        }|d d d d d f   j                         }t        |j
                  j                  t              r/|j
                  j                  dk7  r|j
                  j                  nd}t        |d      5  |j                         |j                         z  j                  dd      }t        j                  ||fd	      }|j                         | j                  z  }|j                         | j                  z  }	d d d        j	                  |j                   
      	j	                  |j                   
      fS # 1 sw Y   AxY w)Nr   r?   r$   mpscpuF)device_typeenabledrz   r(   r   )r   rc   rT   rb   rd   rn   r,  typestrr    r   r,   r|   r   r  r   re   )
rY   r~   r>   inv_freq_expandedposition_ids_expandedr   r   r   r   r   s
             r4   rl   zEvollaRotaryEmbedding.forward  sR    !MM$4-8>>@GGHZHZ[\H]_acdehhijiqiqr ,QaZ 8 > > @'1!((--'E!((--[`J`ahhmmfkUC 	5&,,.1F1L1L1NNYYZ[]^_E))UEN3C'')d444C'')d444C		5 vvAGGv$cff177f&;;;	5 	5s   BFF$r   r   )rs   rt   ru   r,   r   r   r%   rC   staticmethodr   r+   r   rc   r  no_gradr   rl   rw   rx   s   @r4   r  r    s    llV| V  &*+/"*t#*(* t* 
~u$	%	* *: U]]_<  <r6   r  c                   $     e Zd Z fdZd Z xZS )	EvollaMLPc                    t         |           || _        |j                  | _        |j                  | _        t        j                  | j                  | j                  |j                        | _        t        j                  | j                  | j                  |j                        | _	        t        j                  | j                  | j                  |j                        | _
        t        |j                     | _        y )NrI  )rB   rC   rZ   rF   r   r   r   mlp_bias	gate_projup_proj	down_projr	   
hidden_actact_fnrX   s     r4   rC   zEvollaMLP.__init__  s    !--!'!9!94#3#3T5K5KRXRaRabyy!1!143I3IPVP_P_`4#9#94;K;KRXRaRabV../r6   c                     | j                  | j                  | j                  |            | j                  |      z        }|S r   )r  r  r  r  )rY   r~   r  s      r4   rl   zEvollaMLP.forward  s6    NN4;;t~~a/@#ADLLQRO#ST	r6   r   rx   s   @r4   r	  r	    s    0r6   r	  c                     | dd| j                   d   dz  f   }| d| j                   d   dz  df   }t        j                  | |fd      S )z*Rotates half the hidden dims of the input..Nr?   rz   r(   )rb   r,   r|   r}   s      r4   rotate_halfr    sZ    	
3"!''"+"""	#B	
3q ""	#B99rc2YB''r6   rotary_pos_embc                     |j                  |      }|j                  |      }| |z  t        |       |z  z   }||z  t        |      |z  z   }||fS )a  Applies Rotary Position Embedding to the query and key tensors.

    Args:
        q (`torch.Tensor`): The query tensor.
        k (`torch.Tensor`): The key tensor.
        cos (`torch.Tensor`): The cosine part of the rotary embedding.
        sin (`torch.Tensor`): The sine part of the rotary embedding.
        unsqueeze_dim (`int`, *optional*, defaults to 1):
            The 'unsqueeze_dim' argument specifies the dimension along which to unsqueeze cos[position_ids] and
            sin[position_ids] so that they can be properly broadcasted to the dimensions of q and k. For example, note
            that cos[position_ids] and sin[position_ids] have the shape [batch_size, seq_len, head_dim]. Then, if q and
            k have the shape [batch_size, heads, seq_len, head_dim], then setting unsqueeze_dim=1 makes
            cos[position_ids] and sin[position_ids] broadcastable to the shapes of q and k. Similarly, if q and k have
            the shape [batch_size, seq_len, heads, head_dim], then set unsqueeze_dim=2.
    Returns:
        `tuple(torch.Tensor)` comprising of the query and key tensors rotated using the Rotary Position Embedding.
    )r`   r  )r   r   r   r   unsqueeze_dimq_embedk_embeds          r4   apply_rotary_pos_embr  &  sY    & --
&C
--
&C3w;q>C/0G3w;q>C/0GGr6   r   n_repr   c                     | j                   \  }}}}|dk(  r| S | dddddddddf   j                  |||||      } | j                  |||z  ||      S )z
    This is the equivalent of torch.repeat_interleave(x, dim=1, repeats=n_rep). The hidden states go from (batch,
    num_key_value_heads, seqlen, head_dim) to (batch, num_attention_heads, seqlen, head_dim)
    r$   N)rb   rT   r   )r   r  batchnum_key_value_headsslenr  s         r4   	repeat_kvr  @  so    
 2?1D1D.Ehz!!Qa"23::5BUW\^bdlmM  (;e(CT8TTr6   c                       e Zd ZdZdedef fdZ	 	 	 	 ddej                  de	ej                  ej                  f   dz  dej                  dz  d	e
dz  d
ej                  dz  dee   de	ej                  ej                  f   fdZ xZS )EvollaAttentionz=Multi-headed attention from 'Attention Is All You Need' paperrZ   r   c                 d   t         |           || _        || _        t	        |d|j
                  |j                  z        | _        |j                  |j                  z  | _	        | j                  dz  | _
        |j                  | _        d| _        t        j                  |j
                  |j                  | j                  z  |j                        | _        t        j                  |j
                  |j                  | j                  z  |j                        | _        t        j                  |j
                  |j                  | j                  z  |j                        | _        t        j                  |j                  | j                  z  |j
                  |j                        | _        y )Nr  r   TrI  )rB   rC   rZ   r   rP   rF   r   r  r  num_key_value_groupsr   attention_dropoutr   r   r   attention_biasq_projk_projv_projo_projrY   rZ   r   r[   s      r4   rC   zEvollaAttention.__init__P  sM   "
F4F4F&JdJd4de$*$>$>&B\B\$\!}}d*!'!9!9ii : :T]] JQWQfQf
 ii : :T]] JQWQfQf
 ii : :T]] JQWQfQf
 ii&&68J8JQWQfQf
r6   Nr   rU   rf   r  cache_positionr   r   c                    |j                   d d }g |d| j                  }| j                  |      j                  |      j	                  dd      }	| j                  |      j                  |      j	                  dd      }
| j                  |      j                  |      j	                  dd      }|\  }}t        |	|
||      \  }	}
|'|||d}|j                  |
|| j                  |      \  }
}t        j                  | j                  j                  t              } || |	|
||f| j                  sdn| j                   | j"                  d|\  }} |j$                  g |d j'                         }| j)                  |      }||fS )Nr?   r$   rz   )r   r   r+  r]   r   )rb   r  r&  r   r   r'  r(  r  updater   r   r   rZ   r   r   r   r$  r   r   r   r)  )rY   r   rU   rf   r  r+  r   rp   r   r  
key_statesvalue_statesr   r   cache_kwargsr   r   r   s                     r4   rl   zEvollaAttention.forwardg  s    $))#2.88b8$--8{{=166|DNNqRST[[/44\BLLQPQR
{{=166|DNNqRST&S#7jRUWZ#[ j&#&snUL'6'='=j,X\XfXfht'u$J(?(M(MKK,,.E)
 %8	%
  $}}C$2H2HLL	%
 	%
!\ *k));;;;FFHkk+.L((r6   rr   )rs   rt   ru   rv   r%   r+   rC   r,   r   r   r
   r  r   r   rl   rw   rx   s   @r4   r!  r!  L  s    G
| 
 
4 IM.2(,26))||)) #5<<#=>E)) t+	))
 )) ((4/)) +,)) 
u||U\\)	*))r6   r!  c                       e Zd Zdedef fdZ	 	 	 	 	 	 	 	 	 	 	 	 	 ddej                  deej                  ej                  f   dz  dej                  dz  dej                  dz  d	e
dz  d
edz  dej                  dz  dej                  dz  dej                  dz  dej                  dz  dej                  dz  dej                  dz  dej                  dz  dej                  dz  dej                  fdZ xZS )EvollaDecoderLayerrZ   r   c                    t         |           |j                  | _        t        ||      | _        t        |      | _        t        |j                  |j                        | _	        t        |j                  |j                        | _
        |dz   t        |j                  |j                  z  d      z  dk(  rt        ||j                        | _        y y )NrZ   r   r:   r$   r   )r  )rB   rC   rF   r!  	self_attnr	  mlpr  rms_norm_epsinput_layernormpost_attention_layernormmaxr  aligner_num_add_layersr  adapterr*  s      r4   rC   zEvollaDecoderLayer.__init__  s    !--()LV$,V-?-?VEXEXY(5f6H6HfNaNa(b%MS!9!9V=Z=Z!Z\]^^bcc>$*$6$6DL dr6   Nr   rU   rf   r>   r  	use_cacher+  r  r  r  r  r  r  r  r   c                    |}| j                  |      } | j                  d|||||||d|\  }}||z   }|}| j                  |      }| j                  |      }||z   }t	        | d      r| j                  |||	|
||||      }|S )N)r   rf   r>   r  r=  r+  rU   r<  )r  r  r  r  r  r  r  r  r  )r8  r5  r9  r6  r   r<  )rY   r   rU   rf   r>   r  r=  r+  r  r  r  r  r  r  r  r   r  r   s                     r4   rl   zEvollaDecoderLayer.forward  s    $ !,,]; *4>> 	
')%+) 3	
 	
q !=0 !55mD/ =04# LL*"3$7+ /#5%9- ) 	M r6   )NNNNFNNNNNNNN)rs   rt   ru   r%   r+   rC   r,   r   r   r  r
   rZ  rl   rw   rx   s   @r4   r2  r2    s_   |  $ IM.204(,!&261537-12648.2/35||5 #5<<#=>E5 t+	5
 &&-5 5 $;5 ((4/5 !<<$.5 #\\D05 ||d*5 "LL4/5 $llT15 t+5 ,5" 
#5r6   r2  c                        e Zd ZU eed<   dZdZg dZdgZdZ	dZ
dZdZdZeedZ ej$                          fd       Z xZS )	EvollaPreTrainedModelrZ   r  T)r2  rv  r  r  F)r   r(  c                    | j                   j                  }t        |   |       t	        |t
              rht        j                  |j                         t        j                  |j                         t        j                  |j                  j                         y t	        |t              r#t        j                  |j                  d|       y y )Nr]   )r  std)rZ   initializer_rangerB   r+  r,  r  r-  zeros_r  r  ones_r  r  rv  normal_r[  )rY   r   rB  r[   s      r4   r+  z#EvollaPreTrainedModel._init_weights  s    kk++f%fABKK--.KK(JJv,,334 ABLLcs; Cr6   )rs   rt   ru   r%   r   base_model_prefixsupports_gradient_checkpointingr/  _skip_keys_device_placementr0  r1  r2  _can_compile_fullgraphr3  r2  r!  r4  r,   r  r+  rw   rx   s   @r4   r@  r@    so    &*#
 $5"5 N!"'+%
 U]]_< <r6   r@  c            !           e Zd Zdef fdZd Zd Zeee		 	 	 	 	 	 	 	 	 	 	 	 	 dde
j                  dz  de
j                  dz  de
j                  dz  d	edz  d
e
j                  dz  dedz  de
j                  dz  de
j                  dz  de
j                  dz  de
j                  dz  de
j                  dz  de
j                  dz  de
j                  dz  deez  fd                     Z xZS )EvollaModelrZ   c           	      F   t         |   |       |j                  | _        |j                  | _        t        j                  | j                  |j                  | j                        | _        t        |      | _
        t        j                  t        |j                        D cg c]  }t        ||       c}      | _        t!        |j                  |j"                        | _        t'        |dd      | _        t+        |      | _        | j/                          y c c}w )Nr  r4  r:   r  F)rB   rC   rG   r1   rE   r   rD   rF   embed_tokensr  protein_encoderr  r  r  r2  r~  r  r7  rn  rP   r  r  
rotary_embr9  r*  s      r4   rC   zEvollaModel.__init__  s     !.. ++LL&:L:LdN^N^_36Bmm "'v'?'?!@
 	 #!'
 "&"4"4&:M:MN	&-f6NPU&V#/v>s   $Dc                     | j                   S r   rN  r<  s    r4   r=  z EvollaModel.get_input_embeddings  s       r6   c                     || _         y r   rR  r?  s     r4   r@  z EvollaModel.set_input_embeddings  s
    !r6   Nr0   rf   r>   r  rg   r=  r+  protein_input_idsprotein_attention_maskstructure_feats	msa_featsr  r  r   c                    |du |duz  rt        d      || j                  |      }|r|t        | j                        }|F||j	                         nd}t        j                  |||j                  d   z   |j                        }||j                  d      }d}d}|^|	\| j                  ||	      }|j                  }t        j                  |j                  d   |j                  t
        j                        }t        | j                  ||||	      }|}| j                  ||
      }| j                   D ]  } ||f|||||||
||||||d|} | j#                  |      }t%        ||      }|S )a;  
        protein_input_ids (torch.LongTensor):
            The input IDs for the protein sequence in structure-aware tokens. Should be of shape `(batch_size, protein_seq_length)` and type `torch.LongTensor`.
        protein_attention_mask (torch.Tensor):
            The attention mask for the protein sequence. Should be of shape `(batch_size, protein_seq_length)` and type `torch.Tensor`.
        structure_feats (torch.FloatTensor):
            The input IDs for purely structure-based features. Should be of shape `(batch_size, structure_seq_length, structure_feat_dim)` and type `torch.FloatTensor`. Dummy input for now.
        msa_feats (torch.FloatTensor):
            The input IDs for purely MSA-based features. Should be of shape `(batch_size, msa_seq_length, msa_feat_dim)` and type `torch.FloatTensor`. Dummy input for now.
        structure_batch_mask (torch.Tensor):
            The batch mask to decide which protein sequences are purely structure-based. Should be of shape `(batch_size)` and type `torch.Tensor`. Should be paired with `structure_feats`. Dummpy input for now.
        msa_batch_mask (torch.Tensor):
            The batch mask to decide which protein sequences are purely MSA-based. Should be of shape `(batch_size)` and type `torch.Tensor`. Should be paired with `msa_feats`. Dummpy input for now.
        Nz:You must specify exactly one of input_ids or inputs_embedsr  r   r$   r   rB  r  )rZ   rg   rf   r+  r  )r>   )rf   r>   r  r=  r+  r  r  r  r  r  r  r  rU   )r  r  )r   rN  r   rZ   get_seq_lengthr,   rR   rb   rn   r`   rO  r  rC  rZ  r   rP  r~  rn  r   )rY   r0   rf   r>   r  rg   r=  r+  rT  rU  rV  rW  r  r  r   past_seen_tokensprotein_featsr  protein_outputscausal_maskr   rU   decoder_layerr   s                           r4   rl   zEvollaModel.forward  s   D -t";<YZZ  --i8M0*$++>O!CRC^==?de"\\ "2]5H5H5K"KTaThThN )33A6L!(-C-O"22+5 3 O ,FFM!&!''*(//jj" );;'))+
 &"oom,oW![[ 	M)*) /#-"/$3'#5%9- .$7 M	& 		-0(++
 r6   )NNNNNNNNNNNNN)rs   rt   ru   r%   rC   r=  r@  r   r!   r#   r,   r  r   r
   r   rZ  r   r   rl   rw   rx   s   @r4   rL  rL    s   | *!"  .2.204(,26!%26596:48.248.2d##d*d t+d &&-	d
 d ((4/d $;d ((4/d !++d2d !&t 3d **T1d $$t+d $llT1d t+d  
(	(!d    dr6   rL  c                   4    e Zd Z fdZd Zd Zee	 	 	 	 	 	 	 	 ddej                  dz  dej                  dz  dej                  dz  dej                  dz  d	ej                  dz  d
ej                  dz  dedz  deej                  z  fd              Z xZS )EvollaForProteinText2Textc                     t         |   |       t        |      | _        |j                  | _        t        j                  |j                  | j                  d      | _        | j                          y rm  )
rB   rC   rL  r  rE   r   r   rF   lm_headr9  rX   s     r4   rC   z"EvollaForProteinText2Text.__init__  sQ      (
 ++yy!3!3T__5Qr6   c                 6    | j                   j                         S r   )r  r=  r<  s    r4   r=  z.EvollaForProteinText2Text.get_input_embeddings  s    zz..00r6   c                 8    | j                   j                  |      S r   )r  r@  r?  s     r4   r@  z.EvollaForProteinText2Text.set_input_embeddings  s    zz..u55r6   Nr0   rf   rg   labelsrT  rU  r=  logits_to_keepc	           
      h    | j                   d||||||d|	}
|
j                  }t        |t              rt	        | d      n|}| j                  |dd|ddf         }d}|  | j                  d||| j                  d|	}t        |||
j                  |
j                  |
j                        }|S )a,  
        protein_input_ids (torch.LongTensor):
            The input IDs for the protein sequence. Should be of shape `(batch_size, protein_seq_length)` and type `torch.LongTensor`.
        protein_attention_mask (torch.Tensor):
            The attention mask for the protein sequence. Should be of shape `(batch_size, protein_seq_length)` and type `torch.Tensor`.

        Example:

        ```python
        >>> from transformers import EvollaProcessor, EvollaForProteinText2Text
        >>> model = EvollaForProteinText2Text.from_pretrained("westlake/Evolla-10B-hf")
        >>> processor = EvollaProcessor.from_pretrained("westlake/Evolla-10B-hf")

        >>> protein_information = {
            "aa_seq": "your amino acid sequence",
            "foldseek": "your foldseek sequence",
        }
        >>> question = "What is the function of this protein?"
        >>> message = [
            {"role": "system", "content": "You are an AI expert that can answer any questions about protein."},
            {"role": "user", "content": question},
        ]

        >>> inputs = processor(proteins=[protein_information], messages_list=[message], return_tensors="pt", padding="longest")
        >>> outputs = model.generate(**inputs)

        >>> print(processor.batch_decode(outputs, skip_special_tokens=True))
        ```)r0   rf   rg   rT  rU  r=  N)logitsre  rE   )lossrh  r  r   r(  r  )r  r  r,  r+   slicerb  loss_functionrE   r   r  r   r(  )rY   r0   rf   rg   re  rT  rU  r=  rf  r   outputsr   slice_indicesrh  ri  
lm_outputss                   r4   rl   z!EvollaForProteinText2Text.forward  s    T ,64:: ,
)'/#9,
 ,
  118B>SV8W~ot4]kmA}a,?@A%4%%iVFtibhiD+#33!//))

 r6   )NNNNNNNr   )rs   rt   ru   rC   r=  r@  r   r   r,   r  r   r   rZ  r+   rl   rw   rx   s   @r4   r`  r`    s    16  .2.226*.596:!%-.B##d*B t+B ((4/	B
   4'B !++d2B !&t 3B $;B ell*B  Br6   r`  )r`  rL  r@  )Nr]   )r$   )^r   collections.abcr   dataclassesr   typingr   r,   r    r   r-  activationsr	   cache_utilsr
   r   
generationr   integrationsr   r   r   masking_utilsr   r   modeling_layersr   modeling_outputsr   r   r   r   r   modeling_rope_utilsr   r   modeling_utilsr   r   processing_utilsr   utilsr   r   r   utils.genericr    r!   utils.output_capturingr"   r#   configuration_evollar%   r&   r5   Moduler8   r   r   r   r   rc   r   r   r   r   r   r   r   r   r  r  r%  r6  rG  rk  rv  r  r  r  r  r  r	  r  r  r+   r  r!  r2  r@  rL  r`  __all__r  r6   r4   <module>r     s=  *  $ !    & ! . ) f f J 9  L F & I I G E <4 ^=RYY ^=B(
2*
")) *
f !%II%<<% 
% <<	%
 LL4'% T\% % '(%8M)		 M)`
RYY 
BII 8;ryy 
 
42 4nS")) S@  2/ 2 2.+
!< +
\7 		 7 tA		 A'.		 '.T <k <  <
299 
$k")) k\ Y'JBII J (J(><BII ><B		  ( *+ ,2	UU\\ 	U# 	U%,, 	U )*C)bii C) +C)LE3 EP <O < <BC' CLS 5 Sl Pr6   