
    qi,                        d Z ddlZddlZddlZddlmZmZmZ ddlm	Z	 ddl
ZddlmZ eeej                  f   Zeee	f   ZdZ ej&                  d	       G d
 d             ZdedefdZddededee   fdZdededefdZdedefdZdedej                  fdZ	 	 	 	 	 ddededej                  dz  dej                  dz  dedz  dee   dz  dee   dz  defdZy) zProtein data type.    N)IteratorMappingSequence)Any   )residue_constantsg{Gz?T)frozenc                      e Zd ZU dZej
                  ed<   ej
                  ed<   ej
                  ed<   ej
                  ed<   ej
                  ed<   dZej
                  dz  ed<   dZe	dz  ed	<   dZ
ee	   dz  ed
<   dZee   dz  ed<   y)Proteinz!Protein structure representation.atom_positionsaatype	atom_maskresidue_index	b_factorsNchain_indexremarkparentsparents_chain_index)__name__
__module____qualname____doc__npndarray__annotations__r   r   strr   r   r   int     `/opt/pipecat/venv/lib/python3.12/site-packages/transformers/models/esm/openfold_utils/protein.pyr   r   "   s    + JJ JJ zz ::
 zz &*Kd") FC$J %)GXc]T!( 15#-4r   r   proteinnet_strreturnc                    d}t        j                  ||       D cg c]!  }t        |      dkD  s|j                         # }}t	        |dd d   |dd d   D cg c]  }|j                  d       c}      }g d}d }d }d }	|D ]}  }
|
d   dk(  r|
d   d   j                         }|D cg c]  }|t
        j                  v r|nd }}t        j                  |D cg c]0  }t
        j                  j                  |t
        j                        2 c}      }|
d   d	k(  rg }t        d
      D ]>  }|j                  t        t        t         |
d   |   j                                            @ t        j                  |      }t        j"                  t        |d         d
z  t
        j$                  d
f      j'                  t        j(                        }t+        |      D ]?  \  }}t        j,                  |d d |d d
f         |d d t
        j.                  |   d d f<   A |t0        z  }|
d   dk(  st        j                  t        t        dddj                  |
d   d   j                                           }t        j"                  t        |      t
        j$                  f      j'                  t        j(                        }	t+        |      D ]  \  }}d|	d d t
        j.                  |   f<   ! |	|d   z  }	 |J t3        ||	|t        j4                  t        |            d       S c c}w c c}w c c}w c c}w )Nz(\[[A-Z]+\]\n)r      r   
)NCACz	[PRIMARY]Xz
[TERTIARY]   z[MASK])-+).N)r   r   r   r   r   )resplitlenstripzipr   restypesr   arrayrestype_ordergetrestype_numrangeappendlistmapfloatzerosatom_type_numastypefloat32	enumerate	transpose
atom_orderPICO_TO_ANGSTROMr   arange)r!   tag_retagtagslgroupsatomsr   r   r   gseqchar
res_symboltertiaryaxistertiary_npiatommasks                       r    from_proteinnet_stringrU   H   s   F.0hhv~.N_sRUVYRZ]^R^syy{_D_.1$qt!t*VZ[\[_^_[_V`>aQRqwwt}>a.bF'EFNI )Q4;A$q'--/CSVW44#4#=#==43FWCWXXruvdn"0044ZARA^A^_vF qT\!*,Ha FS!T
0@0@0B%C DEF((8,KXXs8A;'71'<>O>]>]_`&abiijljtjtuN$U+ n4KM<<XcdeghgkjkgkdkXlKmq"3">">t"DaGHn..NqTX88D11%5%9%91Q47==?!KLMDI%33
 fRZZ   %U+ E4CD	!.99$??@Ei(I7): %iiF, M `>a Xvs   L5L5L:
*L?5M
protchain_idc                 F   g }| j                   }||j                  d|        | j                  }| j                  }|&|$t	        ||      D cg c]  \  }}||k(  s| }}}|t        |      dk(  rdg}|j                  ddj                  |              |S c c}}w )NREMARK r   N/APARENT  )r   r8   r   r   r1   r/   join)rV   rW   pdb_headersr   r   r   rR   ps           r    get_pdb_headersr`   y   s    K[[FWVH-.llG222>!$%8'!BTAa8m1TT#g,!+''!2 345 Us   BBpdb_strc                    g }|j                  d      }| j                  }||j                  d|        | j                   t	        | j                        dkD  rg }| j
                  i }t        | j                  | j
                        D ]=  \  }}|j                  t        |      g        |t        |         j                  |       ? t        d |D              }	t        |	dz         D ]/  }|j                  t        |      dg      }
|j                  |
       1 n)|j                  t        | j                               ndgg}dt        t           dt        fd	}|j                   ||d                d}t        |      D ]a  \  }}d
|vrd|vr|j                  |       d|v s$d||dz      vs/|dz  }|t	        |      k\  s||   }
ndg}
|j                   ||
             c dj                  |      S )zWAdd pdb headers to an existing PDB string. Useful during multi-chain
    recycling
    r%   rY   r   c              3   2   K   | ]  }t        |        y w)N)r   ).0	chain_idxs     r    	<genexpr>z"add_pdb_headers.<locals>.<genexpr>   s     FY#i.Fs   r   rZ   r_   r"   c                 *    ddj                  |        S )Nr[   r\   )r]   )r_   s    r    make_parent_linez)add_pdb_headers.<locals>.make_parent_line   s    !&&r   PARENTREMARKTEREND)r.   r   r8   r   r/   r   r1   
setdefaultr   maxr7   r5   r9   r   r@   r]   )rV   ra   out_pdb_lineslinesr   parents_per_chainparent_dictr_   rR   max_idxchain_parentsrh   chain_counterrH   s                 r    add_pdb_headersrv      s     "MMM$E[[Fwvh/0 ||C$5$9##/02KDLL$*B*BC .1&&s1vr2CF#**1-. F+FFG7Q;' 8 +A @!((78 $$T$,,%78#WI'HSM 'c ' )*;A*>?@M%  
B11!2  #A:%uQU|3QM C(9$:: 1- @!&  !1-!@A
B 99]##r   c                 B  ! t         j                  dgz   !dt        dt        f!fd}t         j                  }g }| j
                  }| j                  }| j                  }| j                  j                  t        j                        }| j                  }| j                  }	t        j                  |t         j                  kD        rt!        d      t#        |       }
t%        |
      dkD  r|j'                  |
       |j(                  d   }d}d}t*        j,                  }d}t/        |      D ]@  } |||         }t1        |||   ||   ||         D ]  \  }}}}|d	k  rd
}t%        |      dk(  r|nd| }d}d}d}|d   }d}d}|	||	|      }|d|dd|d|d|dd|d||   d|dd|d   d|d   d|d   d|d|dd|d|d}|j3                  |       |dz  } ||dz
  k(  }|	||dz
  k7  r|	|dz      |k7  r
d}|	|dz      }|sd}|d|dd |||         dd|d||   d} |j3                  |        |dz  }||dz
  k7  s&|j'                  t#        | |             C |j3                  d       |j3                  d       d j5                  |      S )!zConverts a `Protein` instance to a PDB string.

    Args:
      prot: The protein to convert to PDB.

    Returns:
      PDB string.
    r)   rr"   c                 J    t         j                  j                  |    d      S )NUNK)r   restype_1to3r5   )rx   r2   s    r    res_1to3zto_pdb.<locals>.res_1to3   s      --11(1+uEEr   zInvalid aatypes.r   r   Ng      ?ATOM   r\    g      ?Az<6z>5z<4z>1z>3z>4z   z>8.3fr$   z>6.2fz
          z>2Trk   z      rl   r%   )r   r2   r   r   
atom_typesr   r   r   r   r>   r   int32r   r   anyr6   
ValueErrorr`   r/   extendshapestringascii_uppercaser7   r1   r8   r]   )"rV   r|   r   	pdb_linesr   r   r   r   r   r   headersn
atom_indexprev_chain_index
chain_tags	chain_tagrR   
res_name_3	atom_nameposrT   b_factorrecord_typenamealt_locinsertion_code	occupancyelementcharge	atom_lineshould_terminate	chain_endchain_termination_liner2   s"                                    @r    to_pdbr      sM    !))SE1HFC FC F #--JII[[F((N&&--bhh7MI""K	vvf(4445+,,d#G
7|a!QAJ''JI1X 0JfQi(
.1*nQ>OQZ[\Q]_hij_k.l 	*IsD(cz K #I! 391YKDGNIlGFI&&{1~6	 r":b/4)GB<b/9R. #B'r':#q6%.Qs1venU#HU#3:2,vbk+  Y'!OJ5	8 A:"AEzk!a%04DD#' #.q1u#5 IR.Bvhvay6I"5MQyY[n]jkl]mnp\qr # 34!OJAEz   7G!HIa0Jd UR99Yr   c                 <    t         j                  | j                     S )ao  Computes an ideal atom mask.

    `Protein.atom_mask` typically is defined according to the atoms that are reported in the PDB. This function
    computes a mask according to heavy atoms that should be present in the given sequence of amino acids.

    Args:
      prot: `Protein` whose fields are `numpy.ndarray` objects.

    Returns:
      An ideal atom mask.
    )r   STANDARD_ATOM_MASKr   )rV   s    r    ideal_atom_maskr     s     //<<r   featuresresultr   r   r   r   r   c                 |    t        | d   |d   |d   | d   dz   ||nt        j                  |d         ||||	      S )a  Assembles a protein from a prediction.

    Args:
      features: Dictionary holding model inputs.
      result: Dictionary holding model outputs.
      b_factors: (Optional) B-factors to use for the protein.
      chain_index: (Optional) Chain indices for multi-chain predictions
      remark: (Optional) Remark about the prediction
      parents: (Optional) List of template names
    Returns:
      A protein instance.
    r   final_atom_positionsfinal_atom_maskr   r   )	r   r   r   r   r   r   r   r   r   )r   r   
zeros_like)r   r   r   r   r   r   r   s          r    from_predictionr   +  s]    * !45*+/!3(4)"--O`Ha:b/
 
r   )r   )NNNNN)r   dataclassesr-   r   collections.abcr   r   r   typingr   numpyr   r   r   r   r   FeatureDictModelOutputrC   	dataclassr   rU   r   r9   r`   rv   r   r   r   r   r   r    <module>r      sv      	  7 7    c2::o&c3h  d#"5 "5 $"5J.3 .7 .b' S c (/$' /$C /$C /$dZ  Z S Z z=' =bjj =$ $(%)$(04 zzD  d"	
 $J c]T! "#- r   