
    Iui*                        d dl Z d dlZd dlmZ d dlZd dlZd dlmZ d dl	m
Z
 d dlmZmZ ej                  j                  ej                  j                   ej                  j"                  dZh dZ G d d	e
      Zd
 Zd Zd Zd Zd Zd Zd Zd ZddZd Zd ZddZ ddZ!d Z"ddZ#ddZ$d Z%e&dk(  r e%        yy)     N)List)utils)	Converter)common_spectransformer_spec)gelureluswish>    dnndc                   ,    e Zd ZdZdedee   fdZd Zy)MarianConverterz$Converts models trained with Marian.
model_pathvocab_pathsc                      || _         || _        y)zInitializes the Marian converter.

        Arguments:
          model_path: Path to the Marian model (.npz file).
          vocab_paths: Paths to the vocabularies (.yml files).
        N)_model_path_vocab_paths)selfr   r   s      O/opt/pipecat/venv/lib/python3.12/site-packages/ctranslate2/converters/marian.py__init__zMarianConverter.__init__   s     &'    c           
         t        j                  | j                        }t        |      }t	        t        t        | j                              }|d   }d|d   v }|d   }t        j                         } ||d   dk(  d        ||d   d	k(  d
        ||d    d        ||t        v d|ddj                  t        j                               d        ||t        v d|ddj                  t              d       |r1 ||d   dk(  xr |d   dk(  xr |j                  dd      dk(  d       n0 ||d   dk(  xr |d   dk(  xr |j                  dd      dk(  d       |j                          |d   }|dk(  rdnt!        |      dz
  }d|v }	t"        j$                  j'                  |d   |d   f|d    |t        |   |d|	!      }
t)        |
|       |
j+                  |d"          |
j-                  |d          d#|
j.                  _        |
S )$Nztransformer-ffn-activationr   ztransformer-preprocessztransformer-postprocess-embtypetransformerz#Option --type must be 'transformer'ztransformer-decoder-autoregzself-attentionz=Option --transformer-decoder-autoreg must be 'self-attention'ztransformer-no-projectionz3Option --transformer-no-projection is not supportedz$Option --transformer-ffn-activation z. is not supported (supported activations are: z, )z%Option --transformer-postprocess-emb z) is not supported (supported values are: ztransformer-postprocessdaztransformer-postprocess-topr   zUnsupported pre-norm Transformer architecture, expected the following combination of options: --transformer-preprocess n --transformer-postprocess da --transformer-postprocess-top ndanzUnsupported post-norm Transformer architecture, excepted the following combination of options: --transformer-preprocess '' --transformer-postprocess dan --transformer-postprocess-top ''z"transformer-guided-alignment-layerlast   z	enc-depthz	dec-depthztransformer-heads)pre_norm
activationalignment_layeralignment_headslayernorm_embeddingr   T)nploadr   _get_model_configlistmap
load_vocabr   r   ConfigurationChecker_SUPPORTED_ACTIVATIONSjoinkeys_SUPPORTED_POSTPROCESS_EMBgetvalidateintr   TransformerSpecfrom_configset_transformer_specregister_source_vocabularyregister_target_vocabularyconfigadd_source_eos)r   modelr;   vocabsr$   r#   postprocess_embcheckr%   r'   
model_specs              r   _loadzMarianConverter._load#   sp   (()"5)c*d&7&78989
&!9:: !>?**,fVn-/TU015EEK	
 	233A	
 	00 499%;%@%@%BCE	
 	99		*D EG	
 /0C7 I45=IJJ<bASH2		 /0B6 H45>HJJ<bARG3		 	 !EF /6 9"s??SVW?W!_4%55AAK &"56&'-j9+ 3 B 

 	Z/--fQi8--fRj9+/
(r   N)__name__
__module____qualname____doc__strr   r   rB    r   r   r   r      s"    .(3 (T#Y (Ir   r   c                 `    | d   }|d d j                         }t        j                  |      }|S )Nzspecial:model.ymlr!   )tobytesyaml	safe_load)r=   r;   s     r   r*   r*   o   s4    &'FCR[  "F^^F#FMr   c           	      V   t        | d      5 }g }d }d }t        |      D ]&  \  }}|j                  d      }|s|j                  d      r|dd  }n||dd  }n|j	                  dd      \  }}||j                  d      rX|j                  d      rGt        j                  d	d
|      }|dd }|j                  d      rSt        t        |dd  d            }n9|j                  d      r(|j                  d      r|dd }|j                  dd      }|	 t        |j                               }|j                  ||f       d }d }) 	 d d d        t        d       D cg c]  \  }}|	 c}}S # t        $ r}t        d|dz   |fz        |d }~ww xY w# 1 sw Y   OxY wc c}}w )Nzutf-8)encodingz
z?    :r"   "z\\([^x])z\1r!   z\x   )base'z''z"Unexpected format at line %d: '%s'c                     | d   S )Nr   rH   )items    r   <lambda>zload_vocab.<locals>.<lambda>   s
    $q' r   )key)open	enumeraterstrip
startswithrsplitendswithresubchrr5   replacestrip
ValueErrorappendsorted)	pathvocabtokenstokenidxilinee_s	            r   r-   r-   v   s   	dW	% ' ' #	GAt;;v&Dt$QR"12h![[a0
s ##C(U^^C-@FF;u=E!!BKE''. #Cab	$; <%%c*u~~c/B!!BKE!MM$4Eciik*C sEl+G#		'R #)5I"JKhaEKK " $<At}L?' 'R Ls6   DFE40F$F%4	F=FFFF"c                 `    t        | j                  |d       t        | j                  |d       y )Nencoderdecoder)set_transformer_encoderrq   set_transformer_decoderrr   )specweightss     r   r8   r8      s"    DLL'9=DLL'9=r   c           	          t        | ||       t        | j                        D ]  \  }}t        ||d||dz   fz          y )N%s_l%dr"   )set_common_layersrZ   layerset_transformer_encoder_layerru   rv   scoperl   
layer_specs        r   rs   rs      sF    dGU+"4::. V:%j'8uaRSen;TUVr   c           	          d| _         t        | ||       t        | j                        D ]  \  }}t	        ||d||dz   fz          t        | j                  |d|z  | j                  j                         y )NTrx   r"   z%s_ff_logit_out)reuse_weight)	start_from_zero_embeddingry   rZ   rz   set_transformer_decoder_layer
set_linear
projection
embeddingsweightr|   s        r   rt   rt      sw    %)D"dGU+"4::. V:%j'8uaRSen;TUV E!__++	r   c                 d   | j                   }t        |t              s|g}t        |d   ||       t	        | j
                  ||d   j                  j                  d          t        | d      rt        | j                  |d|z  d       t        | d      rt        | j                  |d	|z         y y )
Nr   r"   )dimr'   z%s_embTr#   
layer_normz%s_top)r   
isinstancer+   set_embeddingsset_position_encodingsposition_encodingsr   shapehasattrset_layer_normr'   r   )ru   rv   r}   embeddings_specss       r   ry   ry      s    &-,-#A&7.>q.A.H.H.N.Nq.Q t*+$$u		
 t\"tE1AB #r   c                 p    t        | j                  |d|z         t        | j                  |d|z  d       y )N%s_ffn%s_selfTself_attention)set_ffnffnset_multi_head_attentionr   ru   rv   r}   s      r   r{   r{      s3    DHHgx%/0Wi%&7r   c                     t        | j                  |d|z         t        | j                  |d|z  d       t        | j                  |d|z         y )Nr   r   Tr   z
%s_context)r   r   r   r   	attentionr   s      r   r   r      sJ    DHHgx%/0Wi%&7 T^^WlU6JKr   c                 N   t        d      D cg c]  }t        j                          }}t        |d   ||d       t        |d   ||d       t        |d   ||d       |r$t	        j
                  | j                  d   |       nh|d   j                  | j                  d   _        |d   j                  | j                  d   _        t	        j
                  | j                  d   |dd         t        | j                  d   ||d	       t        | j                  |d
|z         y c c}w )N   r   qr"   krO   vr!   oz%s_Wo)ranger   
LinearSpecr   r   fuse_linearlinearr   biasset_layer_norm_autor   )ru   rv   r}   r   ro   split_layerss         r   r   r      s    6;Ah?K**,?L?|A4|A4|A4$++a.,7 ,Q 6 6A*1o22A$++a.,qr*:;t{{24'E/B @s   D"c                     t        | j                  |d|z         t        | j                  ||d       t        | j                  ||d       y )Nr   12)r   r   r   linear_0linear_1r   s      r   r   r      s;    (U2BCt}}guc2t}}guc2r   c                 \    	 t        | ||d       y # t        $ r t        | ||       Y y w xY w)NTr   )r   KeyErrorr   s      r   r   r      s0    -tWed; -tWe,-s    ++c                     |rdnd}||d|   j                         | _        ||d|   j                         | _        y )N_prer   	_ln_scale_ln_bias)squeezegammabeta)ru   rv   r}   r#   suffixs        r   r   r      s?    VRFE6:;CCEDJ%89AACDIr   c                     |j                  |d|      }||j                  |d||      }n|j                         }|| _        |j                  |d|      }||j                         | _        y y )N_W_Wt_b)r3   	transposer   r   r   )ru   rv   r}   r   r   r   r   s          r   r   r     so    [[UF34F~%8,G!!#DK;;5&12DLLN	 r   c                 |    |j                  d|z        | _        | j                  |j                  d      | _        y y )Nz%s_WembWemb)r3   r   r   s      r   r   r     s6    ++i%/0DK{{kk&) r   c                 D    |j                  dt        |            | _        y )NWpos)r3   #_make_sinusoidal_position_encodings	encodings)ru   rv   r   s      r   r   r     s    [[)LS)QRDNr   c                    t        j                  |      }t        j                  ddt        j                  |       dz  z  | z        }t        j                  |d      t        j                  |d      z  }t        j                  |      }t        j
                  |d d dd df         |d d d | dz  f<   t        j                  |d d dd df         |d d | dz  d f<   |S )Ni'  rO   r"   r   )r(   arangepowerexpand_dims
zeros_likesincos)r   num_positions	positions
timescalesposition_enctables         r   r   r     s    		-(I%biin&9!:S!@AJ>>)Q/"..Q2OOLMM,'E66,q!$Q$w"78E!ZsaxZ-66,q!$Q$w"78E!SAXZ-Lr   c                  J   t        j                  t         j                        } | j                  ddd       | j                  dddd	       t	        j
                  |        | j                         }t        |j                  |j                        }|j                  |       y )
N)formatter_classz--model_pathTzPath to the model .npz file.)requiredhelpz--vocab_paths+z'List of paths to the YAML vocabularies.)r   nargsr   )argparseArgumentParserArgumentDefaultsHelpFormatteradd_argumentr   declare_arguments
parse_argsr   r   r   convert_from_args)parserargs	converters      r   mainr   '  s    $$ >>F ,J   6	   'D1A1ABI%r   __main__)F)r   N)N)i   )'r   r_   typingr   numpyr(   rK   ctranslate2.convertersr    ctranslate2.converters.converterr   ctranslate2.specsr   r   
ActivationGELUSigmoidRELUSWISHr/   r2   r   r*   r-   r8   rs   rt   ry   r{   r   r   r   r   r   r   r   r   r   r   rC   rH   r   r   <module>r      s     	    ( 6 ; ""..""''##))  2 Vi Vr+L\>
VC(LC"3-D#*S&& zF r   