
    qiGn                        d dl mZmZ  e       r
ddlZddlmZ ddlmZ d dlmZ d dl	m
Z
mZ  ej                  e      Zg dZed	        Z G d
 de      Z G d de      Z G d de      Z G d de      Zd Zd Zej.                  dddej0                  dedej4                  fdZej.                  dddej0                  dedej4                  fdZ G d dej:                        Zd Zd Z d Z!d  Z"d! Z#d" Z$d%d#e%e&   dz  fd$Z'y)&   )is_torch_availablelogging    N)nn)contextmanager)ConversionOps)get_module_from_nameshould_convert_module)g        g      ?g      ?g      ?g       @g      @g      @g      @g       g      g      g      g       g      g      g      c              #     K   t               rdd l}t        | |j                        r| j                  } n"t        | t
              r |j                  |       } t        | dd       }|dk(  r*|j                  j	                  |       5  d  	 d d d        y |dk(  r6t        |d      r*|j                  j	                  |       5  d  	 d d d        y d  y # 1 sw Y   IxY w# 1 sw Y   xY ww)Nr   typecudaxpu)
r   torch
isinstanceTensordevicestrgetattrr   hasattrr   )devr   dev_types      Q/opt/pipecat/venv/lib/python3.12/site-packages/transformers/integrations/mxfp4.py	on_devicer   1   s     c5<<(**CS!%,,s#C3-v""3'   u!6!!#&   
  s6   BC*C	5C*>CC*CC*C'#C*c                       e Zd Zd Z	 	 	 d	deeej                  f   dej                  j                  dz  de
e   dz  dedz  deeej                  f   f
dZy)
Mxfp4Quantizec                     || _         y Nhf_quantizerselfr   s     r   __init__zMxfp4Quantize.__init__H   
    (    N
input_dictmodelmissing_keysfull_layer_namereturnc                    t        |j                               d   \  }}t        |t              r|d   n|}t	        ||      \  }}t        j                  |j                        5  t        |t              rt        |j                  dd      t              \  }	}
t        j                  j                  t        j                  j                  t        j                  j                  }}}t        |	|
t              \  }	}
d|v rdnd}||j                   v r|j                   |= t#        |||	       t#        || d ||
 | |                          |j%                  |        d	|_        i cd d d        S 	 d d d        y # 1 sw Y   y xY w)
Nr   gate_up_proj	down_proj_precision_configrhs_dataweight_scaleflex_ctxT)tupleitemsr   listr	   r   r   Mxfp4GptOssExpertsquantize_to_mxfp4	transposetriton_kernels_hub
matmul_ogsPrecisionConfigFlexCtx
InFlexDataswizzle_mxfp4_parameterssetattrdiscard_is_hf_initialized)r!   r%   r&   r'   r(   kwargs_valuemoduletriton_weight_tensorr3   r=   r>   r?   projs                  r   convertzMxfp4Quantize.convertK   ss    ))+,Q/5&ud3a(@	\\%,,' 	&"455FuWY[]G^`r5s2$l&11AA&1199&11<< +5
 6C(,8J62$l *8?)J~P[6---**40&:;f-.#YcYeHfg $$'8:,0)9	 	5	 	 	s   %DE==FNNN__name__
__module____qualname__r"   dictr   r   r   r   Moduler7   rK    r$   r   r   r   G   s}    ) )-)-&*)ell*+) xx%) 3i$&	)
 t) 
c5<<	 )r$   r   c                       e Zd Zd Z	 	 	 d	deeej                  f   dej                  j                  dz  dedz  de
e   dz  deeej                  f   f
dZy)
Mxfp4Dequantizec                     || _         y r   r   r    s     r   r"   zMxfp4Dequantize.__init__x   r#   r$   Nr%   r&   r(   r'   r)   c                 z   i }d|v rdnd}| d|j                         v r6t        || d   t              r|| d   d   || d<   n|| d   || d<   | d|j                         v r6t        || d   t              r|| d   d   || d<   n|| d   || d<   t        || d   || d         }||iS )Nr-   r.   _blocksr   _scales)keysr   r7   dequantize_convertops)	r!   r%   r&   r(   r'   rE   
param_datarJ   dequantizeds	            r   rK   zMxfp4Dequantize.convert{   s!    
!/?!B~V7z00*vW%56=/9TF':J/KA/N
dV7+,/9TF':J/K
dV7+,V7z00*vW%56=/9TF':J/KA/N
dV7+,/9TF':J/K
dV7+, ,J$w7G,H*X\W]]dUeJfg--r$   rL   rM   rS   r$   r   rU   rU   w   s}    ) )-&*)-.ell*+. xx%. t	.
 3i$&. 
c5<<	 .r$   rU   c                       e Zd Zd Z	 	 	 d
deeej                  f   dej                  j                  dz  dedz  de
e   dz  deeej                  f   f
dZedefd	       Zy)Mxfp4Deserializec                     || _         y r   r   r    s     r   r"   zMxfp4Deserialize.__init__   r#   r$   Nr%   r&   r(   r'   r)   c           	         i }d|v rdnd}| d|j                         v r6t        || d   t              r|| d   d   || d<   n|| d   || d<   | d|j                         v r6t        || d   t              r|| d   d   || d<   n|| d   || d<   t        ||      \  }}	t	        || d   || d   |||| d   j
                  t               |j                  |        d|_        i S )Nr-   r.   rX   r   rY   T)	rZ   r   r7   r	   swizzle_mxfp4_convertopsr   r;   rC   rD   )
r!   r%   r&   r(   r'   rE   r\   rJ   rH   rF   s
             r   rK   zMxfp4Deserialize.convert   se    
!/?!B~V7z00*vW%56=/9TF':J/KA/N
dV7+,/9TF':J/K
dV7+,V7z00*vW%56=/9TF':J/KA/N
dV7+,/9TF':J/K
dV7+, )@	 $w'($w'($w'(//	
 	02$(! 	r$   c                 ,    t        | j                        S r   )Mxfp4ReverseDeserializer   )r!   s    r   
reverse_opzMxfp4Deserialize.reverse_op   s    &t'8'899r$   rL   )rN   rO   rP   r"   rQ   r   r   r   r   rR   r7   rK   propertyr   re   rS   r$   r   r_   r_      s    ) )-&*)-%ell*+% xx%% t	%
 3i$&% 
c5<<	 %N :M : :r$   r_   c                       e Zd Zd Z	 	 	 d	deeej                  f   dej                  j                  dz  dedz  de
e   dz  deeej                  f   f
dZy)
rd   c                     || _         y r   r   r    s     r   r"   z Mxfp4ReverseDeserialize.__init__   r#   r$   Nr%   r&   r(   r'   r)   c                    t        |j                  dd      }t        |j                  dd      }d|v rdnd}|j                  dd      d	   }	t        ||      \  }
}i }t	        |
t
              rd
|v r&|j                  dd      }	t        |
|dz         ||	<   |S d|v r|
j                  j                  j                  j                  |
j                  j                  j                        j                  dd      j                  |ddd      ||	 d<   |
j                  j                  j                  j                  j                  |
j                  j                  j                  j                        j                  dd      ||	 d<   |S |
j                   j                  j                  j                  |
j                   j                  j                        j                  dd      j                  ||dd      ||	 d<   |
j"                  j                  j                  j                  j                  |
j"                  j                  j                  j                        j                  dd      ||	 d<   |S )Nnum_local_experts    hidden_sizei@  r-   r.   rF      r   biasrX    _biasr+   r,   Z      rY   )r   configrsplitr	   r   r8   replacer-   storagelayoutunswizzle_datadatar:   reshapegate_up_proj_precision_configr3   r.   down_proj_precision_config)r!   r%   r&   r(   r'   rE   rj   rl   rJ   namerH   rF   
state_dicts                r   rK   zMxfp4ReverseDeserialize.convert   s>    $ELL2ErJellM4@!/?!B~%%c1-a0(@	
f01(&..y"=#*64'>#B
4 !!0''//66EEfFYFYFaFaFfFfgYr2&W.B; dV7+, 88EEMMTTcc<<IIQQVViB' dV7+,"  $$,,33BB6CSCSC[C[C`C`aYr2&W.RD dV7+, 55BBJJQQ``99FFNNSSiB' dV7+, r$   rL   rM   rS   r$   r   rd   rd      s}    ) )-&*)-,ell*+, xx%, t	,
 3i$&, 
c5<<	 ,r$   rd   c                     |j                   j                  j                  } || j                  t        j
                        t        j                  d      \  } }| |fS )Nrm   )axis)numerics_detailsmxfpdowncast_to_mxfp_torchtor   bfloat16uint8)wr;   r   w_scales       r   r9   r9      sH    /@@EE\\'U^^(<ekkPQRJAwg:r$   c                 f   |j                   j                  |j                   j                  |j                   j                  }}}|j                  j
                  }|j                  j
                  j                  }|j                  d      \  }}	 | || |      |fi |	}  | ||      |      }| |fS )zE
    Changes the layout of the tensors depending on the hardware
    rm   )mx_axisdtype)tensorFP4convert_layoutwrap_torch_tensortensor_detailsrw   StridedLayout"make_default_matmul_mxfp4_w_layout)
r   r   r;   r   r   r   rw   r   value_layoutvalue_layout_optss
             r   r@   r@      s    
 	!!%%!!00!!33 +C
  ..55F&55<<JJM&,&O&OXY&O&Z#L#(#6ZHYZA.w7GGg:r$   i   r   rows_per_chunkr   r   r)   c                4   ddl }| j                  t        j                        } |j                  t        j                        dz
  }| j
                  dd |j
                  k(  s$J d| j
                  dd d|j
                         t        j                  t        || j                        }| j
                  ^ }}}|j                  |      |z  }	| j                  |	|      } |j                  |	d      }t        j                  |	|d	z  || j                        }
t        d|	|      D ]  }t        ||z   |	      }| || }||| }|
|| }|d
z  j                  t        j                        }||   |ddddd	f<   ~|dz	  j                  t        j                        }||   |ddddd	f<   ~t        j                  |||       ~~~   |
j                  g |||d	z   j                   g |||z  d	z   }
|
j#                  dd	      j%                         S )w
    Convert the mxfp4 weights again, dequantizing and makes them compatible with the forward
    pass of GPT_OSS.
    r   N   r+   zblocks.shape[:-1]=z does not match scales.shape=)r   r   rm   r         )out)mathr   r   r   int32shaper   
FP4_VALUESr   prodrz   emptyrangeminintldexpviewr:   
contiguous)blocksscalesr   r   r   lutprefix_shapeGB
rows_totalr   r0r1blkexpsubidx_loidx_his                     r   _convert_moe_packed_tensorsr     s    YYu{{#FYYu{{#c)F<<,d1Ccr1B0DDbU[UaUaTc.dd,
,,zv}}
EC ,,\1a<(1,J^^J*F^^J*F
++j!a%uV]]
KCAz>2 n$j1RmRm"Rj *+6{Aqt!tG (uyy)6{Aqt!tG 	C#&c'* 4+#++
.|
.Q
.A
.
3
3
M\
M1q519
MC==A))++r$   c                    	 t        | |||      S # t        j                  $ r4 | j                  d      } |j                  d      }t        | |||      cY S w xY w)r   r   cpu)r   r   OutOfMemoryErrorr   )r   r   r   r   s       r   convert_moe_packed_tensorsr   I  sa    g*66Weff !! g5!5!*66Weffgs    AAAc                   V     e Zd Z fdZdej
                  dej
                  fdZ xZS )r8   c           	         t         |           |j                  | _        |j                  | _        |j
                  | _        t        j                  t        j                  | j                  d| j                  z  | j
                  dz  dt        j                        d      | _        t        j                  t        j                  | j                  d| j                  z  t        j                        d      | _        t        j                  t        j                  | j                  | j
                  | j                  dz  dft        j                        d      | _        t        j                  t        j                  | j                  | j
                  t        j                        d      | _        d| _        t#        |dd	      | _        d | _        d | _        t#        |dd	      | _        y )
Nr   rk   rr   r   Frequires_gradgZd;?swiglu_limitg      @)superr"   rj   num_expertsintermediate_sizerl   r   	Parameterr   zerosr   r-   float32gate_up_proj_biasr.   down_proj_biasalphar   limitr{   r|   )r!   rs   	__class__s     r   r"   zMxfp4GptOssExperts.__init__c  s{   !33!'!9!9!--LLKK((!d.D.D*DdFVFVZ\F\^`hmhshst

 "$KK((!d.D.D*DEMMZjo"
 KK))4+;+;T=S=SWY=Y[]^fkfqfqr

 !llKK(($*:*:%--P`e
 
V^S9
-1**.'V^S9
r$   hidden_statesr)   c                    t         j                  j                  t         j                  j                  t         j                  j                  }}}t         j                  j
                  }t        |j                        5   | |d|d      | j                  | j                  fd      }	 ||| j                  | j                  j                  t        j                        ||| j                  d |	      }
 ||
| j                   | j"                  j                  t        j                        ||| j$                  |j&                        }d d d        |S # 1 sw Y   S xY w)Nswiglu)r   r   r   )gather_indxprecision_configgammasfused_activation)scatter_indxr   r   )r;   r<   FnSpecsFusedActivationr   	swiglu_fnr   r   r   r   r-   r   r   r   r   r{   r.   r   r|   	gate_scal)r!   r   routing_data
gather_idxscatter_idxr   r   r<   r   actintermediate_cache1intermediate_cache3s               r   forwardzMxfp4GptOssExperts.forward  s0   ))11))99))44 #-
 '--77	}++, 	!'(I?Q"RUYU_U_aeakakTlnopC",!!&&))%--8&!%!C!C!$	# #-###&&u}}5(!%!@!@#--#	. #"/	. #"s   >CEE)rN   rO   rP   r"   r   r   r   __classcell__)r   s   @r   r8   r8   b  s&    :<#U\\ #]b]i]i #r$   r8   c                 B   dd l }t        j                  j                  t        j                  j                  t        j                  j
                  t        j                  j                  f\  }}}}t        | j                        5  t        j                  j                         }t        |j                  j                  dd            }d}	| j                  d   }
| j                  d   }||z  }||z  }|dz   |z  }|
|z  }d } || |      \  }}t        j                   |d      }t        j"                  |d      \  }}t        j$                  |d|      }|j'                  d      }t        j(                  |||dz
        || }|j+                  d      j-                  t        j.                        }d	}t        j0                  ||k  ||      }t        j2                  |d
      j-                  t        j.                        }t        j2                  |      j-                  t        j.                        }t        j0                  ||k  ||	      }t        j0                  ||k  ||	      }t        j0                  ||	k(  |	|      }||   }t        j0                  ||   |	k(  |	|      } ||j                         |j                               } ||j                         |j                               } ||||      }|}d d d         |      fS # 1 sw Y   xY w)Nr   
LOCAL_RANK0r+   rm   c                     t        j                  |  dd      d d d |f   }|j                         }t        j                  | |d      }||j	                         fS )Nrm   T)dimstabler   )r   argsortlongtake_along_dimr   )valsktk_indxtk_vals       r   topkz routing_torch_dist.<locals>.topk  sS    mmTEq>q"1"uEGllnG))$Q?F7;;=((r$   r   )binsmaxi  T)r   )src_indxdst_indx)osr;   routing
GatherIndxRoutingDataScatterIndxcompute_expt_data_torchr   r   r   distributedget_world_sizer   environgetr   softmaxsortgatherrz   histcr   r   r   wherer   )logitsn_expts_actr   r   r   r   r   
world_sizerankreplace_valuen_tokensn_expts_totn_local_expertslocal_expert_startlocal_expert_endn_gates_padr   	expt_scal	expt_indxsort_indiceshistvar	topk_indx	gate_indxr   r   r   	expt_datahit_expertss                                r   routing_torch_distr    s     	""--""..""..""::	EAJ[*A 
6==	! 3"&&557
2::>>,45<<?ll1o%3!O3 1H7,	)  $FK8	9MM)4	"'**YA">	<LLA|<	 %%b)	{{9;K!OLM_`pqNN2&))%++6	 KK	,> >YO	MM)D9<<U[[I	MM),//<	KK	,< <iW	KK 2i ?MZ	KK	] :M9U	i(	KK	) 4 E}V_`	 !)--/IMMOT"IMMOimmoV+D/;O	!g3"h y$iPR]_kkki3" 3"s   I9LLc                    dd l m} |j                         r#|j                         rt	        | d      rt
        }nt        j                  j                  }|j                  d   }|j                  d| j                  j                        }t        j                  j                  || j                  j                  | j                  j                         }t#        |j$                        5   ||| j                  j&                        \  }}}d d d        | j)                  |      }	|	j                  |d| j                  j                        }	|	|fS # 1 sw Y   IxY w)Nr   
_is_hookedr+   )r   )torch.distributedr   is_availableis_initializedr   r  r;   r   r   rz   router
hidden_dimr   
functionallinearweightrn   r   r   top_kexperts)
r!   r   distr   
batch_sizerouter_logitsr   r   r   
routed_outs
             r   mlp_forwardr(    s   $t224|9T$$,,44$$Q'J!))"dkk.D.DEMMM((8J8JDKKL\L\]M	=''	( Z07t{{GXGX0Y-j+Z m\:S^_J##JDKK4J4JKJ}$$Z Zs   '"EEc                    ddl m} |j                  d      }|j                  d      }|j                  d      }	|j                  d      }
|j                  d      }|j                  d      }d	D ]  }||v s| ||||||	|
||      }| d
}| d}t        | |j	                  dd      d   |       t        | |      sPt        | |      s]t        t        | |      t        | |            }t        | |t        j                  j                  |j                  |                   t        | |       t        | |        y )Nr   shard_and_distribute_moduler&   empty_paramcasting_dtypeto_contiguousr  device_mesh)r-   r.   rX   rY   .rm   )integrations.tensor_parallelr+  r   rB   rt   r   r   r   r   r   r   r   delattr)rH   
param_nameparam_valuetarget_devicedq_param_namerE   r+  r&   r,  r-  r.  r  r/  rJ   blocks_attrscales_attrr]   s                    r   
dequantizer9    s8   JJJwE**]+KJJ/MJJ/M::fD**]+K- -:&9!!!	 "F'*K!F'*KFJ--c15a8+Fv{+0L89UW^_egrWstehh&8&89V&WX,,)-r$   c                 X    t        | |      }t        j                  j                  |      S r   )r   r   r   r   )r   r   r]   s      r   r[   r[     s#    ,VV<K88k**r$   c                 Z   |j                   j                  |j                   j                  |j                   j                  }}}ddlm}	 |j                  d      }
|j                  d      }|j                  d      }|j                  d      }|j                  d      }|j                  d      }d	|v r&|j                  d
      d   j                  d      d   }d|v r&|j                  d
      d   j                  d      d   }| |	|
|||||||       n?t        | |j                  d
d      d   t        j                  j                  |d              d}| d}t        | |      }t        | |      }|j                  j                  dk7  r|j                  j                  dk7  r|j!                  d      }|dk(  r!|j#                  || j$                  dz  d      }n |j#                  |d| j$                  dz        }t        |d|      dk(  rVt'        t        d      rFt        j(                  j+                         (t        j(                  j+                         j                  }|j-                  |      j/                         }|j-                  |      j/                         }t1        |      5  t3        |j5                  dd      |j5                  dd      |      \  }}ddd       |dk(  r5t        j6                  || j8                  | j$                  dz  g      _        n1t        j6                  || j$                  | j8                  g      _        t        | ||       t        | | d | | |                          t=        | |       t=        | |       ~yyy# 1 sw Y   xY w)q
    This transforms the weights obtained using `convert_gpt_oss.py` to load them into `Mxfp4GptOssExperts`.
    r   r*  r&   r,  r-  r.  r  r/  r   r0  r+   rX   r   r   rY   Nrm   Fr   metar-   r   r   acceleratorr,   r/   r0   r2   )r<   r=   r>   r?   r1  r+  r   splitrB   rt   r   r   r   r   r   r   sizerz   r   r   r>  current_acceleratorr   r   r   r@   r:   Sizerl   r   r2  )rH   r3  r4  r5  r;   rE   r=   r>   r?   r+  r&   r,  r-  r.  r  r/  rJ   r7  r8  r   r   local_expertsrI   r3   s                           r   load_and_swizzle_mxfp4rD  #  sf   
 	%%55%%--%%00 )WO
 KJJwE**]+KJJ/MJJ/M::fD**]+K:$R(..y9!<:$R(..y9!<#;ZW[]h	
 	
))#q1!4ehh6H6Hdi6H6jkF'"KF'"KV[)FV[)F}}V#(:(:f(DA>!^^M63K3Ka3OQSTF^^M2v7O7OST7TUFM6=9UB}-!!557C!--AACHHM=)446=)446}% 	1>  R(&*:*:2r*BDV2. ,	 >!).]FDVDVX^XpXpstXt4u)v &).]FD\D\^d^p^p4q)r & 	23f%&Q[Q]@^_	
 	$$I )E#	 	s   01N!!N*c                    |j                   j                  |j                   j                  |j                   j                  }}}| j	                  d      }	t        |d|      dk(  rVt        t        d      rFt        j                  j                         (t        j                  j                         j                  }| j                  |      j                         } |j                  |      j                         }|dk(  r!| j                  |	|j                  dz  d      } n | j                  |	d|j                  dz        } t        |      5  t!        | j#                  d	d      |j#                  d	d      |      \  }
}ddd       |dk(  r5t        j$                  |	|j&                  |j                  dz  g      
_        n1t        j$                  |	|j                  |j&                  g      
_        ||j*                  v r|j*                  |= t-        |||
       t-        || d
 | | |                          y# 1 sw Y   xY w)r<  r   r   r   r>  Nr-   r   r+   r,   r/   r0   r2   )r<   r=   r>   r?   r@  r   r   r   r>  rA  r   r   r   rz   r   r   r@   r:   rB  rl   r   rA   rB   )r   r   rH   rJ   r5  r;   r=   r>   r?   rC  rI   r3   s               r   rb   rb   j  s    
 	%%55%%--%%00 )WO KKNMv}5>E=)113?))==?DDYY}%002FYY}%002F~v/G/G!/KRPr63K3Kq3PQ	=	! 
-:R$f&6&6r2&>@R.
*l

 ~%*ZZ@R@RTZTlTlopTp0q%r"%*ZZ@X@XZ`ZlZl0m%n" v!!!t$FD./&!"\GZ\<Z[!
 
s   	1H::Imodules_to_not_convertc                 $   |j                   r| S ddlm}  |d      ad}| j	                         D ]  \  }}t        ||      s|j                  j                  dk(  rQ|j                   sEt        j                  d      5  | j                  |t        | j                               d}ddd       |j                  j                  d	k(  s|j                   rd
dlm}  |t        |      |_         |st"        j%                  d       | S # 1 sw Y   cxY w)aD  
    Public method that replaces the expert layers of the given model with mxfp4 quantized layers.

    Args:
        model (`torch.nn.Module`):
            The model to convert, can be any `torch.nn.Module` instance.
        quantization_config (`Mxfp4Config`, defaults to `None`):
            The quantization config object that contains the quantization parameters.
        modules_to_not_convert (`list`, *optional*, defaults to `None`):
            A list of modules to not convert. If a module name is in the list (e.g. `lm_head`), it will not be
            converted.
    rm   )
get_kernelz(kernels-community/gpt-oss-triton-kernelsFGptOssExpertsr=  TN	GptOssMLPr   )
MethodTypezYou are loading your model using mixed-precision FP4 quantization but no linear modules were found in your model. Please double check your model architecture, or submit an issue on github if you think this is a bug.)r9  hub_kernelsrH  r;   named_modulesr
   r   rN   r   r   set_submoduler8   rs   typesrK  r(  r   loggerwarning)r&   quantization_configrF  rH  has_been_replacedmodule_namerH   rK  s           r   replace_with_mxfp4_linearrU    s     %%' $$NO$224 
=V$[2HI$$7@S@^@^f% )##K1CELL1QR$(!) $$3<O<Z<Z('V<FN
= 	
 L) )s   <(DD	)NN)(utilsr   r   r   r   
contextlibr   core_model_loadingr   quantizers.quantizers_utilsr	   r
   
get_loggerrN   rP  r   r   r   rU   r_   rd   r9   r@   r   r   r   r   r   r   rR   r8   r  r(  r9  r[   rD  rb   r7   r   rU  rS   r$   r   <module>r[     sO   0  % . U 
		H	%
( 
 
*-M -`.m .<-:} -:`0m 0h0 &3, ;;	3,
 3, \\3,t &g ;;	g
 g \\g2># >#FAlH%(-B+
DN.b)W[\_W`cgWg )r$   