
    qi7                        d dl Z ddlmZ ddlmZ ddlmZmZmZm	Z	m
Z
mZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZ ddlmZ ddl m!Z! dd	l"m#Z# dd
l$m%Z% ddl&m'Z' ddl(m)Z) ddl*m+Z+ ddl,m-Z- ddl.m/Z/ ddl0m1Z1 ddl2m3Z3 ddl4m5Z5 ddl6m7Z7 ddl8m9Z9 ddl:m;Z; ddl<m=Z= ddl>m?Z? ddl@mAZA ddlBmCZC ddlDmEZE ddlFmGZG ddlHmIZI ddlJmKZK ddlLmMZM i de%d e)d!e+d"e9d#e!d$eCd%eEd&e5d'e7d(e/d)e;d*e=d+e-d,e1d-eKd.e'd/eMeIe3e#eAe?eGd0ZNi ded e
d!e
d(ed"ed#ed$ed%ed&ed'ed*ed+ed,ed)ed-ed.e	d/eeeeeeed0ZO ej                  eQ      ZR G d1 d2      ZS G d3 d4      ZTd5eUfd6ZVd7eUfd8ZWd9 ZXy):    N   )
AutoConfig)logging)
AqlmConfigAutoRoundConfig	AwqConfigBitNetQuantConfigBitsAndBytesConfigCompressedTensorsConfig
EetqConfigFbgemmFp8ConfigFineGrainedFP8ConfigFourOverSixConfigFPQuantConfig
GPTQConfigHiggsConfig	HqqConfigMetalConfigMxfp4ConfigQuantizationConfigMixinQuantizationMethodQuantoConfigQuarkConfig
SinqConfig
SpQRConfigTorchAoConfig
VptqConfig   )HfQuantizer)AqlmHfQuantizer)AutoRoundQuantizer)AwqQuantizer)BitNetHfQuantizer)Bnb4BitHfQuantizer)Bnb8BitHfQuantizer)CompressedTensorsHfQuantizer)EetqHfQuantizer)FbgemmFp8HfQuantizer)FineGrainedFP8HfQuantizer)FourOverSixHfQuantizer)FPQuantHfQuantizer)GptqHfQuantizer)HiggsHfQuantizer)HqqHfQuantizer)MetalHfQuantizer)Mxfp4HfQuantizer)QuantoHfQuantizer)QuarkHfQuantizer)SinqHfQuantizer)SpQRHfQuantizer)TorchAoHfQuantizer)VptqHfQuantizerawqbitsandbytes_4bitbitsandbytes_8bitgptqaqlmquantoquarkfouroversixfp_quanteetqhiggshqqzcompressed-tensors
fbgemm_fp8torchaobitnetvptq)spqrfp8z
auto-roundmxfp4metalsinqc                   6    e Zd ZdZedefd       Zed        Zy)AutoQuantizationConfigz
    The Auto-HF quantization config class that takes care of automatically dispatching to the correct
    quantization config given a quantization config stored in a dictionary.
    quantization_config_dictc           	      v   |j                  d      }|j                  dd      s|j                  dd      r*|j                  dd      rdnd}t        j                  |z   }n|t        d      |t        vr,t        d| d	t        t        j                                      t        |   }|j                  |      S )
Nquant_methodload_in_8bitFload_in_4bit_4bit_8bitThe model's quantization config from the arguments has no `quant_method` attribute. Make sure that the model has been correctly quantizedUnknown quantization type, got  - supported types are: )	getr   BITS_AND_BYTES
ValueError AUTO_QUANTIZATION_CONFIG_MAPPINGlistAUTO_QUANTIZER_MAPPINGkeys	from_dict)clsrN   rP   suffix
target_clss        N/opt/pipecat/venv/lib/python3.12/site-packages/transformers/quantizers/auto.pyr_   z AutoQuantizationConfig.from_dict   s    /33NC#''>BZB^B^_motBu 8 < <^U SWY`F-<<vEL! \  ??1, @/44678: 
 6lC
##$<==    c                     t        j                  |fi |}t        |dd       t        d| d      |j                  }| j                  |      } |j                  di | |S )Nquantization_configz)Did not found a `quantization_config` in z2. Make sure that the model is correctly quantized. )r   from_pretrainedgetattrrZ   rf   r_   update)r`   pretrained_model_name_or_pathkwargsmodel_configrN   rf   s         rc   rh   z&AutoQuantizationConfig.from_pretrained   s    !112OZSYZ<!6=E;<Y;Z  [M  N  $0#C#C !mm,DE""",V,""rd   N)__name__
__module____qualname____doc__classmethoddictr_   rh   rg   rd   rc   rM   rM   ~   s6    
 > > >( 
# 
#rd   rM   c                   r    e Zd ZdZedeez  fd       Zed        Zedeez  dedz  fd       Z	e
d        Zy)	AutoHfQuantizerz
     The Auto-HF quantizer class that takes care of automatically instantiating to the correct
    `HfQuantizer` given the `QuantizationConfig`.
    rf   c           	      D   t        |t              rt        j                  |      }|j                  }|t
        j                  k(  r|j                  r|dz  }n|dz  }|t        vr,t        d| dt        t        j                                      t        |   } ||fi |S )NrT   rS   rV   rW   )
isinstancers   rM   r_   rP   r   rY   rQ   r]   rZ   r\   r^   )r`   rf   rl   rP   rb   s        rc   from_configzAutoHfQuantizer.from_config   s     )40"8"B"BCV"W*77 -<<<"//''551, @/44678: 
 ,L9
-888rd   c                 P    t        j                  |fi |}| j                  |      S )N)rM   rh   rx   )r`   rk   rl   rf   s       rc   rh   zAutoHfQuantizer.from_pretrained   s*    4DDEbmflm233rd   quantization_config_from_argsNc                 *   |d}nd}t        |t              r;t        |t              rt        j                  |      }nt        j                  |      }|g|j
                  j                  |j
                  j                  k7  r:t        d|j
                  j                   d|j
                  j                   d      t        |t        t        t        t        t        t        t        t        f      rW|U|j                         }|j!                         D ]  \  }}t#        |||        |dt%        |j'                                dz  }|dk7  r2t        |t        t        t        f      st)        j*                  |       |S t,        j/                  |       |S )z
        handles situations where both quantization_config from args and quantization_config from model config are present.
        zYou passed `quantization_config` or equivalent parameters to `from_pretrained` but the model you're loading already has a `quantization_config` attribute. The `quantization_config` from the model will be used. zThe model is quantized with z but you are passing a z| config. Please make sure to pass the same quantization config class to `from_pretrained` with different loading attributes.z"However, loading attributes (e.g. z]) will be overwritten with the one you passed to `from_pretrained`. The rest will be ignored.)rw   rs   r   r_   rM   	__class__rn   rZ   r   r   r   r   r   r   r   get_loading_attributesitemssetattrr\   r^   warningswarnloggerinfo)r`   rf   rz   warning_msgloading_attr_dictattrvals          rc   merge_quantization_configsz*AutoHfQuantizer.merge_quantization_configs   s    )4y 
 K)407I&5&?&?@S&T#&<&F&FGZ&[# *5#--66:W:a:a:j:jj./B/L/L/U/U.VVm  oL  oV  oV  o_  o_  n` `F F  ###+(	 .9 = T T V.446 8	c+T378 ?EVE[E[E]@^?_  `}  ~  ~K"Z0CkS^`tEu%vMM+& #" KK$""rd   c           	      ^   | j                  dd       }| j                  dd      s| j                  dd      r*| j                  dd      rdnd}t        j                  |z   }n|t        d      |t        vr8t
        j                  d| d	t        t        j                                d
       yy)NrP   rQ   FrR   rS   rT   rU   rV   rW   z~. Hence, we will skip the quantization. To remove the warning, you can delete the quantization_config attribute in config.jsonT)
rX   r   rY   rZ   r[   r   warningr\   r]   r^   )rN   rP   ra   s      rc   supports_quant_methodz%AutoHfQuantizer.supports_quant_method	  s    /33NDI#''>BZB^B^_motBu 8 < <^U SWY`F-<<vEL! \  ??NN1, @/44678 9ii
 rd   )rn   ro   rp   rq   rr   r   rs   rx   rh   r   staticmethodr   rg   rd   rc   ru   ru      s    
 9.E.L 9 90 4 4 ;#!$;;;# (?'E;# ;#z  rd   ru   methodc                       fd}|S )z-Register a custom quantization configuration.c                 ~    t         v rt        d d      t        | t              st	        d      | t         <   | S )NzConfig '' already registeredz*Config must extend QuantizationConfigMixin)r[   rZ   
issubclassr   	TypeError)r`   r   s    rc   register_config_fnz8register_quantization_config.<locals>.register_config_fn!  sH    55xx/CDEE#67HII36(0
rd   rg   )r   r   s   ` rc   register_quantization_configr     s     rd   namec                       fd}|S )zRegister a custom quantizer.c                 ~    t         v rt        d d      t        | t              st	        d      | t         <   | S )NzQuantizer 'r   z!Quantizer must extend HfQuantizer)r]   rZ   r   r   r   )r`   r   s    rc   register_quantizer_fnz1register_quantizer.<locals>.register_quantizer_fn1  sG    )){4&0DEFF#{+?@@'*t$
rd   rg   )r   r   s   ` rc   register_quantizerr   .  s     ! rd   c                 "   t        | d      }|r!t        j                  | j                        sd}|s|Q|r&t        j	                  | j                  |      | _        n|| _        t        j                  | j                  |      }nd }||j                  ||       |j                  |      }|j                  |       } |j                  |       } t        |j                  dd      s&|j                  j                  }t        |d|      |d<   || |fS )Nrf   F)pre_quantized)
device_mapweights_only
dequantizevaluequant)hasattrru   r   rf   r   rx   validate_environmentupdate_device_mapupdate_tp_planupdate_ep_planri   rP   )configrf   r   r   
user_agentr   hf_quantizerrP   s           rc   get_hf_quantizerr   >  s   F$9:M_BB6C]C]^+7)8)S)S**,?*F& *=F&&22&&' 3 

 ))!% 	* 	
 "33J?
,,V4,,V4 |77uM';;HHL"),"NJw++rd   )Yr   models.auto.configuration_autor   utilsr   utils.quantization_configr   r   r   r	   r
   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   baser   quantizer_aqlmr    quantizer_auto_roundr!   quantizer_awqr"   quantizer_bitnetr#   quantizer_bnb_4bitr$   quantizer_bnb_8bitr%   quantizer_compressed_tensorsr&   quantizer_eetqr'   quantizer_fbgemm_fp8r(   quantizer_finegrained_fp8r)   quantizer_fouroversixr*   quantizer_fp_quantr+   quantizer_gptqr,   quantizer_higgsr-   quantizer_hqqr.   quantizer_metalr/   quantizer_mxfp4r0   quantizer_quantor1   quantizer_quarkr2   quantizer_sinqr3   quantizer_spqrr4   quantizer_torchaor5   quantizer_vptqr6   r]   r[   
get_loggerrn   r   rM   ru   strr   r   r   rg   rd   rc   <module>r      s    7       4  + 4 ' / 2 2 F + 6 @ 9 2 + - ) - - / - + + 1 +	<+ + O	
 O   ) " O  
> 6 & !  !" O#$ $$/ 4$	9$+$ +$ J	$
 J$ J$ l$ [$ $$ $ 
9$ 1$ /$ [$ }$  !$" J#$$ !/$  4 
		H	%&# &#Rt tn  !S ! !,rd   