
    qi#                         d dl mZmZ ddlmZmZmZ ddlmZ ddl	m
Z
  e       rd dlZerddlmZ  ej                  e      Z G d	 d
e      Zy)    )TYPE_CHECKINGAny   )is_kernels_availableis_torch_availablelogging   )HfQuantizer)get_module_from_nameN)PreTrainedModelc                        e Zd ZdZdZ fdZd Zdeee	f   dz  deee	f   dz  fdZ
d	d
dedefdZddZd Zedefd       Zd Zd Z xZS )MetalHfQuantizera&  
    Quantizer for Metal affine quantization on Apple Silicon (MPS) devices.

    Uses the ``quantization-mlx`` Metal kernels from the Hub to pack weights into
    low-bit (2/4/8) uint32 tensors with per-group scales and biases, and performs
    fused dequant + matmul in the forward pass.
    Fc                 &    t        |   |fi | y )N)super__init__)selfquantization_configkwargs	__class__s      Y/opt/pipecat/venv/lib/python3.12/site-packages/transformers/quantizers/quantizer_metal.pyr   zMetalHfQuantizer.__init__)   s    ,77    c                    | j                   j                  ry t        j                  j                  j                         s>| j                  r't        j                  d       d| j                   _        y t        d      t               st        d      |j                  d      }|t        j                  d       y t        |t              r=| j                  s0d|j                         v sd|j                         v rt!        d	      y y y )
NzMetal quantization requires an Apple Silicon GPU (MPS), but none is available. We will default to dequantizing the model to the original dtype.TzLMetal quantization requires an Apple Silicon GPU (MPS). No MPS device found.z:Metal quantization requires kernels: `pip install kernels`
device_mapzYou have loaded a Metal quantized model on CPU and have an MPS device available. Set device_map='mps' to use the Metal kernels.cpudiskzMetal quantization on the fly does not support CPU or disk in the device_map. Please use a pre-quantized checkpoint or remove CPU/disk from device_map.)r   
dequantizetorchbackendsmpsis_availablepre_quantizedloggerwarning_onceRuntimeErrorr   ImportErrorget
isinstancedictvalues
ValueError)r   argsr   r   s       r   validate_environmentz%MetalHfQuantizer.validate_environment,   s    ##..~~!!..0!!##W 7;((3"#qrr#%Z[[ZZ-
A 
D)%%5J4E4E4G+G6U_UfUfUhKh `  Li% *r   r   Nreturnc                     |ddi}|S )N r    )r   r   s     r   update_device_mapz"MetalHfQuantizer.update_device_mapK   s    eJr   modelr   
param_namec                 l    ddl m} t        ||      \  }}t        ||      r| j                  s|dk7  ryyy)Nr   )MetalLinearweightFT)integrations.metal_quantizationr5   r   r'   r!   )r   r2   r3   r   r5   moduletensor_names          r   param_needs_quantizationz)MetalHfQuantizer.param_needs_quantizationP   s9    A25*Efk*!![H%<r   c                     ddl m} | j                  || j                  j                  |j
                        | _         ||| j                  | j                  | j                        }y )Nr   )replace_with_metal_linear)modules_to_not_convertr   r!   )r7   r<   get_modules_to_not_convertr   r=   _keep_in_fp32_modulesr!   )r   r2   r   r<   s       r   $_process_model_before_weight_loadingz5MetalHfQuantizer._process_model_before_weight_loadingZ   s[    O&*&E&E4++BBED_D_'
# *#'#>#> $ 8 8,,	
r   c                      y)NTr0   r   s    r   is_serializablez MetalHfQuantizer.is_serializableh   s    r   c                      y)NFr0   rB   s    r   is_trainablezMetalHfQuantizer.is_trainablek   s    r   c                     ddl m}  ||       S )Nr   )MetalQuantize)r7   rG   )r   rG   s     r   get_quantize_opsz!MetalHfQuantizer.get_quantize_opso   s    CT""r   c                     ddl m} ddlm} | j                  r+| j
                  j                  r |g dd ||       g      gS g S )Nr   )WeightConverter)MetalDequantize)zweight$scalesqbiasesr6   )source_patternstarget_patterns
operations)core_model_loadingrJ   r7   rK   r!   r   r   )r   rJ   rK   s      r   get_weight_conversionsz'MetalHfQuantizer.get_weight_conversionst   sI    8E$":":"E"E$D$, / 56  	r   )r2   r   )__name__
__module____qualname____doc__requires_calibrationr   r,   r(   strr   r1   boolr:   r@   rC   propertyrE   rH   rR   __classcell__)r   s   @r   r   r      s     !8>DcNT,A d3PS8nW[F[ 
.? S _c 
 d  #
r   r   )typingr   r   utilsr   r   r   baser
   quantizers_utilsr   r   modeling_utilsr   
get_loggerrS   r"   r   r0   r   r   <module>rb      sG    & E E  2 0			H	%b{ br   