
import importlib.metadata
from typing import TYPE_CHECKING

from packaging import version

from .base import HfQuantizer


if TYPE_CHECKING:
    from ..modeling_utils import PreTrainedModel

from ..utils import is_accelerate_available, is_gptqmodel_available, is_torch_available, logging
from ..utils.quantization_config import AwqBackend


if is_torch_available():
    import torch

logger = logging.get_logger(__name__)


class AwqQuantizer(HfQuantizer):
    """
    4-bit quantization for Activation-aware Weight Quantization (AWQ) (https://huggingface.co/papers/2306.00978)
    """

    requires_calibration = True

    def __init__(self, quantization_config, **kwargs):
        super().__init__(quantization_config, **kwargs)

    def validate_environment(self, **kwargs):
        if not is_gptqmodel_available():
            raise ImportError(
                "Loading an AWQ quantized model requires gptqmodel. Please install it with `pip install gptqmodel`"
            )
        if not is_accelerate_available():
            raise ImportError("Loading an AWQ quantized model requires accelerate (`pip install accelerate`)")

    def update_dtype(self, dtype):
        if dtype == torch.bfloat16:
            if torch.cuda.is_available() or torch.xpu.is_available():
                logger.warning(
                    "`torch.bfloat16` is not supported for AWQ CUDA/XPU kernels yet. Casting to `torch.float16`."
                )
                dtype = torch.float16
        elif dtype != torch.float16:
            if torch.cuda.is_available() or torch.xpu.is_available():
                logger.warning(
                    "We suggest you to set `dtype=torch.float16` for better efficiency on CUDA/XPU with AWQ."
                )
        return dtype

    def _process_model_before_weight_loading(self, model: "PreTrainedModel", **kwargs):
        from ..integrations import replace_quantization_scales, replace_with_awq_linear

        # Modules kept in full precision (e.g. the lm_head and the model's
        # `_keep_in_fp32_modules`) are excluded from the AWQ conversion.
        self.modules_to_not_convert = self.get_modules_to_not_convert(
            model,
            self.quantization_config.modules_to_not_convert,
            model._keep_in_fp32_modules,
            add_default_skips=True,
        )

        # Swap the remaining nn.Linear layers for their AWQ counterparts.
        model = replace_with_awq_linear(
            model,
            quantization_config=self.quantization_config,
            modules_to_not_convert=self.modules_to_not_convert,
            device_map=kwargs.get("device_map"),
        )

        # Some architectures additionally need their activation scales patched.
        model = replace_quantization_scales(model, model.config.model_type)

    def _process_model_after_weight_loading(self, model: "PreTrainedModel", **kwargs):
        from gptqmodel.utils.model import hf_gptqmodel_post_init

        hf_gptqmodel_post_init(model, use_act_order=self.quantization_config.desc_act)

    def is_serializable(self):
        if self.quantization_config.backend in [AwqBackend.EXLLAMA_V1, AwqBackend.EXLLAMA_V2]:
            logger.warning("You cannot save an AWQ model that uses Exllama backend!")
            return False
        return True

    @property
    def is_trainable(self):
        return version.parse(importlib.metadata.version("gptqmodel")) >= version.parse("5.0.0")
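

# ---------------------------------------------------------------------------
# Usage sketch (illustrative, not part of the quantizer API). AwqQuantizer is
# never instantiated by hand: transformers selects it automatically when a
# checkpoint's config declares AWQ quantization. The repo id below is an
# assumption -- substitute any AWQ-quantized checkpoint -- and running this
# needs the backends checked in `validate_environment` (gptqmodel, accelerate).
if __name__ == "__main__":
    from transformers import AutoModelForCausalLM, AutoTokenizer

    repo_id = "TheBloke/Mistral-7B-Instruct-v0.2-AWQ"  # hypothetical example checkpoint
    tokenizer = AutoTokenizer.from_pretrained(repo_id)

    # The AWQ path inside from_pretrained routes through the quantizer above:
    # validate_environment -> update_dtype -> _process_model_before_weight_loading
    # -> weight loading -> _process_model_after_weight_loading.
    model = AutoModelForCausalLM.from_pretrained(repo_id, device_map="auto")

    inputs = tokenizer("AWQ keeps salient weight channels accurate.", return_tensors="pt").to(model.device)
    print(tokenizer.decode(model.generate(**inputs, max_new_tokens=32)[0], skip_special_tokens=True))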