
    qi              
           d Z ddlmZ ddlmZmZ  e       r
ddlZddlmZ  ej                  e
      Zddddd	ddd	ddd
dddddd	dddddd	ddZd Z	 	 	 ddeez  dz  defdZy)z;AWQ (Activation aware Weight Quantization) integration file   )should_convert_module)is_torch_availablelogging    Nactc_fc)r   layer_before_actdense_h_to_4hup_projfc_in	gelu_impl)
starcoder2RefinedWebModelfalconmptgptjgpt_neoxgpt_bigcodebloomc                 Z   ddl m} |t        vr| S | j                         D ]  \  }}t        |   d   }t        |   d   }||k(  rYt	        | |      rMt        | t        |   d         }|j                  }t        j                  |      }	 |||	      | j                  |<   t        ||      }
 | S )Nr   )ScaledActivationr   r	   )&gptqmodel.quantization.awq.modules.actr   AWQ_SCALES_MAPPINGSnamed_childrenhasattrgetattrout_featurestorchones_modulesreplace_quantization_scales)model
model_typer   namemoduleact_namelayer_before_act_namer	   size
scale_like_s              O/opt/pipecat/venv/lib/python3.12/site-packages/transformers/integrations/awq.pyr!   r!   '   s    G,,,,. <f&z259 3J ?@R S8/D E&u.A*.MN`.ab#00DD)J#3FJ#GENN4 '
;< L    
device_mapreturnc                    ddl m} ddlm}  ||j                  |j
                  dd|j                  |j                  ||j                  |j                  d
      }| j                         D ]  \  }}t        ||      st        j                  d      5  t        |t        j                         r ||j                  |j"                  |j$                  |j
                  |j&                  |j(                  |j*                  du|j,                  j                  d		      }	|	j/                  d       | j1                  ||	       d}
ddd        
st2        j5                  d
       | S # 1 sw Y   xY w)a  
    Public method that replaces the linear layers of the given model with awq quantized layers.

    Args:
        model (`torch.nn.Module`):
            The model to convert, can be any `torch.nn.Module` instance.
        quantization_config (`AwqConfig`):
            The quantization config object that contains the quantization parameters.
        modules_to_not_convert (`list[str]`, *optional*, defaults to `None`):
            A list of nn.Linear weights to not convert. If a parameter path is in the list (e.g. `lm_head.weight`), the corresponding module will not be
            converted.
        device_map (`Union[str, dict]`, *optional*, defaults to `None`):
            The device map that maps the parameters to the device
    r   )METHOD)hf_select_quant_linear_v2F)
bits
group_sizedesc_actsymformatbackendr-   quant_method
zero_pointpackmetaNT)	r2   r5   r4   r3   in_featuresr   biasdevregister_bufferszYou are loading your model using eetq but no linear modules were found in your model. Please double check your model architecture, or submit an issue on github if you think this is a bug.)gptqmodel.quantizationr0   gptqmodel.utils.importerr1   r2   r3   r6   r7   AWQr9   named_modulesr   r   device
isinstancennLinearr5   r4   r<   r   r=   weightrequires_grad_set_submoduleloggerwarning)r"   modules_to_not_convertquantization_configr-   r0   r1   
target_clsmodule_namer%   
new_modulehas_been_replaceds              r+   replace_with_awq_linearrS   8   sT   ( .B* %%&11"))#++ZZ&11J  %224 )V$[2HI\\&! 	)&")),',11+//0992== & 2 2!'!4!4D0,,%)

 ))%0##K<$(!	) 	))( 	
 L1	) 	)s   B-E##E-	)NNN)__doc__quantizers.quantizers_utilsr   utilsr   r   r   torch.nnrF   
get_logger__name__rK   r   r!   strdictboolrS    r,   r+   <module>r^      s    > ? / 			H	%  V<$/JAi8w7?C f= oF	 &  $(	? d
T!	?
 
?r,   