
    qi                         d dl mZ d dlmZmZ d dlmZmZ  e       r
ddlZddl	m
Z
  ej                  e      Z G d de      Z	 	 d
dee   dz  fd	Zy)   )ConversionOps)get_module_from_nameshould_convert_module)is_torch_availablelogging    Nc                       e Zd Zd Z	 	 	 d	deeeej                     f   dej                  j                  dz  dedz  dee   dz  deeej                  f   f
dZy)
QuantoQuantizec                     || _         y )N)hf_quantizer)selfr   s     R/opt/pipecat/venv/lib/python3.12/site-packages/transformers/integrations/quanto.py__init__zQuantoQuantize.__init__   s
    (    N
input_dictmodelfull_layer_namemissing_keysreturnc                 6   t        |j                               d   \  }}|d   }ddlm}  ||||       t	        ||      \  }	}t        j                  |	j                  j                        |	_        t        j                  |	j                  j                        |	_	        |	j                          d|	j                  _        d|	_        |j                  dd      d   }
|j                  |
 d       |j                  |
 d	       |j                  |
 d
       i S )Nr   r   )_load_parameter_into_modelFT.   z.weightz.input_scalez.output_scale)tupleitemsmodeling_utilsr   r   torchonesinput_scaleshapeoutput_scalefreezeweightrequires_grad_is_hf_initializedrsplitdiscard)r   r   r   r   r   kwargs_valuer   modulemodule_names              r   convertzQuantoQuantize.convert   s     ))+,Q/5a?"5/5A(@	"ZZ(:(:(@(@A#jj)<)<)B)BC&+#$(! &,,S!4Q7}G45}L9:}M:;	r   )NNN)__name__
__module____qualname__r   dictstrlistr   TensornnModuler-    r   r   r
   r
      s    ) )-&*)-d5<<001 xx% t	
 3i$& 
c5<<	 r   r
   modules_to_not_convertc                    ddl m}m}m}m}m}m} ||||d}	d||d}
d}| j                         D ]4  \  }}t        ||      st        j                  d      5  d}t        |t        j                        r] ||j                  |j                  |j                   du|j"                  j$                  |	|j&                     |
|j(                           }ntt        |t        j                  j*                        rP|j(                  D ||j,                  |j.                  |j0                  |j                   du|
|j(                     	      }|d
}| j3                  ||       ddd       7 |st4        j7                  d       | S # 1 sw Y   [xY w)a  
    Public method that recursively replaces the Linear layers of the given model with Quanto quantized layers.
    Returns the converted model and a boolean that indicates if the conversion has been successful or not.

    Args:
        model (`torch.nn.Module`):
            The model to convert, can be any `torch.nn.Module` instance.
        quantization_config (`QuantoConfig`, defaults to `None`):
            The quantization config object that contains the quantization parameters.
        modules_to_not_convert (`list`, *optional*, defaults to `None`):
            A list of modules to not convert. If a module name is in the list (e.g. `lm_head`), it will not be
            converted.
    r   )
QLayerNormQLinearqfloat8qint2qint4qint8)float8int8int4int2N)Nr@   rA   Fmeta)in_featuresout_featuresbiasdtypeweightsactivations)rJ   TzYou are loading your model using quanto but no linear modules were found in your model. Please double check your model architecture, or submit an issue on github if you think this is a bug.)optimum.quantor:   r;   r<   r=   r>   r?   named_modulesr   r   device
isinstancer5   LinearrE   rF   rG   r#   rH   rI   rJ   	LayerNormnormalized_shapeepselementwise_affineset_submoduleloggerwarning)r   quantization_configr8   r:   r;   r<   r=   r>   r?   	w_mapping	a_mappinghas_been_replacedr,   r+   
new_modules                  r   replace_with_quanto_layersr\   >   s}   $ QP"E5%PIw>I$224 =V$[2HI\\&! 	=J&")),$ & 2 2!'!4!4D0 ----%&9&A&AB )*=*I*I J
 FEHH$6$67<O<[<[<g'++JJ--KKt+ )*=*I*I J
 %$(!##K<+	= 	==4 	
 L=	= 	=s   DFF	)NN)core_model_loadingr   quantizers.quantizers_utilsr   r   utilsr   r   r   torch.nnr5   
get_loggerr.   rU   r
   r3   r2   r\   r7   r   r   <module>rb      s]    / U / 			H	% ]  J /39 !I,9r   