
    qi                     x    d dl Z ddlmZ ddlmZ  e       rd dlmZ d dlmZ ddl	m
Z
  G d d	e
      Zd
efdZy)    N   )get_module_from_name)is_fouroversix_available)ModelQuantizationConfig)FourOverSixConfig)ConversionOpsc                       e Zd Zd Z	 	 	 d	deeej                  f   dej                  j                  dz  dedz  de
e   dz  deeej                  f   f
dZy)
FourOverSixQuantizec                     || _         y )N)hf_quantizer)selfr   s     W/opt/pipecat/venv/lib/python3.12/site-packages/transformers/integrations/fouroversix.py__init__zFourOverSixQuantize.__init__   s
    (    N
input_dictmodelfull_layer_namemissing_keysreturnc                    | j                   j                  j                  r|S t        ||      \  }}|j	                  dd      d   }t        |j                               d   }	|	j                  | ddd      }
||	   d   }|j                  |
|      }|j                  D ]  }
t        ||
        |D ]  }|j                  |        |D ci c]  }| d| ||    c}S c c}w )aZ  
        We need to store some parameters to create the quantized weight. For example, fouroversix
        requires 4 values that are stored in the checkpoint to recover the quantized weight. So we
        store them in a dict that is stored in hf_quantizer for now as we can't save it in the op
        since we create an op per tensor.
        .   r    )r   quantization_configkeep_master_weightsr   rsplitlistkeysreplaceget_quantized_parametersparameters_to_quantizedelattrdiscard)r   r   r   r   r   kwargsmodule_module_namefull_parameter_nameparameter_name	parameterquantized_parameterskeyquantized_keys                  r   convertzFourOverSixQuantize.convert   s    00DD(@	%,,S!4Q7":??#45a8,44}A5FAN23A6	%>>~yY %;; 	,NFN+	,  	&C  %	&
 "6
 m1]O,.B=.QQ
 	
 
s   C!)NNN)__name__
__module____qualname__r   dictstrtorchTensornnModuler   r.    r   r   r
   r
      s}    ) )-&*)-&
ell*+&
 xx%&
 t	&

 3i$&&
 
c5<<	 &
r   r
   configc                     t        | j                  | j                  | j                  | j                  | j
                  | j                  | j                  | j                  | j                  | j                  | j                  | j                        S )N)activation_scale_ruledtypegradient_scale_ruler   matmul_backendoutput_dtypequantize_backend
scale_ruleweight_scale_2dweight_scale_rulemodules_to_not_convertmodule_config_overrides)r   r;   r<   r=   r   r>   r?   r@   rA   rB   rC   rD   rE   )r9   s    r   adapt_fouroversix_configrF   <   sy    "$::ll"66"66,,((00$$.. 22%<< & > > r   )r4   quantizers.quantizers_utilsr   utilsr   fouroversixr   &transformers.utils.quantization_configr   core_model_loadingr   r
   rF   r8   r   r   <module>rL      s9     > , 3 D .*
- *
Z%6 r   