
    qi                     
   d dl mZ d dlmZ d dlmZmZ  e       r
ddlZddlm	Z	  ej                  e      Z G d de      Z G d d	ej                  j                        Z G d
 de	j"                        Zddee   dz  fdZy)   )ConversionOps)should_convert_module)is_torch_availablelogging    Nc            	       r    e Zd Zd Z	 ddeeeej                     f   dedz  deeej                  f   fdZ	y)EetqQuantizec                     || _         y N)hf_quantizer)selfr   s     P/opt/pipecat/venv/lib/python3.12/site-packages/transformers/integrations/eetq.py__init__zEetqQuantize.__init__   s
    (    N
input_dictfull_layer_namereturnc                 j   t        |j                               d   \  }}|d   }|j                  }t        j                  |      j                         j                         }t        j                  |t        j                  d      \  }}|j                  |      }|j                  |      }||| d|iS )Nr   F_scales)tupleitemsdevicetorcht
contiguouscpueetq_kernels_hubquant_weightsint8to)	r   r   r   kwargs_valuevalue_deviceint8_weightscaless	            r   convertzEetqQuantize.convert   s     ))+,Q/5a||ggen//1557.<<[%**V[\V!nn\2<(0A.I6RRr   r   )
__name__
__module____qualname__r   dictstrlistr   Tensorr'    r   r   r	   r	      sU    ) X\SsD$667SJMPT*S	c5<<	 Sr   r	   c                   .    e Zd Zedd       Zed        Zy)EetqLinearMMFunctionNc                 r    | j                  ||||       t        j                  |||      }|||z   }|S |}|S r   )save_for_backwardr   w8_a16_gemm)ctxxweightr&   biasoutputs         r   forwardzEetqLinearMMFunction.forward0   sK     	a6!--a@"&"2$ 9?r   c                    | j                   \  }}}}t        j                  |j                  d         j	                  |j
                        j	                  |j                        }t        j                  |||      }| j                  d   r?|j                  d      j                  |j                  dd            j                  d      }d d d fS )Nr      )saved_tensorsr   eyeshaper    r   dtyper   r4   needs_input_gradsqueezematmul	transpose	unsqueeze)r5   grad_outputinputr7   r&   r8   identity
grad_inputs           r   backwardzEetqLinearMMFunction.backward8   s    &)&7&7#vvt99V\\!_-00?BB5;;O "--hG"$,,Q/66v7G7G17MNXXYZ[J4t++r   r   )r(   r)   r*   staticmethodr:   rJ   r/   r   r   r1   r1   /   s(      , ,r   r1   c                   >     e Zd Zej                  df fd	Zd Z xZS )
EetqLinearFc                    t         |           t        j                  t	        j
                  ||f|      d      | _        t        j                  t	        j
                  |t        j                              | _        |r>t        j                  t	        j
                  |t        j                              | _	        y d | _	        y )N)r@   F)requires_grad)
superr   nn	Parameterr   emptyr7   float16weight_scalesr8   )r   in_featuresout_featuresr@   r8   	__class__s        r   r   zEetqLinear.__init__H   s{    ll5;;\/JRW#Xhmn\\%++|EMM*Z[U[[,u}}%UVDIDIr   c                 r    t         j                  || j                  | j                  | j                        }|S r   )r1   applyr7   rU   r8   )r   rG   r9   s      r   r:   zEetqLinear.forwardQ   s.    %++E4;;@R@RTXT]T]^r   )r(   r)   r*   r   r   r   r:   __classcell__)rX   s   @r   rM   rM   G   s    8=

 r   rM   modules_to_not_convertc                    ddl m}  |d      ad}|ri nddi}| j                         D ]  \  }}t	        ||      st        j                  d      5  t        |t        j                        rCt        |j                  |j                  fd|j                  dui|}| j                  ||       d	}ddd        |st        j!                  d
       | S # 1 sw Y   xY w)a  
    A helper function to replace all `torch.nn.Linear` modules by `EetqLinear` modules.

    Parameters:
        model (`torch.nn.Module`):
            Input model or `torch.nn.Module` as the function is run recursively.
        modules_to_not_convert (`list[`str`]`, *optional*, defaults to `None`):
            Names of the modules to not convert in `EetqLinear`. In practice we keep the `lm_head` in full precision
            for numerical stability reasons.
    r<   )
get_kernelz#kernels-community/quantization-eetqFr@   Nmetar8   TzYou are loading your model using eetq but no linear modules were found in your model. Please double check your model architecture, or submit an issue on github if you think this is a bug.)hub_kernelsr^   r   named_modulesr   r   r   
isinstancerQ   LinearrM   rV   rW   r8   set_submoduleloggerwarning)	modelr\   pre_quantizedr^   has_been_replacedmodule_kwargsmodule_namemodule
new_modules	            r   replace_with_eetq_linearrn   V   s     ( ""GH'Bgt_M$224 	)V$[2HI\\&! 	)&")),'&&(;(;BH++UYBY]j
 ##K<$(!	) 	)	) 	
 L	) 	)s   ACC	)NF)core_model_loadingr   quantizers.quantizers_utilsr   utilsr   r   r   torch.nnrQ   
get_loggerr(   re   r	   autogradFunctionr1   ModulerM   r-   r,   rn   r/   r   r   <module>rw      s{    / ? /  
		H	%S= S(,5>>22 ,0 %DI<L %r   