
    qi                        d dl mZ d dlmZ d dlmZmZ ddlmZ ddl	m
Z
mZ  ej                  e      Z e       r
d dlZd dlmZ 	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 ddZ G d	 d
e      Z G d de      Zy)    )annotations)Any)is_torch_availablelogging   )ConversionOps)get_module_from_nameshould_convert_moduleNc           
        ddl m} |g }t        | j                               D ]  \  }}t	        |t
        j                        s!t        ||      s.|j                  d      \  }	}
}|	r| j                  |	      n| } ||s|j                  nd|s|j                  nd|s|j                  dund|||d      }t        |||        | S )a  
    Replace nn.Linear modules with empty SINQLinear modules.

    Args:
        model: The model to modify
        modules_to_not_convert: List of module names to skip
        quant_config: SINQ quantization config dict (None for pre-quantized models)
        compute_dtype: Computation dtype for the quantized layers
        device: Device string for the quantized layers
        pre_quantized: Whether loading a pre-quantized checkpoint

    Returns:
        The modified model with SINQLinear modules
    r   )
SINQLinearN.FT)in_featuresout_featuresbiasquant_configcompute_dtypedeviceuse_unpack_kernel)sinq.sinqlinear_hfr   listnamed_modules
isinstancennLinearr
   
rpartitionget_submoduler   r   r   setattr)modelmodules_to_not_convertr   r   r   pre_quantizedr   	full_namemoduleparent_path_
child_nameparent
sinq_layers                 P/opt/pipecat/venv/lib/python3.12/site-packages/transformers/integrations/sinq.pyreplace_with_sinq_linearr)       s    , .%!#!%"5"5"78 0	6&")),$Y0FG%.%9%9#%>"Q
5@$$[1e2?**T4A,,t2?&++T)U%'"

 	
J/'0* L    c                  4    e Zd ZdZd Z	 	 	 d	 	 	 	 	 	 	 ddZy)SinqQuantizea'  
    Param-level ConversionOp for SINQ (from FP weights).

    At load time, for each `Linear.weight` that should be quantized:
      - The SINQLinear module already exists (created in _process_model_before_weight_loading)
      - We just call quantize() on it with the loaded weight tensor
    c                    || _         y Nhf_quantizerselfr0   s     r(   __init__zSinqQuantize.__init__\   
    (r*   Nc                    t        t        |j                                     \  }}t        |t              r|d   n|}t        ||      \  }	}
|	j                  |       ||j                  |       d|	_        i S )Nr   T)	nextiteritemsr   r   r	   quantizediscard_is_hf_initialized)r2   
input_dictr   full_layer_namemissing_keyskwargsr$   valuesweight_tensorr"   tensor_names              r(   convertzSinqQuantize.convert_   sr     j..012	6%/%=q	625/J&#  1$(!	r*   )NNNr<   zdict[str, Any]r   ztorch.nn.Module | Noner=   z
str | Nonereturnzdict[str, torch.Tensor]__name__
__module____qualname____doc__r3   rC    r*   r(   r,   r,   S   sB    ) )-&*" & $	 
!r*   r,   c                  2    e Zd ZdZd Z	 	 d	 	 	 	 	 	 	 ddZy)SinqDeserializea0  
    ConversionOp for loading *pre-quantized* SINQ checkpoints.

    Checkpoint layout (what `SINQLinear.state_dict` produces) is, per module:
        <prefix>.W_q
        <prefix>.bias
        <prefix>.meta

    WeightConverter in the quantizer is configured so that:
      - we group ".W_q", ".meta", ".bias" as input_dict
      - conceptually treat them as belonging to "<prefix>.weight"
      - and call this SinqDeserialize.convert to load the state into the existing SINQLinear.

    The returned dict is {} because we load directly into the module.
    c                    || _         y r.   r/   r1   s     r(   r3   zSinqDeserialize.__init__   r4   r*   Nc                   t        |j                               D ]  \  }}t        |t               s|d   ||<     |j                  d      }|j                  d      }|j                  d      }	||;t	        t        |j                                     }t        |t               r|d   }||iS t        ||      \  }
}||d}|	|	|d<   |
j                  |       d|
_	        i S )Nr   z.W_qz.metaz.bias)W_qmetar   T)
r   r8   r   getr6   r7   r@   r	   load_state_dictr;   )r2   r<   r   r=   r?   kvrP   rQ   r   r"   r$   states                r(   rC   zSinqDeserialize.convert   s     ))+, 	%DAq!T" !!
1	% nnV$~~g&~~g& ;$,T*++-./A!T"aD#Q''(@	 
  E&Mu%$(!	r*   )NNrD   rF   rK   r*   r(   rM   rM   v   s?     ) )-&*	#"# &# $	# 
!#r*   rM   )NNNzcuda:0F)r   torch.nn.Moduler   zlist[str] | Noner   zdict | Noner   ztorch.dtyper   strr    boolrE   rW   )
__future__r   typingr   transformers.utilsr   r   core_model_loadingr   quantizers.quantizers_utilsr	   r
   
get_loggerrG   loggertorchtorch.nnr   r)   r,   rM   rK   r*   r(   <module>rc      s    #  : . U 
		H	%
 04 $!%00,0 0 	0
 0 0 0f =  F7m 7r*   