
    qi3                        d dl Zd dlZd dlZd dlZd dlmZ d dlmZ d dl	m
Z
mZ  e
       rddlmZ ddlmZ  e       ru ej                   ej"                  j                  d            Z ej                   ej"                  j                  d             ej                   d	      k\  rd d
lmZ d dlmZ  ej.                  e      Zdededz  fdZd Zd Z G d de      Z G d de      Zy)    N)version)logging)is_torch_availableis_torchao_available   )ConversionOps)get_module_from_nametorchao0.15.0)unflatten_tensor_state_dict)is_metadata_torchaoconfig_namereturnc                 v    | j                         } t        j                  d|       }|r|j                  d      S y)z
    Extract the size digit from strings like "4weight", "8weight".
    Returns the digit as an integer if found, otherwise None.
    z
(\d)weight   N)lowerresearchgroup)r   	str_matchs     S/opt/pipecat/venv/lib/python3.12/site-packages/transformers/integrations/torchao.pyfuzzy_match_sizer   *   s7    
 ##%K		-5Iq!!    c                    ddl m} ddlm} t	        | |      r*| j
                  j                   d| j                          dS t	        | |      r<| j
                  j                   d| j                   dt        | j                         dS y )Nr   )AffineQuantizedTensor)LinearActivationQuantizedTensor()z(activation=	, weight=)
torchao.dtypesr   7torchao.quantization.linear_activation_quantized_tensorr   
isinstance	__class____name___quantization_typeinput_quant_funcoriginal_weight_tensor)weightr   r   s      r   r%   r%   9   s    4g&/0""++,Af.G.G.I-J!LL&9:""++,L9P9P8QQZ[mnt  oL  oL  \M  [N  NO  P  	P ;r   c                    t        | j                        }|7d| j                  j                  d    d| j                  j                  d    dS d| j                  j                  d    d| j                  j                  d    d| S )Nzin_features=r   z, out_features=r   z, weight=Noner   )r%   r(   shape)selfr(   s     r   _linear_extra_reprr,   D   s    ,F~dkk//23?4;;CTCTUVCWBXXeffdkk//23?4;;CTCTUVCWBXXabhaijjr   c                       e Zd Zd Z	 	 	 ddeeej                  f   dej                  j                  dz  dedz  deeej                  f   fdZ
y)	TorchAoQuantizec                     || _         y Nhf_quantizerr+   r2   s     r   __init__zTorchAoQuantize.__init__M   
    (r   N
input_dictmodelfull_layer_namer   c                 
   ddl m} t        |j                               d   \  }}t	        |t
              r|d   n|}t        ||      \  }	}
t        j                  j                  ||j                        |	j                  |
<   |j                         }t        |	      t        |      k(  }| j                  j                  j                   }|r)|r't#        |j$                  j'                  d      dd       | j                  j                  j)                         t+        j,                  d      k\  rdd	l m} | j                  j                  j1                         }t	        ||      ro|j3                  d
d      \  }}d }||j4                  v r(|j7                  d      rJ d       |j8                  |   }n||j4                  v r(|j7                  d      rJ d       |j8                  |   }n|j4                  D ]h  }|j7                  d      st;        j<                  |dd  |      r|j8                  |   } nHt;        j<                  |dd  |      sY|j8                  |   } n |j8                  j?                  dd       }||dk(  ri|r|r|	j@                  jC                         } ||	|d        |jE                  |       d|	_#        |	jI                  d      D ]	  }d|_#         |r|rdiS i S  |||i      } ||	|d        |jE                  |       d|	_#        |	jI                  d      D ]	  }d|_#         i S ||iS | j                  j                  j)                         t+        j,                  d      k\  r]ddl m%} | j                  j                  j1                         }t	        ||      r&|j3                  d
d      \  }}d }||j8                  v r(|j7                  d      rJ d       |j8                  |   }ni|j8                  D ]>  }|j7                  d      st;        j<                  |dd  |      s/|j8                  |   } n |j8                  j?                  dd       }|j|r|r|	j@                  jC                         } ||	|d        |jE                  |       d|	_#        |	jI                  d      D ]	  }d|_#         |r|rdiS i S ||iS |r|r|	j@                  jC                         } ||	| j                  j                  j1                                |jE                  |       d|	_#        |	jI                  d      D ]	  }d|_#         |r|rdiS i S )Nr   )	quantize_)requires_gradT)decodertie_word_embeddingsFr   )FqnToConfig.r   zre:zHparam fqn should not start with`re:`, which is used for specifying regexzImodule fqn should not start with`re:`, which is used for specifying regex   _defaultr(   c                      yNT xfqns     r   <lambda>z)TorchAoQuantize.convert.<locals>.<lambda>       r   )recursezlm_head.weight)	filter_fnz0.12.0)ModuleFqnToConfigc                      yrC   rD   rE   s     r   rH   z)TorchAoQuantize.convert.<locals>.<lambda>   rI   r   )&torchao.quantizationr:   tupleitemsr"   listr	   torchnn	Parameterr;   _parametersget_input_embeddingsidr2   quantization_configuntie_embedding_weightssetattrconfigget_text_config_get_ao_versionr   Versionr>   get_apply_tensor_subclassrsplitfqn_to_config
startswithmodule_fqn_to_configr   	fullmatchgetr(   clonediscard_is_hf_initialized
parametersrL   )r+   r6   r7   r8   missing_keyskwargsr:   _valuemoduletensor_nameinput_embedis_embedding_paramrY   r>   r[   
module_fqntop_level_param_namecmaybe_module_fqn_patternlm_headparamcustom_param_fqn_configrL   s                           r   convertzTorchAoQuantize.convertP   so    	3))+,Q/5&ud3a25/J*/((*<*<URWReRe*<*f;' 002Z2k?:"&"3"3"G"G"_"_"'9ELL000>@UW\] 00@@BgooV^F__8&&::TTVF&+.3B3I3I#q3Q0
0"f&:&::)44U; b; 33ODA6#7#77)44U; c; 33J?A 5;4H4H N07BB5I$\\*B12*FX & ; ;<T UA!\\*B12*F
S & ; ;<T UA!N #77;;JM=+x7-2I&,mm&9&9&;G!&!.AC$,,_=481
 &,%6%6u%6%E <E7;E4<>PUl 0':trtt 3>?SUV>W2X/!&*ATR$,,_=481%+%6%6u%6%E <E7;E4<!	'// 22BBDX`Haa>&&::TTVF&"34 / 6 6sA >
A!<!<<)44U; c; 33J?A4:4O4O N07BB5I$\\*B12*F
S & ; ;<T UA!N #77;;JM=).E"(--"5"5"7fa3FG ((904F-!'!2!25!2!A 83708:LQh,g6pnpp'//"9mm))+G&$++??YY[\_-$(!&&u&5 	,E'+E$	,.@E\ '*dbddr   )NNN)r$   
__module____qualname__r4   dictstrrR   TensorrS   Modulery   rD   r   r   r.   r.   L   st    ) )-&*Eeell*+Ee xx%Ee t	Ee 
c5<<	 Eer   r.   c                       e Zd Zd Z	 	 	 	 d	deeej                  f   dee   dz  dej                  j                  dz  dedz  deeej                  f   f
dZy)
TorchAoDeserializec                     || _         y r0   r1   r3   s     r   r4   zTorchAoDeserialize.__init__   r5   r   Nr6   source_patternsr7   r8   r   c           
         t        |j                               d   |v}i }dj                  |j                  d      dd       }	|r"t	        |d   t               r	|d   d   }
nZ|d   }
nT|j                         D ]A  }t        ||         dk7  rt        d| dt        ||          d	      ||   d   ||	 d| <   C |r|
iS t        j                  d
      t        k  rt        | j                  j                        st        d      t        || j                  j                        \  }}|rJ ||   }t        ||      \  }}t	        |t        j                   j"                        rt%        j&                  t(        |      |_        ||iS )a&  
        Consolidates tensor subclass components before reconstructing the object

        For example:
            input_dict: {
                "_weight_qdata": torch.Tensor,
                "_weight_scale": torch.Tensor,
            }
            full_layer_name: "model.layers.0.self_attn.k_proj.weight"

            Given this, we reconstruct a Float8Tensor instance using the qdata and scale
            and return it as a dictionary with the full_layer_name as the key and the recovered
            Float8Tensor instance as the value.
        r   r?   Nr(   r   zExpected a single tensor for z	 but got z tensors insteadr   zOTo use `safetensors` serialization, you should have `torchao>=0.15.0` installed)rQ   keysjoinsplitr"   len
ValueErrorr   parseTORCHAO_VERSIONr   r2   metadatar   r	   rR   rS   Lineartypes
MethodTyper,   
extra_repr)r+   r6   r   r7   r8   rj   rk   is_unsafe_serialization
param_data
layer_namer(   suffixunflattened_state_dictleftover_state_dict	new_paramrn   rl   s                    r   ry   zTorchAoDeserialize.convert   s   . #'z'8"9!"<O"S
XXo33C8"=>
"*X.5#H-a0#H-$//+ Mz&)*a/$7xyZX^M_I`Haaqr  8B&7I!7L
j\6(34M ##V,,--)_<ATUYUfUfUoUoApnoo6Q))227
3 3 '&&*?;	(@	fehhoo. % 0 01CV LF++r   )NNNN)r$   rz   r{   r4   r|   r}   rR   r~   rQ   rS   r   ry   rD   r   r   r   r      s    ) -1(,&*:,ell*+:, cT):, xx%	:,
 t:, 
c5<<	 :,r   r   ) importlib.metadata	importlibr   r   rR   	packagingr   transformers.utilsr   transformers.utils.import_utilsr   r   core_model_loadingr   quantizers.quantizers_utilsr	   r   r   r   1torchao.prototype.safetensors.safetensors_supportr   /torchao.prototype.safetensors.safetensors_utilsr   
get_loggerr$   loggerr}   r   r%   r,   r.   r   rD   r   r   <module>r      s     	    & T 2 > #gmmI$6$6$>$>y$IJOw}}Y''//	:;}w}}X?VV	
 	X			H	%# #* PkIem IeX>, >,r   