import inspect

from ..core_model_loading import ConversionOps
from ..quantizers.quantizers_utils import get_module_from_name, should_convert_module
from ..utils import (
    get_available_devices,
    is_accelerate_available,
    is_bitsandbytes_available,
    is_torch_available,
    logging,
)


if is_bitsandbytes_available():
    import bitsandbytes as bnb

if is_torch_available():
    import torch
    import torch.nn as nn

    from ..pytorch_utils import Conv1D

if is_accelerate_available():
    import accelerate
    from accelerate.hooks import add_hook_to_module, remove_hook_from_module

logger = logging.get_logger(__name__)


class Bnb4bitQuantize(ConversionOps):
    def __init__(self, hf_quantizer):
        self.hf_quantizer = hf_quantizer

    def convert(
        self,
        input_dict: dict[str, list[torch.Tensor]],
        full_layer_name: str | None = None,
        model: torch.nn.Module | None = None,
        **kwargs,
    ) -> dict[str, torch.Tensor]:
        """
        We need to store some parameters to create the quantized weight. For example, bnb requires 6 values that are
        stored in the checkpoint to recover the quantized weight. So we store them in a dict that is stored in
        `hf_quantizer` for now, as we can't save it in the op since we create an op per tensor.
        """
        value = list(input_dict.values())[0]
        value = value[0]
        module, _ = get_module_from_name(model, full_layer_name)
        # `Conv1D` stores weights transposed compared to `nn.Linear`.
        if issubclass(module.source_cls, Conv1D):
            value = value.T
        old_value = model.get_parameter_or_buffer(full_layer_name)
        # Moving a `Params4bit` to an accelerator device triggers the actual 4-bit quantization.
        new_value = bnb.nn.Params4bit(value, requires_grad=False, **old_value.__dict__).to(value.device)
        module._is_hf_initialized = True
        return {full_layer_name: new_value}
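
# Illustrative usage sketch (not part of the original module): how a loader could drive the op
# for a single tensor. The layer name and `fp_tensor` below are hypothetical placeholders.
#
#   op = Bnb4bitQuantize(hf_quantizer)
#   converted = op.convert(
#       {"model.layers.0.self_attn.q_proj.weight": [fp_tensor]},
#       full_layer_name="model.layers.0.self_attn.q_proj.weight",
#       model=model,
#   )
#   # -> {"model.layers.0.self_attn.q_proj.weight": <bnb.nn.Params4bit>}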


class Bnb4bitDeserialize(ConversionOps):
    def __init__(self, hf_quantizer):
        self.hf_quantizer = hf_quantizer

    def convert(
        self,
        input_dict: dict[str, list[torch.Tensor]],
        model: torch.nn.Module | None = None,
        full_layer_name: str | None = None,
        **kwargs,
    ) -> dict[str, torch.Tensor]:
        """
        Deserialization of bnb keys. We need 6 keys to recreate the quantized weights.
        """
        # The packed weight alone is not enough: wait until the quantization stats arrive as well.
        if len(input_dict) == 1:
            return input_dict
        for key, value in input_dict.items():
            if isinstance(value, list):
                input_dict[key] = value[0]
        key_weight = "weight"
        weight = input_dict.pop(key_weight)
        module, _ = get_module_from_name(model, full_layer_name)
        new_value = bnb.nn.Params4bit.from_prequantized(
            data=weight,
            quantized_stats=input_dict,
            requires_grad=False,
            device=weight.device,
            module=module,
        )
        module._is_hf_initialized = True
        return {full_layer_name: new_value}


class Bnb8bitQuantize(ConversionOps):
    def __init__(self, hf_quantizer):
        self.hf_quantizer = hf_quantizer

    def convert(
        self,
        input_dict: dict[str, list[torch.Tensor]],
        model: torch.nn.Module | None = None,
        full_layer_name: str | None = None,
        **kwargs,
    ) -> dict[str, torch.Tensor]:
        value = list(input_dict.values())[0]
        value = value[0] if isinstance(value, list) else value
        module, _ = get_module_from_name(model, full_layer_name)
        # `Conv1D` stores weights transposed compared to `nn.Linear`.
        if issubclass(module.source_cls, Conv1D):
            value = value.T
        value_device = value.device
        old_value_dict = model.get_parameter_or_buffer(full_layer_name).__dict__
        # The `SCB` scales only describe already-quantized data, so drop any stale entry.
        old_value_dict.pop("SCB", None)
        # `Int8Params` performs the int8 quantization when moved back to the original device.
        new_value = bnb.nn.Int8Params(value.to("cpu"), requires_grad=False, **old_value_dict).to(value_device)
        return {full_layer_name: new_value}


class Bnb8bitDeserialize(ConversionOps):
    def __init__(self, hf_quantizer):
        self.hf_quantizer = hf_quantizer

    def convert(
        self,
        input_dict: dict[str, list[torch.Tensor]],
        model: torch.nn.Module | None = None,
        full_layer_name: str | None = None,
        **kwargs,
    ) -> dict[str, torch.Tensor]:
        """
        Deserialization of bnb keys.
        """
        # Wait until both the int8 weight and its `SCB` scales have been collected.
        if len(input_dict) == 1:
            return input_dict
        for key, value in input_dict.items():
            if isinstance(value, list):
                input_dict[key] = value[0]
        module, _ = get_module_from_name(model, full_layer_name)
        key_weight = "weight"
        weight = input_dict[key_weight]
        old_value_dict = model.get_parameter_or_buffer(full_layer_name).__dict__
        old_value_dict["SCB"] = input_dict["SCB"]
        new_value = bnb.nn.Int8Params(weight, requires_grad=False, **old_value_dict).to(weight.device)
        module._is_hf_initialized = True
        return {full_layer_name: new_value}
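
# Illustrative note (an assumption, not taken from this module): for a pre-quantized 4-bit
# checkpoint, the six values recombined above are typically serialized under keys such as
#
#   ...q_proj.weight                                  -> packed 4-bit data
#   ...q_proj.weight.absmax                           -> per-block scales
#   ...q_proj.weight.quant_map                        -> quantization codebook
#   ...q_proj.weight.nested_absmax                    -> double-quantization scales
#   ...q_proj.weight.nested_quant_map                 -> double-quantization codebook
#   ...q_proj.weight.quant_state.bitsandbytes__nf4    -> packed quant-state metadata
#
# while 8-bit checkpoints only pair the int8 `weight` with its `SCB` row scales.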


def replace_with_bnb_linear(model, modules_to_not_convert=None, quantization_config=None, pre_quantized=False):
    """
    A helper function to replace all `torch.nn.Linear` modules by bnb modules from the `bitsandbytes` library.

    Args:
        model (`torch.nn.Module`):
            The model to convert, can be any `torch.nn.Module` instance.
        modules_to_not_convert (`list[str]`, defaults to `None`):
            A list of nn.Linear weights to not convert. If a parameter path is in the list (e.g. `lm_head.weight`),
            the corresponding module will not be converted.
        quantization_config (`BitsAndBytesConfig`):
            The quantization config object that contains the quantization parameters.
        pre_quantized (`bool`, defaults to `False`):
            Whether the model is pre-quantized or not.
    """
    has_been_replaced = False
    for module_name, module in model.named_modules():
        if not should_convert_module(module_name, modules_to_not_convert):
            continue
        # Create the replacement on the meta device: real (quantized) values are filled in later,
        # when the checkpoint weights are loaded.
        with torch.device("meta"):
            if isinstance(module, (nn.Linear, Conv1D)):
                if isinstance(module, Conv1D):
                    in_features, out_features = module.weight.shape
                else:
                    in_features = module.in_features
                    out_features = module.out_features

                if quantization_config.quantization_method() == "llm_int8":
                    new_module = bnb.nn.Linear8bitLt(
                        in_features,
                        out_features,
                        module.bias is not None,
                        has_fp16_weights=quantization_config.llm_int8_has_fp16_weight,
                        threshold=quantization_config.llm_int8_threshold,
                    )
                    if pre_quantized:
                        new_module.weight.data = new_module.weight.data.to(dtype=torch.int8)
                else:
                    new_module = bnb.nn.Linear4bit(
                        in_features,
                        out_features,
                        module.bias is not None,
                        quantization_config.bnb_4bit_compute_dtype,
                        compress_statistics=quantization_config.bnb_4bit_use_double_quant,
                        quant_type=quantization_config.bnb_4bit_quant_type,
                        quant_storage=quantization_config.bnb_4bit_quant_storage,
                    )
                    if pre_quantized:
                        new_module.weight.data = new_module.weight.data.to(quantization_config.bnb_4bit_quant_storage)
                # Remember the original class so loading can undo e.g. the `Conv1D` transposition.
                new_module.source_cls = type(module)
                new_module.requires_grad_(False)
                model.set_submodule(module_name, new_module)
                has_been_replaced = True
    if not has_been_replaced:
        logger.warning(
            "You are loading your model in 8bit or 4bit but no linear modules were found in your model."
            " Please double check your model architecture, or submit an issue on github if you think this is a bug."
        )
    return model
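
# Illustrative usage sketch (not part of the original module): `from_pretrained` performs this
# replacement internally when a `BitsAndBytesConfig` is passed; done by hand it looks like:
#
#   from transformers import BitsAndBytesConfig
#
#   config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_quant_type="nf4")
#   model = replace_with_bnb_linear(
#       model, modules_to_not_convert=["lm_head"], quantization_config=config
#   )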


def dequantize_bnb_weight(weight: "torch.nn.Parameter", state=None):
    """
    Helper function to dequantize 4bit or 8bit bnb weights.

    If the weight is not a bnb quantized weight, it will be returned as is.
    """
    if not isinstance(weight, torch.nn.Parameter):
        raise TypeError(f"Input weight should be of type nn.Parameter, got {type(weight)} instead")

    cls_name = weight.__class__.__name__
    if cls_name not in ("Params4bit", "Int8Params"):
        return weight

    if cls_name == "Params4bit":
        output_tensor = bnb.functional.dequantize_4bit(weight.data, weight.quant_state)
        return output_tensor

    if state.SCB is None:
        state.SCB = weight.SCB

    if hasattr(bnb.functional, "int8_vectorwise_dequant"):
        # Use the bitsandbytes API if available (requires v0.45.0+).
        dequantized = bnb.functional.int8_vectorwise_dequant(weight.data, state.SCB)
    else:
        # Multiply by the row scales over 127 to dequantize (7.874015718698502e-3 == 1 / 127).
        dequantized = weight.data * state.SCB.view(-1, 1) * 7.874015718698502e-3
    return dequantized
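
# Illustrative usage sketch (not part of the original module): recovering the floating-point
# weight of a hypothetical 8-bit `layer` (a `bnb.nn.Linear8bitLt`):
#
#   fp_weight = dequantize_bnb_weight(layer.weight, state=layer.state)
#
# For 4-bit layers the quant state travels with the parameter itself, so `state` may be `None`.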


def _create_accelerate_new_hook(old_hook):
    r"""
    Creates a new hook based on the old hook. Use it only if you know what you are doing !
    This method is a copy of: https://github.com/huggingface/peft/blob/748f7968f3a31ec06a1c2b0328993319ad9a150a/src/peft/utils/other.py#L245
    with some changes
    """
    old_hook_cls = getattr(accelerate.hooks, old_hook.__class__.__name__)
    old_hook_attr = old_hook.__dict__
    filtered_old_hook_attr = {}
    old_hook_init_signature = inspect.signature(old_hook_cls.__init__)
    # Only keep the attributes that the hook constructor actually accepts.
    for k in old_hook_attr.keys():
        if k in old_hook_init_signature.parameters:
            filtered_old_hook_attr[k] = old_hook_attr[k]
    new_hook = old_hook_cls(**filtered_old_hook_attr)
    return new_hook
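
# Illustrative usage sketch (not part of the original module): transplanting an accelerate hook
# (e.g. an `AlignDevicesHook`) onto a freshly built module, as done in `dequantize_and_replace`:
#
#   old_hook = module._hf_hook
#   new_hook = _create_accelerate_new_hook(old_hook)
#   remove_hook_from_module(module)
#   add_hook_to_module(new_module, new_hook)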


def dequantize_and_replace(model, quantization_config=None, dtype=None):
    """
    Converts a quantized model into its dequantized original version. The newly converted model will have
    some performance drop compared to the original model before quantization - use it only for specific usecases
    such as QLoRA adapters merging.

    Returns the converted model.
    """
    quant_method = quantization_config.quantization_method()
    target_cls = bnb.nn.Linear8bitLt if quant_method == "llm_int8" else bnb.nn.Linear4bit
    has_been_replaced = False

    for module_name, module in model.named_modules():
        if isinstance(module, target_cls):
            with torch.device("meta"):
                bias = getattr(module, "bias", None)
                new_module = torch.nn.Linear(module.in_features, module.out_features, bias is not None)

            state = module.state if quant_method == "llm_int8" else None
            weight = dequantize_bnb_weight(module.weight, state)
            if dtype is None:
                logger.warning_once(
                    f"The modules are dequantized in {weight.dtype}. If you want to change the dtype, please"
                    " specify `dtype` in `dequantize`."
                )
            else:
                logger.warning_once(f"The modules are dequantized in {weight.dtype} and casted to {dtype}.")
                weight = weight.to(dtype)
            new_module.weight = torch.nn.Parameter(weight)
            if bias is not None:
                new_module.bias = bias

            # Create a new hook and attach it in case we use accelerate.
            if hasattr(module, "_hf_hook"):
                old_hook = module._hf_hook
                new_hook = _create_accelerate_new_hook(old_hook)
                remove_hook_from_module(module)
                add_hook_to_module(new_module, new_hook)

            new_module.to(module.weight.device)
            model.set_submodule(module_name, new_module)
            has_been_replaced = True

    if not has_been_replaced:
        logger.warning("For some reason the model has not been properly dequantized. You might see unexpected behavior.")
    return model
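
# Illustrative usage sketch (not part of the original module): `PreTrainedModel.dequantize()`
# funnels into this helper; invoked directly it looks like the call below, where `bnb_config`
# stands for the `BitsAndBytesConfig` the model was loaded with:
#
#   model = dequantize_and_replace(
#       model, quantization_config=bnb_config, dtype=torch.bfloat16
#   )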


def validate_bnb_backend_availability(raise_exception=False):
    """
    Validates if the available devices are supported by bitsandbytes, optionally raising an exception if not.
    """
    bnb_supported_devices = getattr(bnb, "supported_torch_devices", set())
    available_devices = set(get_available_devices())

    if not available_devices.intersection(bnb_supported_devices):
        if raise_exception:
            err_msg = (
                f"None of the available devices `available_devices = {available_devices}` are supported by the "
                f"bitsandbytes version you have installed: `bnb_supported_devices = {bnb_supported_devices}`. "
                "Please check the docs to see if the backend you intend to use is available and how to install it: "
                "https://huggingface.co/docs/bitsandbytes/main/en/installation"
            )
            raise RuntimeError(err_msg)

        logger.warning("No supported devices found for bitsandbytes")
        return False

    return True
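
# Illustrative usage sketch (not part of the original module): a guarded check before
# requesting bnb quantization.
#
#   if is_bitsandbytes_available():
#       validate_bnb_backend_availability(raise_exception=True)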