
    qiw              	       X   d Z ddlmZ ddlmZ ddlmZmZmZm	Z	  e       r
ddl
Z
ddlmZ  e       rddlmZ ddlmZmZ  e       rdd	lmZ  e	j(                  e      Zdd
ZdededdfdZddedededefdZ G d de
j                  j6                        Zddee   dz  fdZddZ y)zZHIGGS through FLUTE (Flexible Lookup Table Engine for LUT-quantized LLMs) integration file    )sqrt   )should_convert_module)is_flute_availableis_hadamard_availableis_torch_availableloggingN)prepare_data_transposed)TuneMetaDataqgemm_v2)hadamard_transformc                    t        dt        | j                        z        D cg c]  }d }}|D ]/  }| j                  |   }|dz
  |z  dz   |z  }||z
  }	|	|d|z  dz
  <   1 t        j                  j                  | |d|      S c c}w )Nr   r      constant)rangelenshapenn
functionalpad)
tensordimshad_block_sizevalue_pad_dimsdimsizenext_multiple_of_1024deltas
             Q/opt/pipecat/venv/lib/python3.12/site-packages/transformers/integrations/higgs.pypad_to_blockr#   $   s     S%6!678a8H8 '||C "&(~!=!A^ S%,!&cA	' ==VXz5AA 9s   	Bpnreturnztorch.Tensorc                    | |fdk(  rt        j                  g ddgddgddgdd	gd
dgddgddgddgddgddgddgddgddgddgddgd d!gd"d#gd$d%gd&d'gd(d)gd*d+gd,d-gd.d/gd0d1gd2d3gd4d5gd6d7gd8d9gd:d;gd<d=gd>d?gd@dAgdBdCgdDdEgdFdGgdHdIgdJdKgdLdMgdNdOgdPdQgdRdSgdTdUgdVdWgdXdYgdZd[gd\d]gd^d_gd`dagdbdcgdddegdfdggdhdigdjdkgdldmgdndogdpdqgdrdsgdtdugdvdwgdxdygdzd{gd|d}gd~dgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgd dgddgddgddgdd	gd
dgddgddgddgddgddgddgddgddgddgddgd d!gd"d#gd$d%gd&d'gd(d)gd*d+gd,d-gd.d/gd0d1gd2d3gd4d5gd6d7gd8d9gd:d;gd<d=gd>d?gd@dAgdBdCgdDdEgdFdGgdHdIgdJdKgdLdMgdNdOgdPdQgdRdSgdTdUgdVdWgdXdYgdZd[gd\d]gd^d_gd`dagdbdcgdddegdfdggdhdigdjdkgdldmgdndogdpdqgdrdsgdtdugdvdwgdxdygdzd{gd|d}gd~dgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgdĐdgdƐdgdȐdgdʐdgd̐dgdΐdgdАdgdҐdgdԐdgd֐dgdؐdgdڐdgdܐdgdސdgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgd dg      S | |fdk(  rt        j                  g ddgddgddgd	d
gddgddgddgddgddgddgddgddgddgddgdd gd!d"gd#d$gd%d&gd'd(gd)d*gd+d,gd-d.gd/d0gd1d2gd3d4gd5d6gd7d8gd9d:gd;d<gd=d>gd?d@gdAdBgdCdDgdEdFgdGdHgdIdJgdKdLgdMdNgdOdPgdQdRgdSdTgdUdVgdWdXgdYdZgd[d\gd]d^gd_d`gdadbgdcddgdedfgdgdhgdidjgdkdlgdmdngdodpgdqdrgdsdtgdudvgdwdxgdydzgd{d|gd}d~gddgddg      S | |fdk(  ret        j                  ddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgddgg      S | |fdk(  rEt        j                  dgdgdgdgdgdgdgdgdgdgdgdgdgdgdgdgg      S | |fdk(  r-t        j                  dgdgdgdgdgdgdgdgg      S | |fdk(  r!t        j                  dgdgdgdgg      S t        d|  d|       (  N)r      g   g    f?g    Bg   ]?g   _g   \?g   9g   @g   ?g   g   @y?g   Ђ
@g   @<?g   4@g   @V
@g   i@g   ?g   [g   U g    v?g   `~?g   ?g   F?g    g   #	@g   ??g    u?g    g   ?g   !	g   v-?g    οg    W?g   Dg   @?g   -?g   `g   `?g   g    Կg   iǿg   5?g   `Xg    #8g    g    Eg   1߿g   `c?g   g   g    g    ??g    *V?g   ?g   @?g    ?g   ^
g   @xg    n|g   @g   `pg    5g   `Ag   ?g   @UVĿg   yg   Sg    d?g    g   g   `KG?g   `?g   ?g   	?g   K@g   @?g   Y?g   sg   `|οg   @?g     o@g    Dg   @?g    2?g   ?g    g   R @g   `(?g    Er?g   @N0g   `'/?g   g   >?g   ?g   `Qcg   2g   :7g   dig    c?g   `ֿg   `ؿg   ?g    +@g   ?g   `(f?g   @8ig   =g   򩣿g   
g   +g   fg    g   Lg   x?g   `*(g    ?g    /տg   @/aſg   Eg   gg    ?g   Gg   {?g   h?g   _a?g   ?g   ?g   3@g   #?g   yg   ѱܿg    
@g   ׿g    d,g   g   &g   ٿg   `{X?g   Yb?g   @"g    يg   3g   T g   @@g   kg   g   `{ڿg    ?g   >g   	?g   g   `5g   ?g    ?g   5?g   ?g   Iؿg    A?g   ?g   ѿg   9?g   @@g   zg    j	g    g   @g   @N?g    }g   ?g    Zg   `@g   cg    ҿg   @=Dg   g    ?g   `g   b@g   {@g    h @g    g   ?g   ]ƿg   K?g   g   w?g   `?g    4?g    b?g    g   `ſg   p?g   @?g   @Q=s?g   @տg   g   @@g   1u?g   ͔ƿg   @?g   䉫?g   `@g   _g    @g    ,?g   `dg   `T?g   L?g   b?g   3?g   ȃg   xg    v?g   `?g   섥g   K3g   /ۿg   `Eg   @#?g   3?g   
g    c?g    1g    ?g   `%?g    %?g   @w@g   I?g   kg    g   @J߿g   ?g    ǿg   Fֿg   9?g   ` g   8?g   `@g    xg   @wg    x?g    g   @ҿg   ͆g    vg    ?g   n@g   `9ǿg   Y?g    g   @?g    [m?g    F?g   @?g   `Ьg   `Gg    c?g   @g   Pug   @?g   g   @Hg    4g   @ g   wg   #?g    d?g   Mg    g    ?g   ?g   @vPg   ſg   r?g   Lg    ?g   @?g    :ڿg    ?g   ?g   `
?g   @_ g   $Rg    Z?g   3g   `g    ?g   @Og   ew@g    Lg   V?g   ߩҿg    V?g   ?g   g   ?g   `;?g    Կg   `@g   -g    ?g   `g   ZY?g    g    	?g    8;?g    ?g   aοg   @Rh?g   `F
?g   A?g   @?g    g   `g    8?g   `>?g   ?g   @pg   i?g    ?g   `пg   @ @g   B?g    bg   @ @g    g   `\g   g   )W?g   `?g   `g   `@g    0$ӿg    >g    i?g   cg   ?g   H?g    T?g    ]g   .ܿg   yؿg   *g   `F?g   @g   `Is?g   _;?g   E?g    g   `<?g   `B?g   @ެ?g   `5g   ׿g   `Sg   +Dg   @4?g    T?g   W@g   ܨݿg   1?g   	g    xg   `?g    ?g   h?g   `g   Կg   g    ȿg   -g   `?g    ֭g   ?g   `?g   ?g   `$Zg   G)?g   ?g    3g   %g   `axg   ?g   @Ŀg    ?g   g   g   ڜ?g   @&C?g   `"?g   @Cg   nͿg   92ֿg   @g   g   `g   g   =?g   `L?g   g    X?g    Sg   @g   `T @g   @?g    ҿg    g   `g    X
ڿg    <?g   `gg   ؿg    yg   z?g   ĝ?g   + ?g   $u?g   mg   Pvg   ?g   lg   	ؿg   ?g   y@g   @xɿg    m?g   (g   1ٿg   @qg    g   ?g   2Ig   t?g     i?g    	g   @g   `s?g   `
@g    M.g    ?g    ?g   `4g   g   @j	g   @Cg   oEg   ?g    3@g   k?g   ?g    ?g   @߿g   ޟ?g   οg   og   7g   +g    ſg   pj?g   g    g   @g   (g    =g   0?g   `>޿g    .g   @\?g   `[?g    #?g    ?g   ?g   g   N?g   jg   =?g   R޿g   耎?g   `g   Lg    @g   =g   @g   @?g   R@g   rg   g   ?g   `v_?g   @T?g   ?g   @?g   8?g    {}g   i?g   Sg   @k?g   s?g    ?g   ?g    _g   @3@g   ?g   @ ?g   `Xrg   [g   g   ?g   `?g   @?g    5g   ?)r   @   g   g   x?g    g   k?g    g   l?g   y?g   6g   ?g   пg   `5?g   k@g   $g    z?g    1J@g   ?g   o?g   .g   hg   ӿg   _?g   ?g   $?g   g   6@g   @T?g   -?g   g    @g   g   ù?g   .?g   ?g   fg   ?g   ,'?g    ?g   ^@g   @Lg    >ۿg   4bg   ?g    Ng   ޿g    ?g   nZg   tg   i~@g    Jf?g   ^Gg   `mg   `6@g   /?g    ?g    Oܿg   ?g   g   gg    ^Dg   Fg   |g   1g   `8g   @*?g   ֿg   <Sg    ^g   @^?g    Wig    ?g    1?g   6?g   @J?g   @(p?g   @??g   Ơ?g    D:?g   g   `uڿg   nͿg    @g    [g    j?g   =?g     ?g    Z¿g   ?g   ?g   G?g   ܿg   `9?g   g   ?g   ?g   :eg    zg   kg    g   m?g    [rٿg   ߿g   ?g    y?g    ?g   ?g   `/g   (&g    C?g   @ݿg   g   `ٿg   @8lg   hg   9?g   ߫g   ?g   z|g    y吿g   `ݨg   @g   ` ?g   g    ;Sg   `?g   "տg   `1?g   "1?g   W?)r      g   
g   @:-g   g    l?g   g   `?g   ;?g   @wg   g   @Lug    C?g   F?g   @?g   @d?g   `F?g    :Ƣ?g   ?g   g   `(g   Eg   g   ¦?g   `?g   Oӿg   @?g   ?g   LSg   &,?g   ?g   Sg   @{Ŀg   ?)r   r*   g   Wg   X g   g   g    'g   +g   ؿg   ?og   ?o?g   ?g   +?g    '?g   ?g   ?g   X @g   W@)r      g   @/7g    g    21g    ?_Ͽg    ?_?g    21?g    ?g   @/7@)r      g   *g    Yܿg    Y?g   *?zUnsupported p=z, n=)torchr   NotImplementedError)r$   r%   s     r"   get_higgs_gridr/   /   sf   	1v||A#%89A$&89A %&89A $%89	A
 $%89A $%78A $%78A #$78A $%89A %&:;A %&89A $%89A #$67A $%89A $%89A  $%9:!A" $%89#A$ $%78%A& %&67'A( #$78)A* %&89+A, %&9:-A. &'9:/A0 $%781A2 %&9:3A4 %&895A6 $%787A8 $%789A: $%89;A< %&9:=A> %&9:?A@ $%78AAB %&89CAD %&89EAF %&9:GAH $%78IAJ #$67KAL #$67MAN $%78OAP &';<QAR #$78SAT $%67UAV #$78WAX $%78YAZ $%89[A\ $%89]A^ $%89_A` %&89aAb %&9:cAd $%9:eAf %&9:gAh #$67iAj #$78kAl $%:;mAn %&9:oAp %&9:qAr %&89sAt %&89uAv &';<wAx %&89yAz $%78{A| &'9:}A~ &'9:A@ #$56AAB #$78CAD &'89EAF %&9:GAH %&9:IAJ &'9:KAL #$78MAN '(9:OAP %&89QAR %&9:SAT %&89UAV $%67WAX $%89YAZ %&89[A\ $%78]A^ &'89_A` %&:;aAb %&89cAd '(:;eAf %&9:gAh $%78iAj $%89kAl $%89mAn $%89oAp %&78qAr $%67sAt $%67uAv %&78wAx %&78yAz %&89{A| $%78}A~ #$89A@ &'9:AAB $%9:CAD &':;EAF #$67GAH &'9:IAJ %&78KAL %&78MAN #$67OAP $%78QAR $%78SAT &';<UAV $%89WAX &':;YAZ %&9:[A\ %&89]A^ %&9:_A` $%78aAb $%89cAd #$67eAf %&9:gAh %&89iAj %&9:kAl $%89mAn $%67oAp %&9:qAr %&9:sAt %&9:uAv %&89wAx #$89yAz $%89{A| $%89}A~ %&78A@ %&9:AAB $%89CAD %&89EAF %&9:GAH %&9:IAJ %&89KAL %&;<MAN %&89OAP %&78QAR &'9:SAT #$56UAV $%89WAX $%67YAZ &':;[A\ %&89]A^ $%89_A` %&9:aAb $%89cAd $%89eAf %&89gAh %&89iAj $%9:kAl $%9:mAn #$78oAp $%89qAr $%78sAt #$78uAv $%67wAx $%78yAz %&9:{A| $%78}A~ $%78A@ #$67AAB %&89CAD #$:;EAF $%89GAH %&89IAJ $%78KAL %&78MAN &':;OAP $%78QAR $%89SAT %&9:UAV %&:;WAX #$56YAZ #$89[A\ #$78]A^ $%89_A` %&89aAb %&:;cAd %&9:eAf $%89gAh "#78iAj $%9:kAl %&78mAn $%89oAp %&9:qAr %&9:sAt %&89uAv $%78wAx %&9:yAz '(;<{A| $%89}A~ %&78A@ $%9:AAB $%78CAD %&89EAF $%78GAH %&9:IAJ &'9:KAL %&9:MAN %&89OAP #$78QAR $%89SAT $%78UAV %&9:WAX $%89YAZ %&89[A\ %&9:]A^ %&89_A` $%78aAb $%89cAd %&9:eAf %&:;gAh $%78iAj %&78kAl $%9:mAn %&89oAp $%89qAr $%78sAt %&78uAv $%78wAx %&9:yAz $%89{A| %&9:}A~ %&9:A@ $%67AAB #$67CAD %&9:EAF $%9:GAH %&9:IAJ %&9:KAL $%89MAN %&9:OAP %&9:QAR %&:;SAT $%89UAV #$67WAX $%78YAZ %&9:[A\ $%78]A^ %&:;_A` %&9:aAb $%89cAd #$67eAf #$78gAh %&89iAj %&9:kAl $%78mAn $%:;oAp %&78qAr $%78sAt $%89uAv %&78wAx $%89yAz &'9:{A| %&9:}A~ $%67A@ %&89AAC
 C	
H 	
1v||A$&9:A#%78A %&89A #$78	A
 $%89A $%78A &'9:A #$56A $%89A $%9:A %&89A #$78A $%78A $%89A $%78A  #$78!A" $%78#A$ $%78%A& $%67'A( $%89)A* &'9:+A, %&:;-A. &':;/A0 %&671A2 %&893A4 %&785A6 $%787A8 $%789A: %&9:;A< %&89=A> $%89?A@ %&9:AAB %&9:CAD %&89EAF %&89GAH %&89IAJ $%78KAL $%89MAN $%89OAP &';<QAR $%89SAT #$67UAV %&:;WAX $%89YAZ %&:;[A\ &':;]A^ $%89_A` %&9:aAb %&9:cAd $%9:eAf %&89gAh $%78iAj $%89kAl $%78mAn &':;oAp %&9:qAr %&89sAt %&89uAv %&:;wAx %&9:yAz #$67{A| '(<=}A~ %&67A@ $%78AAC
 C	
H Q7	||$&9:#%78#%67$&9:$&89#%78#%78"$89$&9:$&9:$&9:"$78#%78')?@#%78%'9:!
 	
( Q7	||$%#$$%$%$%$%%&%&$%$%#$#$#$#$"##$!
 	
( Q6	||$%$%$%$%#$#$#$#$	
 	
 Q6	||125H4IL^K_btauvww!N1#T!"=>>    bits
group_sizehadamard_sizec                    t        | j                        dk(  sJ d       t        |d||z  z        j                  | j                        }t
        j                  j                  |d      dz  }| j                  }| j                  }| j                  dt
        j                        } t        | dg|      } | j                  d   |z  }	| j                  d|	|      } t
        j                  j                  | d      }
t        | d      |
d d d d d f   z  } t        | dg|      j                  | j                  d   |	d|      } t        j                  | j                  d d |t
        j                  	      }t        d| j                  d   d
      D ]U  }t        j                   d| ||d
z    z  |j"                  z  |z
  d      j                  t
        j                        |||d
z    W ~ |j                  |j                  d   d      }|
t%        |      z  }
t'        |t        j(                  |
j                  |      ||z  d      |j                  |      |||||d	      \  } }
}}}| |
||j+                  t
        j,                        |dS )Nr   z%Only 2D weights are supported for now)axisT)copydtyper   r   devicer8   r*   )r   F)num_bitsr2   vector_sizer8   r:   check_correctnessr8   )weightscalestablestables2tune_metadata)r   r   r/   tor:   r-   linalgnormr8   float32r#   reshaper   emptyuint8r   argmaxTr   r
   repeat_interleaveviewfloat16)r?   r1   r$   r2   r3   gridgrid_norm_2r:   r8   multr@   codesirA   rB   rC   s                   r"   quantize_with_higgsrU     sY   v||!J#JJ!!Q1t8_-00?D,,##Dr#2a7K]]FLLEYYDY6F&1#}5F <<?m+D^^Bm4F\\vB/F*VAq$J-??F &1#q)11&,,q/4QOF KKSb)&LE1fll1or* p!LLVAB-?)?$&&)H;)V\^_bbchcncnoa!b&pMM%++a."-Ed=))F5L		% 0-:2MSTU
62FFFG] <<emm<4& r0   c                        e Zd Z	 	 	 	 	 ddedededej
                  dz  dej                  dz  dedef fd	Zd
 Z xZ	S )HiggsLinearNin_featuresout_featuresr;   r8   r:   r2   r3   c	                    t         	|           || _        || _        || _        || _        || _        ||z  dk(  sJ |dv sJ t        j                  t        j                  ||z  dz  |ft        j                  |      d      | _        t        j                  t        j                  |||z  f||      d      | _        t        j                  t        j                  d|z  f||      d      | _        t        j                  t        j                  d|z  d|z  df||      d      | _        |r3t        j                  t        j                  |||      d      | _        n| j#                  d	d        d | _        d | _        y )
Nr   )r      r,   r*   )r8   r:   F)requires_gradr   r9   bias)super__init__rX   rY   r;   r2   r3   r   	Parameterr-   rI   int16r?   r@   rA   rB   r]   register_parameter	workspacerC   )
selfrX   rY   r;   r]   r8   r:   r2   r3   	__class__s
            r"   r_   zHiggsLinear.__init__  s^    	&( $*Z'1,,,9$$$llKK0B6DEKK`fg
 llKK{j'@AW]^ns
 ll5;;8~USY#Zjop||KKHak15U6Rbg
 U[[fTY%ZjopDI##FD1+/r0   c           
      N   t        |dg| j                        }| j                  t        d      t	        || j
                  | j                  | j                  | j                  j                  t        j                        | j                  | j                  | j                        S )Nr5   z,Workspace must be set before calling forwardr>   )r3   )r#   r3   rc   	Exceptionr   r?   r@   rA   rB   rN   r-   rG   rC   )rd   xs     r"   forwardzHiggsLinear.forward  s    RD$"4"45>>!JKKKKKKKKLLEMM2NN,,	
 		
r0   )TNNr(      )
__name__
__module____qualname__intr-   r8   r:   r_   ri   __classcell__)re   s   @r"   rW   rW     su     $(&*!'0'0 '0 	'0 {{T!'0 t#'0 '0 '0R
r0   rW   modules_to_not_convertc           
         d}| j                         D ]  \  }}t        ||      st        j                  d      5  t	        |t
        j                        rt        |j                  |j                  |j                  du|j                  |j                  |j                        }t        |      |_        |j!                  d       | j#                  ||       d}ddd        |st$        j'                  d       | S # 1 sw Y   xY w)a_  
    Public method that replaces the Linear layers of the given model with HIGGS quantized layers.

    Args:
        model (`torch.nn.Module`):
            The model to convert, can be any `torch.nn.Module` instance.
        modules_to_not_convert (`list[str]`, *optional*, defaults to `None`):
            A list of nn.Linear weights to not convert. If a parameter path is in the list (e.g. `lm_head.weight`), the corresponding module will not be
            converted.
        quantization_config (`HiggsConfig`):
            The quantization config object that contains the quantization parameters.
    FmetaN)r]   r;   r3   r2   TzYou are loading your model using eetq but no linear modules were found in your model. Please double check your model architecture, or submit an issue on github if you think this is a bug.)named_modulesr   r-   r:   
isinstancer   LinearrW   rX   rY   r]   r1   r3   r2   type
source_clsrequires_grad_set_submoduleloggerwarning)modelrp   quantization_confighas_been_replacedmodule_namemodule
new_modules          r"   replace_with_higgs_linearr   &  s     $224 )V$[2HI\\&! 	)&")),(&&''D0055"5"C"C2==
 )-V
%))%0##K<$(!	) 	))$ 	

 L+	) 	)s   BC==D	c           	      L   t        j                         5  | j                         D ]e  \  }}|g }|j                  |       t	        |t
              r|j                  }|j                  }t         j                  j                  |||j                  du|j                  j                  |j                  j                        | j                  |<    |t        j                  ||j                  j                  |j                  j                              j                   j#                         | j                  |   j$                  _        t)        t+        |j-                                     dkD  rt/        ||      }|j1                  d       h | cddd       S # 1 sw Y   yxY w)a  
    Dequantizes the HiggsLinear layers in the given model by replacing them with standard torch.nn.Linear layers.
    Args:
        model (torch.nn.Module): The model containing HiggsLinear layers to be dequantized.
        current_key_name (list, optional): A list to keep track of the current module names during recursion. Defaults to None.
    Returns:
        torch.nn.Module: The model with HiggsLinear layers replaced by torch.nn.Linear layers.
    N)r]   r:   r8   r9   r   )current_key_namer5   )r-   no_gradnamed_childrenappendrt   rW   rX   rY   r   ru   r]   r@   r:   r8   _moduleseyerL   
contiguousr?   datar   listchildrendequantize_higgspop)r|   r   namer   rX   rY   r   s          r"   r   r   Q  sT    
 !002 	%LD&'#% ##D)&+.$00%22',xx D0!==// ---- (7 (t$ 4:IIk&--2F2FfmmNaNab4!JJL t$++0 4)*+a/$%5
   $7	%8 ;  s   E;FF#)r   )r,   r   r(   rj   )NN)N)!__doc__mathr   quantizers.quantizers_utilsr   utilsr   r   r   r	   r-   torch.nnr   flute.integrations.higgsr
   
flute.tuner   r   fast_hadamard_transformr   
get_loggerrk   rz   r#   rn   r/   rU   ModulerW   r   strr   r    r0   r"   <module>r      s    ]  ? Z Z @1:			H	%BE?c E?c E?n E?P0c 0# 0s 0ad 0f9
%((// 9
x(T#Y=M (V'r0   