
    Z j)              
          S r SSKJrJr  SSKJr  SSKJrJr  \" 5       (       a
  SSK	r	SSK
Jr  \R                  " \5      rSqS r " S S	\R"                  5      r   SS
\\   S-  S\4S jjrS\	R.                  S\S\4S jrS\	R.                  S\	R.                  S\	R.                  S\S\4
S jr " S S\5      r " S S\5      rg)a  
Metal affine quantization integration for transformers.

This module provides:
  - ``MetalLinear``: a drop-in replacement for ``nn.Linear`` that stores weights
    as affine-quantized uint32 packed tensors and uses the ``quantization-mlx``
    Metal kernels for the forward pass.
  - ``replace_with_metal_linear``: walks a model and swaps every eligible
    ``nn.Linear`` with ``MetalLinear``.
  - ``MetalQuantize`` / ``MetalDequantize``: weight conversion operations that
    participate in the new ``WeightConverter`` pipeline.

Weight layout (transposed, matching ``affine_qmm_t``):
  - ``weight``: ``[N, K_packed]`` (``uint32``) -- K is the packed dimension.
  - ``scales``:  ``[N, K // group_size]`` (``float16 / bfloat16``)
  - ``qbiases``: ``[N, K // group_size]`` (same dtype as scales)

The kernel call is ``affine_qmm_t(x, weight, scales, qbiases, group_size, bits)``
which computes ``y = x @ dequant(weight).T``, identical to ``nn.Linear``.
   )ConversionOps_IdentityOp)should_convert_module)is_torch_availablelogging    Nc                      [         c   SSKJn   U " S5      q [         $ [         $ ! [         a  n[	        SU S35      UeSnAff = f)z>Lazily load the quantization-mlx kernel from Hugging Face Hub.N   )
get_kernelz0kernels-community/mlx-quantization-metal-kernelsz9Failed to load the quantization-mlx kernel from the Hub: zm. Make sure you have `kernels` installed (`pip install kernels`) and are running on an Apple Silicon machine.)_metal_kernelhub_kernelsr   	ExceptionImportError)r   es     }/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/integrations/metal_quantization.py_get_metal_kernelr   3   s`     		/&'YZM =  	KA3 O? ? 		s   # 
A=Ac                       \ rS rSrSrS\R                  SS4S\S\S\S	\S
\4
S jjr	S\R                  S\R                  4S jrSrg)MetalLinearI   z
A quantized linear layer that stores weights in affine uint32 packed format
and uses the ``quantization-mlx`` Metal kernels for the forward pass.

Parameters match ``nn.Linear`` with additional quantization metadata.
F      in_featuresout_featuresbiasbits
group_sizec                    [         R                  R                  U 5        Xl        X l        XPl        X`l        SU-  nX-  nX-  n	U[        R                  :X  a<  [         R                  " [        R                  " X([        R                  S9SS9U l        O-[         R                  " [        R                  " X!US9SS9U l        U[        R                  :X  a  [        R                  OS n
[         R                  " [        R                  " X)U
S9SS9U l        [         R                  " [        R                  " X)U
S9SS9U l        U(       a0  [         R                  " [        R                  " U5      5      U l        g U R!                  SS 5        g )N    )dtypeF)requires_gradr   )nnModule__init__r   r   r   r   torchuint32	Parameterzerosweightfloat32scalesqbiasesr   register_parameter)selfr   r   r   r   r   r   elems_per_intk_packedn_groupsscales_dtypes              r   r#   MetalLinear.__init__Q   s    			4 &(	$d
/,ELL ,,u{{<QVQ]Q]'^nstDK,,u{{<TY'ZjopDK(-(=u}}4ll5;;|\#Zjop||EKKl$[kpqU[[%>?DI##FD1    inputreturnc                    U R                   R                  [        R                  :w  a4  [        R
                  R                  XR                   U R                  5      $ [        5       nUR                  UU R                   U R                  R                  UR                  5      U R                  R                  UR                  5      U R                  U R                  5      nU R                  b  X0R                  -   nU$ N)r(   r   r$   r%   r!   
functionallinearr   r   affine_qmm_tr*   tor+   r   r   )r-   r4   kerneloutputs       r   forwardMetalLinear.forwards   s    ;;,==''{{DIIFF"$$$KKKKNN5;;'LLOOEKK(OOII
 99 ii'Fr3   )r   r   r   r   r   r+   r*   r(   N)__name__
__module____qualname____firstlineno____doc__r$   r%   intboolr#   Tensorr>   __static_attributes__ r3   r   r   r   I   sj     ll 2 2  2 	 2  2  2DU\\ ell r3   r   modules_to_not_convertpre_quantizedc           
         UR                   (       a  U $ UR                  nUR                  nSnU R                  5        H  u  px[	        Xq5      (       d  M  [        U[        R                  5      (       d  M8  U(       a  0 OSS0n	[        SUR                  UR                  UR                  SLUUS.U	D6n
U R                  Xz5        SnM     U(       d  [        R                  S5        U $ )aD  
Replace every eligible ``nn.Linear`` with ``MetalLinear``.

Args:
    model: the ``PreTrainedModel`` (on the meta device at this point).
    modules_to_not_convert: module names to leave untouched.
    quantization_config: the ``MetalConfig`` instance.
    pre_quantized: ``True`` when loading from a quantized checkpoint.
Fr   N)r   r   r   r   r   TzYou are loading a model with Metal quantization but no nn.Linear modules were found. Please double check your model architecture.rI   )
dequantizer   r   named_modulesr   
isinstancer!   Linearr   r   r   r   set_submoduleloggerwarning)modelrJ   quantization_configrK   r   r   has_been_replacedmodule_namemodulemodule_kwargs
new_modules              r   replace_with_metal_linearr[      s     %%##D$//J$224$[IIfbii(("/Bgt_M$ "..#00[[,%  J 8 $!  5$ ;	

 Lr3   r(   r   r   c                    U R                   u  p4SU-  nSU-  S-
  nXA-  nU R                  5       R                  X7U5      nUR                  SS9R                  n	UR                  SS9R                  n
X-
  U-  R                  SS9nU	nXR                  S5      -
  UR                  S5      -  nUR                  5       R                  SU5      R                  [        R                  5      R                  X45      nXE-  n[        R                  " X>[        R                  U R                  S9n[        U5       H  nXS	S	2US	U24   UU-  -  -  nM     UR                  [        R                  5      X4$ )
a8  
Quantize a 2-D float weight ``[N, K]`` into packed uint32 + scales + biases.

Returns ``(w_packed, scales, biases)`` with:
  - ``w_packed``: ``[N, K // (32 // bits)]`` uint32
  - ``scales``:   ``[N, K // group_size]`` float32/float16/bfloat16
  - ``biases``:   ``[N, K // group_size]`` float32/float16/bfloat16
r   r
   )dimg:0yE>)minr   r   deviceN)shapefloatreshaper_   valuesmaxclamp	unsqueezeroundr;   r$   int32r'   ra   ranger%   )r(   r   r   NKr.   max_valr0   	w_groupedw_minw_maxr*   biasesw_intr/   w_packedis                    r   _affine_quantize_tensorrv      sT    <<DA$JMDyAoGH&&qJ?IMMbM!((EMMbM!((E}'..4.8FF))"--1A1A"1EEEKKM7+..u{{;CCAIE !H{{1ekk&--PH=!!Q---.4!8<< " ;;u||$f44r3   rt   r*   rr   c                 *   U R                   S   nSU-  nSU-  S-
  nU R                   S   U-  nU R                  [        R                  5      n	[        R                  " XX[        R
                  U R                  S9n
[        U5       H%  nXU-  -	  U-  R                  5       U
SS2USU24'   M'     U
R                  USU5      nXR                  5       R                  S5      -  UR                  5       R                  S5      -   nUR                  XX5      $ )zj
Dequantize a packed uint32 weight ``[N, K_packed]`` back to float.

Returns a ``[N, K]`` float32 tensor.
r   r   r
   r`   Nr]   )rb   r;   r$   rj   r'   r)   ra   rk   rc   rd   rh   )rt   r*   rr   r   r   rl   r.   rn   rm   
w_packed_iw_flatru   ro   w_deqs                 r   _affine_dequantize_tensorr{      s     	qA$JMDyAoGqM)AU[[)J[[U]]8??KF=!(2ax(@G'K&R&R&Tq!"]""# " q"j1I0044v||~7O7OPR7SSE==r3   c                   0    \ rS rSrSrS rS\S\4S jrSrg)	MetalQuantize   z
Quantize a full-precision weight tensor into (weight, scales, qbiases).

Used during quantize-on-the-fly.  The float ``weight`` is replaced in-place
by the packed uint32 tensor.
c                     Xl         g r7   hf_quantizerr-   r   s     r   r#   MetalQuantize.__init__       (r3   
input_dictr5   c                    [        [        UR                  5       5      5      u  p4[        U[        5      (       a  US   OUnU R
                  R                  R                  nU R
                  R                  R                  n[        XFU5      u  pxn	SU;   a  UR                  SS5      S   OSn
U
(       a  U
 S3OSnU
(       a  U
 S3OSnUR                  nX7XR                  U5      XR                  U5      0$ )	Nr   .r
    z.scalesr*   z.qbiasesr+   )nextiteritemsrO   listr   rU   r   r   rv   rsplitr   r;   )r-   r   kwargs
target_keyvaluer   r   rt   r*   rr   base	scale_keybias_key
orig_dtypes                 r   convertMetalQuantize.convert   s     j&6&6&8!9:
&ud33a  4499&&::EE
#:5d#S &/2j/@z  a(+b(,tfG$(	(,dV8$)[[
yy,ii
+
 	
r3   r   N)	r@   rA   rB   rC   rD   r#   dictr   rH   rI   r3   r   r}   r}      s    )
$ 
T 
r3   r}   c                   R    \ rS rSrSrS rSS\S\S-  S\4S jjr\	SS	 j5       r
S
rg)MetalDequantizei  z
Dequantize (weight, scales, qbiases) back to a full-precision tensor.

Used when ``dequantize=True`` is set in the config to fall back to a normal
``nn.Linear`` on devices without MPS.
c                     Xl         g r7   r   r   s     r   r#   MetalDequantize.__init__  r   r3   Nr   full_layer_namer5   c                 .   U R                   R                  R                  nU R                   R                  R                  n[	        U5      S:  a  X!S   0$ US   S   nUS   S   nUS   S   n[        XgXU5      n	X)R                  UR                  5      0$ )Nr   zweight$r   r*   r+   )r   rU   r   r   lenr{   r;   r   )
r-   r   r   r   r   r   	quantizedr*   r+   rz   s
             r   r   MetalDequantize.convert  s      4499&&::EE
z?Q#	%:;;y)!,	H%a(Y'*))WRVW&,,!788r3   c                     [        5       $ r7   )r   )r-   s    r   
reverse_opMetalDequantize.reverse_op*  s
    }r3   r   r7   )r5   r   )r@   rA   rB   rC   rD   r#   r   strr   propertyr   rH   rI   r3   r   r   r     s?    )9$ 9t 9Y] 9  r3   r   )NNF)rD   core_model_loadingr   r   quantizers.quantizers_utilsr   utilsr   r   r$   torch.nnr!   
get_loggerr@   rR   r   r   rP   r   r   r   rF   r[   rG   rE   rv   r{   r}   r   rI   r3   r   <module>r      s   * < ? /  
		H	%,;")) ;@ 04	/ I,/ 	/d5ELL 5c 5 5Bll$)LL:?,,TW_b.
M 
@m r3   