
    Z j!)                         S SK r S SKrS SKrS SKJr  S SKJrJr  \" 5       (       a  SSKJ	r	  SSK
Jr  \" 5       (       a  S SKJr  S SKJr  \R                   " \5      rS	 rS
 r " S S\	5      r " S S\	5      rg)    N)logging)is_torch_availableis_torchao_available   )ConversionOps)get_module_from_name)unflatten_tensor_state_dict)is_metadata_torchaoc                 (   SSK Jn  SSKJn  [	        X5      (       a*  U R
                  R                   SU R                  5        S3$ [	        X5      (       a<  U R
                  R                   SU R                   S[        U R                  5       S3$ g )Nr   )AffineQuantizedTensor)LinearActivationQuantizedTensor()z(activation=	, weight=)
torchao.dtypesr   7torchao.quantization.linear_activation_quantized_tensorr   
isinstance	__class____name___quantization_typeinput_quant_funcoriginal_weight_tensor)weightr   r   s      r/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/integrations/torchao.pyr   r   &   s    4g&00""++,Af.G.G.I-J!LL&::""++,L9P9P8QQZ[mnt  oL  oL  \M  [N  NO  P  	P ;    c                    [        U R                  5      nUc7  SU R                  R                  S    SU R                  R                  S    S3$ SU R                  R                  S    SU R                  R                  S    SU 3$ )Nzin_features=   z, out_features=r   z, weight=Noner   )r   r   shape)selfr   s     r   _linear_extra_reprr    1   s    ,F~dkk//23?4;;CTCTUVCWBXXeffdkk//23?4;;CTCTUVCWBXXabhaijjr   c                       \ rS rSrS rS r   SS\\\R                  4   S\R                  R                  S-  S\S-  S\\\R                  4   4S	 jjrS
rg)TorchAoQuantize9   c                     Xl         g Nhf_quantizerr   r'   s     r   __init__TorchAoQuantize.__init__:       (r   c                 "   SSK Jn  [        UR                  5       5      R                  nU R
                  R                  (       a?  UR                  S:X  a/  UR                  S5        U" X/UQ70 UD6  UR                  S5        gU" X/UQ70 UD6  g)a  Run quantize_, moving to CUDA first if CPU offloading is active.

Some torchao quantization ops (e.g. int4 packing) only have CUDA kernels.
When a layer is destined for CPU (e.g. CPU offloading), we temporarily move
it to CUDA for quantization, then move the result back to CPU.
r   )	quantize_cpucudaN)	torchao.quantizationr-   next
parametersdevicer'   offload_to_cputypeto)r   moduleconfigargskwargsr-   target_devices          r   	_quantizeTorchAoQuantize._quantize=   sx     	3V..0188++0B0Be0KIIff6t6v6IIef6t6v6r   N
input_dictmodelfull_layer_namereturnc                    [        UR                  5       5      S   u  pg[        U[        5      (       a  US   OUn[	        X#5      u  p[
        R                  R                  XwR                  S9UR                  U	'   UR                  5       n
[        U5      [        U
5      :H  nU R                  R                  R                  nU(       a+  U(       a$  [        UR                   R#                  SS9SS5        SSKJn  U R                  R                  R)                  5       n[        X5      (       Ga  UR+                  SS	5      u  nnS nX>R,                  ;   a-  UR/                  S
5      (       a   S5       eUR0                  U   nOXR,                  ;   a-  UR/                  S
5      (       a   S5       eUR0                  U   nOUR,                   H{  nUR/                  S
5      (       d  M  [2        R4                  " USS  U5      (       a  UR0                  U   n  OP[2        R4                  " USS  U5      (       d  Ml  UR0                  U   n  O   UR0                  R7                  SS 5      nUb  US:X  a  U(       a!  U(       a  UR8                  R;                  5       nU R=                  UUS 5        UR?                  U5        SUl         URC                  SS9 H
  nSUl         M     U(       a  U(       a  SW0$ 0 $ U" UU05      nU R=                  UUS S9  UR?                  U5        SUl         URC                  SS9 H
  nSUl         M     0 $ X70$ U(       a!  U(       a  UR8                  R;                  5       nU R=                  XR                  R                  R)                  5       5        UR?                  U5        SUl         URC                  SS9 H
  nSUl         M     U(       a  U(       a  SW0$ 0 $ )Nr   )requires_gradT)decodertie_word_embeddingsF)FqnToConfig.r   zre:zHparam fqn should not start with`re:`, which is used for specifying regexzImodule fqn should not start with`re:`, which is used for specifying regex   _defaultr   c                     g)NT )xfqns     r   <lambda>)TorchAoQuantize.convert.<locals>.<lambda>   s    dr   )recursezlm_head.weight)	filter_fn)"tupleitemsr   listr   torchnn	ParameterrC   _parametersget_input_embeddingsidr'   quantization_configuntie_embedding_weightssetattrr8   get_text_configr0   rF   get_apply_tensor_subclassrsplitfqn_to_config
startswithmodule_fqn_to_configre	fullmatchgetr   cloner<   discard_is_hf_initializedr2   )r   r>   r?   r@   missing_keysr:   _valuer7   tensor_nameinput_embedis_embedding_paramr\   rF   r8   
module_fqntop_level_param_namecmaybe_module_fqn_patternlm_headparamcustom_param_fqn_configs                         r   convertTorchAoQuantize.convertN   s    ))+,Q/&ud33a25J*/((*<*<UReRe*<*f;' 002Z2k?:"&"3"3"G"G"_"_"'9ELL000>@UW\]4""66PPRf**/>/E/Ec1/M,J,A"6"66%0077 ^7 //@333%0077 _7 //
; 170D0D,3>>uEE &>qr&BOTT"778PQ&>qr&BJOO"778PQ 1E 3377
DIA}'83).E"(--"5"5"7NN61/BD ((904F-
 "(!2!25!2!A370 "B:LQh,g6pnpp /:;OQR:S.T+NN6+BdNS ((904F-!'!2!25!2!A370 "BI#++"9mm))+Gv00DD^^`a_-$(!&&u&5E'+E$ 6.@E\ '*dbddr   r&   )NNN)r   
__module____qualname____firstlineno__r)   r<   dictstrrU   TensorrV   Modulerw   __static_attributes__rK   r   r   r"   r"   9   s    )7( )-&*\eell*+\e xx%\e t	\e 
c5<<	 \e \er   r"   c                       \ rS rSrS r    SS\\\R                  4   S\	\   S-  S\R                  R                  S-  S\S-  S\\\R                  4   4
S	 jjrS
rg)TorchAoDeserialize   c                     Xl         g r%   r&   r(   s     r   r)   TorchAoDeserialize.__init__   r+   r   Nr>   source_patternsr?   r@   rA   c           
         [        UR                  5       5      S   U;  n0 nSR                  UR                  S5      SS 5      n	U(       a'  [	        US   [         5      (       a	  US   S   n
OYUS   n
OSUR                  5        H?  n[        X   5      S:w  a  [        SU S[        X   5       S	35      eX   S   X SU 3'   MA     U(       a  UW
0$ [        U R                  R                  5      (       d  [        S
5      e[        XR                  R                  5      u  pU(       a   eX   n[        X45      u  nn[	        U[        R                  R                  5      (       a   [        R                   " ["        U5      Ul        XN0$ )a  
Consolidates tensor subclass components before reconstructing the object

For example:
    input_dict: {
        "_weight_qdata": torch.Tensor,
        "_weight_scale": torch.Tensor,
    }
    full_layer_name: "model.layers.0.self_attn.k_proj.weight"

    Given this, we reconstruct a Float8Tensor instance using the qdata and scale
    and return it as a dictionary with the full_layer_name as the key and the recovered
    Float8Tensor instance as the value.
r   rG   Nr   r   zExpected a single tensor for z	 but got z tensors insteadz$Invalid torchao safetensors metadata)rT   keysjoinsplitr   len
ValueErrorr
   r'   metadatar	   r   rU   rV   Lineartypes
MethodTyper    
extra_repr)r   r>   r   r?   r@   rj   r:   is_unsafe_serialization
param_data
layer_namer   suffixunflattened_state_dictleftover_state_dict	new_paramr7   rk   s                    r   rw   TorchAoDeserialize.convert   s   . #'z'8"9!"<O"S
XXo33C8"=>
"*X.55#H-a0#H-$//+z)*a/$7xyZM_I`Haaqr  8B7I!7L
\6(34 , ##V,,$T%6%6%?%?@@CDD6Q))227
3 '&&*;	(@	fehhoo.. % 0 01CV LF++r   r&   )NNNN)r   ry   rz   r{   r)   r|   r}   rU   r~   rT   rV   r   rw   r   rK   r   r   r   r      s    ) -1(,&*9,ell*+9, cT)9, xx%	9,
 t9, 
c5<<	 9, 9,r   r   )rd   r   rU   transformers.utilsr   transformers.utils.import_utilsr   r   core_model_loadingr   quantizers.quantizers_utilsr   1torchao.prototype.safetensors.safetensors_supportr	   /torchao.prototype.safetensors.safetensors_utilsr
   
get_loggerr   loggerr   r    r"   r   rK   r   r   <module>r      sv    
   & T 2 >  T			H	%Pkqem qeh=, =,r   