
    Z j7                        S SK Jr  SSKJr  SSKJr  SSKJrJr  SSK	J
r
JrJrJrJr  \" 5       (       a
  S SKrS SKJr  \
" 5       (       a  S S	KJr  \" 5       r\" 5       (       a  \(       d  S SKr\R,                  " \5      r " S
 S\5      r " S S\R                   R4                  5      r " S S\R8                  5      r\" SS9S 5       r SS\\    S-  4S jjr!g)    )	lru_cache   )ACT2FN)ConversionOps)get_module_from_nameshould_convert_module)is_accelerate_availableis_fbgemm_gpu_availableis_torch_availableis_torch_xpu_availableloggingN)nn)init_empty_weightsc            	           \ rS rSrS r S	S\\\R                  \	\R                     -  4   S\R                  R                  S-  S\\\R                  4   4S jjrSrg)
FbgemmFp8Quantize,   c                     Xl         g Nhf_quantizer)selfr   s     u/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/integrations/fbgemm_fp8.py__init__FbgemmFp8Quantize.__init__-   s    (    N
input_dictmodelreturnc                 z   [        UR                  5       5      S   u  pEUS   nSSKJn  [	        X$5      u  px[        Xv5      (       Ga  US:X  a}  UR                  SS5      n	U	R                  n
U	R                  SU
S   5      n[        U5      u  pUR                  U
5      nUR                  SS5      nUR                  U
S   SU
S   5      nOUS:X  a|  UR                  SS5      n	U	R                  n
U	R                  SU
S   5      n[        U5      u  pUR                  U
5      nUR                  SS5      nUR                  U
S   U
S   S5      nOI[        U5      u  p[        R                  R                  UR                  UR                  S   S5      5      nU[        R                  R                  W5      U S3W0$ )	Nr   r   )FbgemmFp8Llama4TextExpertsgate_up_proj   	down_proj_scale)tupleitemsintegrationsr    r   
isinstance	transposeshapereshapequantize_fp8_per_rowtorchr   	Parameterview)r   r   r   kwargs
target_keyvaluer    moduletensor_nametransposed_paramoriginal_shapeflattened_paramnew_value_flatweight_scale_flat	new_valueweight_scales                   r   convertFbgemmFp8Quantize.convert0   s    "*"2"2"45a8
a=25Ef99n, $)??1a#8  "2!7!7"2":":2~b?Q"R 5I4Y1 +22>B	%//15	0889JA~^_O`a+ $)??1a#8  "2!7!7"2":":2~b?Q"R 5I4Y1 +22>B	%//15	0889JN[\L]_`a&:5&A#I 88--l.?.?@R@RST@UWX.YZLEHH..y9j\;PR^__r   r   r   )__name__
__module____qualname____firstlineno__r   dictstrr.   Tensorlistr   Moduler=   __static_attributes__ r   r   r   r   ,   sn    ) )-2`ellT%,,-???@2` xx%2`
 
c5<<	 2` 2`r   r   c                   H   ^  \ rS rSr\R
                  4U 4S jjrS rSrU =r	$ )FbgemmFp8Lineare   c                 ^  > [         TU ]  XU5        Xl        X l        [        R
                  R                  [        R                  " X!4US95      U l        [        R
                  R                  [        R                  " US4[        R                  S95      U l
        U R                  S[        R                  " S/[        R                  S9SS9  U(       aP  [        R
                  R                  [        R                  " U R                  [        R                  S95      U l        g S U l        g )Ndtyper"   input_scale_ubF
persistent)superr   in_featuresout_featuresr.   r   r/   zerosweightfloat32r<   register_bufferfloatbias)r   rT   rU   r[   rO   	__class__s        r   r   FbgemmFp8Linear.__init__f   s    D9&(hh((l5PX])^_!HH..u{{L!;LTYTaTa/bc-u{{A3ekk/R_de**5;;8I8IRWR_R_+`aDIDIr   c           	         / UR                   S S QSP7n[        UR                  SUR                   S   5      R                  5       U R                  S9u  p4U R
                  R                  [        R                  5      n[        (       ac  [        R                  " UU R                  R                  5       UR                  S5      UR                  5       UR                  U R                  S9nOP[        R                   R"                  R%                  X0R                  XESS9nU R                  b  X`R                  -   OUnUR                  UR&                  5      nUR)                  U5      nAAU$ )Nr#   )scale_ub)scale_ascale_b	out_dtyper[   Tuse_fast_accum)r+   r-   r0   
contiguousrP   r<   tor.   rX   _is_torch_xpu_available
_scaled_mmrW   t	unsqueezerO   r[   opsfbgemmf8f8bf16_rowwisedevicer,   )r   xoutput_shapex_quantizedx_scaleweight_scale_float32outputs          r   forwardFbgemmFp8Linear.forwardt   s*   *"*r*  4AFF2qwwr{4K4V4V4Xcgcvcvw
  $0033EMMB""%%))"-,..0''YYF YY%%66[['X\ 7 F ,099+@Vii'fF188$-r   )r[   rT   rU   rW   r<   )
r?   r@   rA   rB   r.   float8_e4m3fnr   ru   rH   __classcell__r\   s   @r   rK   rK   e   s    >C>Q>Q  r   rK   c                   H   ^  \ rS rSr\R
                  4U 4S jjrS rSrU =r	$ )r       c                 T  > [         TU ]  5         UR                  U l        UR                  U l        UR
                  U l        U R                  U l        [        UR                     U l	        [        R                  R                  [        R                  " U R                  U R
                  SU R                  -  4[        R                  S95      U l        [        R                  R                  [        R                  " U R                  SU R                  S-  4[        R                   S95      U l        [        R                  R                  [        R                  " U R                  U R                  U R
                  4[        R                  S95      U l        [        R                  R                  [        R                  " U R                  U R
                  S4[        R                   S95      U l        U R)                  S[        R                  " S/[        R*                  S9SS9  g )Nr   rN   r"   rP   FrQ   )rS   r   num_local_expertsnum_expertsintermediate_sizehidden_size
expert_dimr   
hidden_actact_fnr.   r   r/   rV   rw   r!   rX   gate_up_proj_scaler$   down_proj_scalerY   rZ   )r   configrO   r\   s      r   r   #FbgemmFp8Llama4TextExperts.__init__   s   !33!'!9!9!--00V../!HH..KK))4+;+;Q=PQY^YlYlm
 #((("4"4KK))1doo.AB%--X#
 ++KK))4??D<L<LMUZUhUhi
  %xx11KK))4+;+;Q?u}}U 
 	-u{{A3ekk/R_der   c           
      	   UR                  U R                  SU R                  5      nSn[        R                  " U5      n[        U R                  5       GH4  nX   nUR                  SU R                  5      n[        XbU R                  5      u  pxU R                  R                  S   S-  n	U R                  R                  [        R                  5      n
[        (       GaC  [        R                  " UU R                  U   R!                  SS5      SU	 R#                  5       R%                  5       UR'                  S5      X   S   SU	 R                  SS5      R#                  5       R%                  5       UR(                  S9n[        R                  " UU R                  U   R!                  SS5      U	S R#                  5       R%                  5       UR'                  S5      X   S   U	S R                  SS5      R#                  5       R%                  5       UR(                  S9nO[        R*                  R,                  R/                  UU R                  U   R!                  SS5      SU	 R#                  5       UX   S   SU	 R                  SS5      R#                  5       SS9n[        R*                  R,                  R/                  UU R                  U   R!                  SS5      U	S R#                  5       UX   S   U	S R                  SS5      R#                  5       SS9nXR1                  U5      -  n[        XU R                  5      u  pU R2                  R                  [        R                  5      n[        (       a  [        R                  " UU R4                  U   R!                  SS5      R#                  5       UR'                  S5      UU   R                  SS5      R#                  5       R%                  5       UR(                  S9nOw[        R*                  R,                  R/                  UU R4                  U   R!                  SS5      R#                  5       UUU   R                  SS5      R#                  5       SS9nUX4'   GM7     UR                  UR6                  5      nUR                  SU R                  5      $ )	z
Args:
    hidden_states (torch.Tensor): (batch_size * token_num, hidden_size)
Returns:
    torch.Tensor: (batch_size * token_num, hidden_size)
r#   Nr   r   r"   )r`   ra   rb   Trc   )r0   r~   r   r.   
empty_likeranger,   r-   rP   r!   r+   r   rf   rX   rg   rh   r*   re   ri   rj   rO   rk   rl   rm   r   r   r$   rn   )r   hidden_states
num_tokensnext_statesiexpert_hiddenexpert_hidden_reshapedexpert_quantizedexpert_scalesharded_expert_dimgate_up_proj_scale_float32gateup	activatedactivated_quantizedactivated_scaledown_proj_scale_float32expert_outputs                     r   ru   "FbgemmFp8Llama4TextExperts.forward   s\    &**4+;+;RAQAQR
 &&}5t''(A),M%2%:%:2t?O?O%P"-A&D4G4G.* "&!2!2!8!8!<!A)-)@)@)C)CEMM)R&&&''$%%a(221a89L:LMXXZ\\^(222669!<=P>PQVVWY[\]hhjlln+11 %%$%%a(221a89K9LMXXZ\\^(222669!<=O=PQVVWY[\]hhjlln+11 yy''88$%%a(221a89L:LMXXZ .1!45H6HINNrSTU``b#' 9  YY%%66$%%a(221a89K9LMXXZ .1!45G5HINNrSTU``b#' 7  [[..I3G	_c_r_r3s0&*&:&:&=&=emm&L#&& % 0 0'NN1%//15@@B+55b93A6;;BBMMOQQS+11! !&		 0 0 A A'NN1%//15@@B#+A.33B:EEG#' !B ! +KNA )B "nn]%9%9:D$4$455r   )	r   r$   r   r   r!   r   r   r   r~   )
r?   r@   rA   rB   r.   rX   r   ru   rH   rx   ry   s   @r   r    r       s    %*]] f0P6 P6r   r    r"   )maxsizec                      [         (       a  SSKJn   U " S5      R                  $ [        R
                  R                  R                  $ )Nr"   
get_kernelzkernels-community/fp8-fbgemm)rg   hub_kernelsr   r-   r.   rk   rl   r   s    r   get_quantize_fp8_per_rowr      s3    +89NNN99000r   modules_to_not_convertc                    [        5       qSnU(       a  0 OSS0nU R                  5        H  u  px[        Xq5      (       d  M  Sn	[	        SS9   UR
                  R                  S:X  a@  [        U R                  SU R                  5      n
[        U
=(       d    U R                  5      n	O^[        U[        R                  5      (       a?  [        UR                  UR                  UR                   SL40 UD6n	U	R#                  S5        SSS5        U	c  M  U R%                  Xy5        SnM     U(       d  [&        R)                  S5        U $ ! , (       d  f       NH= f)	a  
A helper function to replace all `torch.nn.Linear` modules by `FbgemmFp8Linear` modules.
This will enable running your models using high performance fp8 kernel from FBGEMM library.

Parameters:
    model (`torch.nn.Module`):
        Input model or `torch.nn.Module` as the function is run recursively.
    modules_to_not_convert (`list[`str`]`, *optional*, defaults to `None`):
        Names of the modules to not convert. In practice we keep the `lm_head` in full precision for numerical stability reasons.
    quantization_config (`FbgemmFp8Config`):
        The quantization config object that contains the quantization parameters.
    pre_quantized (`book`, defaults to `False`):
        Whether the model is pre-quantized or not
FrO   NT)include_buffersLlama4TextExpertstext_configzYou are loading your model using FP8 quantization but no linear modules were found in your model. Please double check your model architecture, or submit an issue on github if you think this is a bug.)r   r-   named_modulesr   r   r\   r?   getattrr   r    r)   r   LinearrK   rT   rU   r[   requires_grad_set_submoduleloggerwarning)r   r   quantization_configpre_quantizedtp_planhas_been_replacedmodule_kwargsmodule_namer4   
new_moduler   s              r   replace_with_fbgemm_fp8_linearr   	  s.   $ 45'Bgt_M$224$[II
5((,??
 &ellM5<<P78Su||T
FBII..,&&''KKt+ $	
 ))%0 6" K4 5  58 	
 L= 65s   B9E
E	)NNFN)"	functoolsr   activationsr   core_model_loadingr   quantizers.quantizers_utilsr   r   utilsr	   r
   r   r   r   r.   r   
accelerater   rg   fbgemm_gpu.experimental.gen_ai
fbgemm_gpu
get_loggerr?   r   r   r   rK   rG   r    r   rF   rD   r   rI   r   r   <module>r      s        . U  -02 %<)			H	%6` 6`r,ehhoo ,^i6 i6X 11 1 tx:#'9t#3:r   