
    Z j                         S SK JrJr  SSKJr  SSKJr  \(       a  SSKJr  SSK	J
r
  SSKJrJrJrJrJr  SS	K	Jr  \" 5       (       a  S S
Kr\R&                  " \5      r " S S\5      rg
)    )TYPE_CHECKINGOptional   )HfQuantizer)get_module_from_name   )PreTrainedModel)FPQuantConfig)is_fp_quant_availableis_qutlass_availableis_torch_availableis_torch_xpu_availablelogging)QuantizationConfigMixinNc                      ^  \ rS rSr% SrSrSrS\S'   S\4U 4S jjr	S r
SS
 jrSSS\S	\4S jr  SS jr\SS\S   4S jj5       rS rS rS rSrU =r$ )FPQuantHfQuantizer"   z
Quantizer for the FP-Quant method. Enables the loading of prequantized models and in-flight quantization of full-precision models.
FTr
   quantization_configc                 (   > [         TU ]  " U40 UD6  g N)super__init__)selfr   kwargs	__class__s      {/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/quantizers/quantizer_fp_quant.pyr   FPQuantHfQuantizer.__init__+   s    ,77    c                    [         R                  R                  5       (       d  [        5       (       d  [	        S5      e[        5       (       d&  U R                  R                  (       d  [        S5      eU R                  R                  (       am  U R                  R                  S:X  aS  [         R                  R                  5       (       a0  [         R                  R                  5       S   S:  a  [        S5      eU R                  R                  (       a  [        R                  S5        [        5       (       d  [        S5      eUc&  U R                  R                  (       d  [        S	5      e[        U[         5      (       a^  U R                  R                  (       d#  [#        U5      S
:  a  SUR%                  5       ;   d  SUR%                  5       ;   a  [        S5      eg g )Nz]FPQuant quantization is only supported on GPU or Intel XPU. Please use a different quantizer.a  Using `fp_quant` with real quantization requires a **Blackwell GPU** and qutlass: `git clone https://github.com/IST-DASLab/qutlass.git && cd qutlass && pip install --no-build-isolation .`. You can use `FPQuantConfig(pseudoquantization=True, ...)` to use Triton-based pseudo-quantization. It doesn't provide any speedups but emulates the quantization behavior of the real quantization.nvfp4r   	   zNVFP4 pseudoquantization requires a GPU with compute capability >= 9.0 (Hopper or newer) because the Triton kernel uses the `fp8e4nv` type. Please use `forward_dtype='mxfp4'` instead, or use a GPU with compute capability >= 9.0.zUsing pseudo-quantization for FP-Quant. This doesn't provide any speedups but emulates the quantization behavior of the real quantization.zGUsing `fp_quant` quantization requires fp_quant: `pip install fp_quant`zyYou are attempting to load a FPQuant model without setting device_map. Please set device_map comprised of 'cuda' devices.r   cpudiskzYou are attempting to load a FPQuant model with a device_map that contains a CPU or disk device. This is not supported. Please remove the CPU or disk device from the device_map.)torchcudais_availabler   NotImplementedErrorr   r   pseudoquantizationImportErrorforward_dtypeget_device_capability
ValueErrorloggerwarningr   
isinstancedictlenvalues)r   
device_mapr   s      r   validate_environment'FPQuantHfQuantizer.validate_environment.   s   zz&&((1G1I1I%o  $%%d.F.F.Y.Y S 
 $$77((66'A

''))

002159?  ##66NN ] %&&ghhd&>&>&Q&QF  
D)),,??
Oa'Z..00Z..00 h  1 *r   returnc                     U[         R                  :w  a)  [        R                  SU S35        [         R                  nU$ )NzSetting dtype to zP, but only bfloat16 is supported right now. Overwriting torch_dtype to bfloat16.)r$   bfloat16r-   warning_once)r   dtypes     r   update_dtypeFPQuantHfQuantizer.update_dtype^   s9    ENN"#E7*z{ NNEr   modelr	   
param_namec                 X    SSK Jn  [        X5      u  pV[        XT5      (       a  US;   a  gg)Nr   )FPQuantLinear)weightqweightdqweightTF)fp_quantr@   r   r/   )r   r=   r>   r   r@   moduletensor_names          r   param_needs_quantization+FPQuantHfQuantizer.param_needs_quantizationf   s+    *25Ef,,@a1ar   c                 J    SSK Jn  SSKJn  U" UU" U R                  5      S9  g )Nr   )replace_with_fp_quant_linearr   )adapt_fp_quant_config)fp_quant_linear_config)rD   rJ   integrations.fp_quantrK   r   )r   r=   r   rJ   rK   s        r   $_process_model_before_weight_loading7FPQuantHfQuantizer._process_model_before_weight_loadingp   s#    
 	:A$#89Q9Q#R	
r   c                 j    U R                   R                  nU(       d  [        R                  S5        U$ )NzYou are attempting to train a model with FPQuant quantization. This is only supported when `store_master_weights=True`. Please set `store_master_weights=True` to train the model.)r   store_master_weightsr-   r.   )r   r=   	trainables      r   is_trainableFPQuantHfQuantizer.is_trainable~   s0    ,,AA	NN E r   c                     g)NT )r   s    r   is_serializable"FPQuantHfQuantizer.is_serializable   s    r   c                     SSK Jn  U" U 5      $ )Nr   )FpQuantQuantize)rM   rZ   )r   rZ   s     r   get_quantize_ops#FPQuantHfQuantizer.get_quantize_ops   s    ;t$$r   c                     SSK Jn  SSKJn  U R                  (       a=  U R
                  R                  (       a  U" S/SU" U 5      /S9/$ U" S/SU" U 5      /S9/$ / $ )Nr   )WeightConverter)FpQuantDeserializez	.dqweight)source_patternstarget_patterns
operationsz.qweight)core_model_loadingr^   rM   r_   pre_quantizedr   r(   )r   r^   r_   s      r   get_weight_conversions)FPQuantHfQuantizer.get_weight_conversions   ss    8>''::#)4(3$6t$<#=  $)3(2$6t$<#=  	r   rV   )r:   torch.dtyper6   rg   )r=   r	   r   )__name__
__module____qualname____firstlineno____doc__requires_calibrationis_qat_trainable__annotations__r   r   r4   r;   strboolrG   rN   propertyr   rS   rW   r[   re   __static_attributes____classcell__)r   s   @r   r   r   "   s     !((8,C 8.`.? S _c 
 
 (+<"=  %
 r   r   )typingr   r   baser   quantizers_utilsr   modeling_utilsr	   utils.quantization_configr
   utilsr   r   r   r   r   r   r$   
get_loggerrh   r-   r   rV   r   r   <module>r|      sP    +  2 09 t t ? 			H	%B Br   
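

# A minimal usage sketch (illustrative only, not part of this module): this quantizer
# is selected automatically by `from_pretrained` when an `FPQuantConfig` is passed.
# `model_id` is a placeholder; `pseudoquantization=True` avoids the Blackwell/qutlass
# requirement by emulating quantization with Triton kernels (no speedups, as warned above).
#
#     from transformers import AutoModelForCausalLM, FPQuantConfig
#
#     quantization_config = FPQuantConfig(pseudoquantization=True)
#     model = AutoModelForCausalLM.from_pretrained(
#         model_id,
#         quantization_config=quantization_config,
#         torch_dtype="bfloat16",  # only bfloat16 is supported; other dtypes are overwritten
#         device_map="cuda",       # validate_environment rejects CPU/disk entries
#     )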