
    Z j                         S SK Jr  S SKJr  S SKJr  SSKJr  \(       a  SSKJ	r	  SSK
JrJrJrJr  SS	KJrJr  \" 5       (       a  S S
Kr\R&                  " \5      rSrSr " S S\5      rg
)    )metadata)TYPE_CHECKING)version   )HfQuantizer   )PreTrainedModel)is_gptqmodel_availableis_optimum_availableis_torch_availablelogging)
GPTQConfigQuantizationConfigMixinNz1.4.3z1.24.0c                      ^  \ rS rSr% SrSrS\S'   S\4U 4S jjrS r	SS	 jr
S
 rSS jrSS jr\S\4S j5       rS rSrU =r$ )GptqHfQuantizer'   z
Quantizer of the GPTQ method - for GPTQ the quantizer support calibration of the model through
the GPT-QModel package (Python import name `gptqmodel`). Quantization is done under the hood for users if they
load a non-prequantized model.
Fr   quantization_configc                    > [         TU ]  " U40 UD6  [        5       (       d  [        S5      eSSKJn  UR                  U R                  R                  5       5      U l	        g )NGLoading a GPTQ quantized model requires optimum (`pip install optimum`)r   )GPTQQuantizer)
super__init__r   ImportErroroptimum.gptqr   	from_dictr   to_dict_optimumoptimum_quantizer)selfr   kwargsr   	__class__s       w/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/quantizers/quantizer_gptq.pyr   GptqHfQuantizer.__init__1   sP    ,77#%%ghh.!.!8!89Q9Q9a9a9c!d    c                 T   [        5       (       d  [        S5      e[        5       nU(       d.  [        R                  R                  5       (       d  [        S5      e[        5       (       d  [        S5      e[        5       (       a  [        R                  " [        R                  " S5      5      [        R                  " [        5      :  dF  [        R                  " [        R                  " S5      5      [        R                  " [        5      :  a  [        S[         S[         35      eg g )Nr   z2GPU is required to quantize or run quantize model.zTLoading a GPTQ quantized model requires gptqmodel (`pip install gptqmodel`) library.	gptqmodeloptimumz#The gptqmodel version should be >= z, optimum version should >= )r   r   r
   torchcudais_availableRuntimeErrorr   parser   MIN_GPTQ_VERSIONMIN_OPTIMUM_VERSION)r   argsr   gptq_supports_cpus       r!   validate_environment$GptqHfQuantizer.validate_environment:   s    #%%ghh24 )@)@)B)BSTT'))tuu#%%MM(**;787==IY;ZZ}}X--i89GMMJ]<^^56F5GGcdwcxy  _ &r#   returnc                 X    U[         R                  :w  a  [        R                  S5        U$ )NzLWe suggest you to set `dtype=torch.float16` for better efficiency with GPTQ.)r'   float16loggerinfo)r   dtypes     r!   update_dtypeGptqHfQuantizer.update_dtypeK   s     EMM!KKfgr#   c                 <    Uc  S[         R                  " S5      0nU$ )N cpu)r'   device)r   
device_maps     r!   update_device_map!GptqHfQuantizer.update_device_mapP   s!    ell512Jr#   c                 p   UR                   R                  S:w  a  [        S5      eU R                  (       a  [        R
                  " [        R                  " S5      5      [        R
                  " [        5      :  a  U R                  R                  U5      ng U R                  R                  " U40 UD6ng g )N	input_idsz%We can only quantize pure text model.r&   )
r    main_input_namer*   pre_quantizedr   r+   r   r-   r   convert_modelr   modelr   s      r!   $_process_model_before_weight_loading4GptqHfQuantizer._process_model_before_weight_loadingU   s    ??**k9FGG}}X--i89GMMJ]<^^..<<UC..<<UMfM r#   c                    U R                   (       a  U R                  R                  U5      ng U R                  R                  c  UR
                  U R                  l        U R                  R                  XR                  R                  5        [        R                  " U R                  R                  5       5      UR                  l        g )N)rD   r   post_init_modelr   	tokenizername_or_pathquantize_modelr   r   to_dictconfigrF   s      r!   #_process_model_after_weight_loading3GptqHfQuantizer._process_model_after_weight_loading`   s    **::5AE''1195:5G5G((2""11%9Q9Q9[9[\/9/C/CDDZDZDbDbDd/eELL,r#   c                     gNT r   s    r!   is_trainableGptqHfQuantizer.is_trainablej   s    r#   c                     grT   rU   rV   s    r!   is_serializableGptqHfQuantizer.is_serializablen   s    r#   )r   )r7   torch.dtyper2   r\   )rG   r	   )__name__
__module____qualname____firstlineno____doc__requires_calibration__annotations__r   r   r0   r8   r?   rH   rQ   propertyboolrW   rZ   __static_attributes____classcell__)r    s   @r!   r   r   '   se     !%%e,C e"

	Nf d   r#   r   )	importlibr   typingr   	packagingr   baser   modeling_utilsr	   utilsr
   r   r   r   utils.quantization_configr   r   r'   
get_loggerr]   r5   r,   r-   r   rU   r#   r!   <module>rp      s^         0 ] ] K 			H	%   Hk Hr#   