
    Z j                         S SK rS SKJr  S SKJr  SSKJr  \(       a  SSKJ	r	  SSK
Jr  SS	KJrJrJrJr  SS
K
Jr  \" 5       (       a  S SKr\R&                  " \5      r " S S\5      rg)    N)TYPE_CHECKING)version   )HfQuantizer   )PreTrainedModel)	AwqConfig)is_accelerate_availableis_gptqmodel_availableis_torch_availablelogging)
AwqBackendc                   n   ^  \ rS rSr% SrSrS\S'   U 4S jrS rS r	SS	 jr
S
 rS r\S 5       rSrU =r$ )AwqQuantizer$   zm
4-bit quantization for Activation-aware Weight Quantization(AWQ) (https://huggingface.co/papers/2306.00978)
Tr	   quantization_configc                 (   > [         TU ]  " U40 UD6  g )N)super__init__)selfr   kwargs	__class__s      v/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/quantizers/quantizer_awq.pyr   AwqQuantizer.__init__-   s    ,77    c                 l    [        5       (       d  [        S5      e[        5       (       d  [        S5      eg )NzaLoading an AWQ quantized model requires gptqmodel. Please install it with `pip install gptqmodel`zMLoading an AWQ quantized model requires accelerate (`pip install accelerate`))r   ImportErrorr
   )r   r   s     r   validate_environment!AwqQuantizer.validate_environment0   s7    %''s  '((mnn )r   c                    U[         R                  :X  am  [         R                  R                  5       (       d#  [         R                  R                  5       (       a'  [
        R                  S5        [         R                  nU$ U[         R                  :w  a[  [         R                  R                  5       (       d#  [         R                  R                  5       (       a  [
        R                  S5        U$ )Nz[`torch.bfloat16` is not supported for AWQ CUDA/XPU kernels yet. Casting to `torch.float16`.zWWe suggest you to set `dtype=torch.float16` for better efficiency on CUDA/XPU with AWQ.)torchbfloat16cudais_availablexpuloggerwarningfloat16)r   dtypes     r   update_dtypeAwqQuantizer.update_dtype9   s    ENN"

(?(?(A(AUYYE[E[E]E]NNm MME  emm#)@)@)B)BeiiF\F\F^F^NNtur   c                    SSK JnJn  U R                  XR                  R
                  UR                  SS9U l        U" UU R                  U R
                  UR                  S5      S9nU" XR                  R                  5      ng )Nr   )replace_quantization_scalesreplace_with_awq_linearT)add_default_skips
device_map)r   modules_to_not_convertr0   )
integrationsr-   r.   get_modules_to_not_convertr   r1   _keep_in_fp32_modulesgetconfig
model_type)r   modelr   r-   r.   s        r   $_process_model_before_weight_loading1AwqQuantizer._process_model_before_weight_loadingC   s{    W&*&E&E++BBED_D_sw 'F '
# ( $ 8 8#'#>#>zz,/	
 ,E<<3J3JKr   c                 D    SSK Jn  U" XR                  R                  S9  g )Nr   )hf_gptqmodel_post_init)use_act_order)gptqmodel.utils.modelr<   r   desc_act)r   r8   r   r<   s       r   #_process_model_after_weight_loading0AwqQuantizer._process_model_after_weight_loadingS   s    @u4L4L4U4UVr   c                     U R                   R                  [        R                  [        R                  4;   a  [
        R                  S5        gg)Nz7You cannot save an AWQ model that uses Exllama backend!FT)r   backendr   
EXLLAMA_V1
EXLLAMA_V2r&   r'   r   s    r   is_serializableAwqQuantizer.is_serializableX   s:    ##++
0E0EzG\G\/]]NNTUr   c                     [         R                  " [        R                  R                  S5      5      [         R                  " S5      :  $ )N	gptqmodelz5.0.0)r   parse	importlibmetadatarF   s    r   is_trainableAwqQuantizer.is_trainable_   s1    }}Y//77DEW^I___r   )r1   )r8   r   )__name__
__module____qualname____firstlineno____doc__requires_calibration__annotations__r   r   r*   r9   r@   rG   propertyrN   __static_attributes____classcell__)r   s   @r   r   r   $   sM    
  $$8oL W
 ` `r   r   )importlib.metadatarL   typingr   	packagingr   baser   modeling_utilsr   utils.quantization_configr	   utilsr
   r   r   r   r   r!   
get_loggerrP   r&   r    r   r   <module>rc      sS         05 ` ` 2 			H	%=`; =`r   