
    Z j                         S SK JrJrJr  S SKJr  SSKJr  \" 5       (       a  SSKr\R                  " \
5      r " S S\5      rg)	   )is_compressed_tensors_availableis_torch_availablelogging)CompressedTensorsConfig   )HfQuantizer    Nc                      ^  \ rS rSr% SrSr\\S'   S\4U 4S jjrS r	SS jr
S	 rS
 rS r\S 5       rS\4S jrS\4S jrSrU =r$ )CompressedTensorsHfQuantizer   zu
Quantizer for the compressed_tensors package.  Loads and restores models to
quantized state with compressed_tensors
Tquantization_configc                    > [         TU ]  " U40 UD6  [        5       (       d  [        S5      eUR	                  5         SSKJn  UR                  U5      U l        UR                  U l	        Xl
        g )NuUsing `compressed_tensors` quantized models requires the compressed-tensors library: `pip install compressed-tensors`r	   )ModelCompressor)super__init__r   ImportError	post_initcompressed_tensors.compressorsr   from_compression_config
compressorrun_compressedr   )selfr   kwargsr   	__class__s       څ/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/quantizers/quantizer_compressed_tensors.pyr   %CompressedTensorsHfQuantizer.__init__$   si    ,77.003  	%%'B)AABUV1@@#6     c                 8    [        5       (       d  [        S5      eg )Nr   )r   r   )r   argsr   s      r   validate_environment1CompressedTensorsHfQuantizer.validate_environment7   s"    .003  1r   returnc                 X    U[         R                  :w  a  [        R                  S5        U$ )NzZWe suggest you to set `dtype=torch.float16` for better efficiency with compressed_tensors.)torchfloat16loggerinfo)r   dtypes     r   update_dtype)CompressedTensorsHfQuantizer.update_dtype>   s     EMM!KKtur   c                    SSK Jn  U R                  R                  nU" XU R                  5        U R                  R
                  (       d  U R                  R                  (       a  U R                  R                  US9  g g )Nr	   )apply_quantization_configmodel)compressed_tensors.quantizationr-   r   r   r   is_quantization_compressedis_sparsification_compressedcompress_model)r   r/   r   r-   ct_quantization_configs        r   $_process_model_before_weight_loadingACompressedTensorsHfQuantizer._process_model_before_weight_loadingC   s`    M!%!D!D 	"%ATATU$$??''DDOO***7 Er   c                     U R                   R                  (       a  U R                  (       a  U R                   R                  (       a  U R                  R                  US9  gg)z3Decompress loaded model if necessary - need for qatr.   N)r   r1   r   r2   r   decompress_model)r   r/   r   s      r   #_process_model_after_weight_loading@CompressedTensorsHfQuantizer._process_model_after_weight_loadingP   sE     $$??H[H[$$AAOO,,5,9 Br   c                     SSSSSS.nUR                  5       bD  UR                  5       R                  b)  UR                  5       R                  R                  U5        U$ )Ncolwiserowwise)z0layers.*.feed_forward.experts.*.gate_proj.weightz6layers.*.feed_forward.experts.*.gate_proj.weight_scalez.layers.*.feed_forward.experts.*.up_proj.weightz4layers.*.feed_forward.experts.*.up_proj.weight_scalez0layers.*.feed_forward.experts.*.down_proj.weight)get_text_configbase_model_tp_planupdate)r   configadditional_plans      r   update_tp_plan+CompressedTensorsHfQuantizer.update_tp_planZ   s_    @IFO>GDM@I
 !!#/F4J4J4L4_4_4k""$77>>Or   c                     g)NT r   s    r   is_trainable)CompressedTensorsHfQuantizer.is_trainableg       r   c                 h    U R                   (       + =(       d    U R                  R                  (       + $ )z7Loaded Models can carry out quantization aware training)r   r   r1   rG   s    r   is_qat_trainable-CompressedTensorsHfQuantizer.is_qat_trainablek   s'     &&&ad.F.F.a.a*aar   c                     g)z>Models quantized using compressed tensors can be saved to diskTrF   rG   s    r   is_serializable,CompressedTensorsHfQuantizer.is_serializablep   rJ   r   )r   r   r   )r)   torch.dtyper#   rQ   )__name__
__module____qualname____firstlineno____doc__requires_calibrationr   __annotations__r   r!   r*   r5   r9   rC   propertyrH   boolrL   rO   __static_attributes____classcell__)r   s   @r   r   r      so    
  007,C 7&
8:  b$ b
  r   r   )utilsr   r   r   utils.quantization_configr   baser   r%   
get_loggerrR   r'   r   rF   r   r   <module>ra      s@     Q P ?  			H	%W; Wr   
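

# ---------------------------------------------------------------------------
# Usage sketch (illustrative addition, not part of the original module): this
# quantizer is normally not instantiated directly. It is dispatched by
# `from_pretrained` when a checkpoint's config carries a compressed-tensors
# `quantization_config`. The model id below is a placeholder for any
# compressed-tensors checkpoint.
#
#     from transformers import AutoModelForCausalLM
#
#     model = AutoModelForCausalLM.from_pretrained(
#         "<org>/<compressed-tensors-checkpoint>",  # placeholder model id
#         device_map="auto",
#     )
#
# Whether the weights stay in their compressed form or are decompressed after
# loading is governed by the config's `run_compressed` flag handled above.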