
    Z j                         S SK Jr  SSKJr  \(       a  SSKJr  SSKJr  SSKJ	r	J
r
Jr  \
" 5       (       a  S SKr\R                  " \5      r " S	 S
\5      rg)    )TYPE_CHECKING   )HfQuantizer   )PreTrainedModel)BitNetQuantConfig)is_accelerate_availableis_torch_availableloggingNc                      ^  \ rS rSr% SrSrS\S'   U 4S jrS r  SS jr	S	\
\\\-  4   S
\
\\\-  4   4S jrS r\S
\4S j5       r\S
\4S j5       rS rSrU =r$ )BitNetHfQuantizer!   z
1.58-bit quantization from BitNet quantization method:
Before loading: it converts the linear layers into BitLinear layers during loading.

Check out the paper introducing this method: https://huggingface.co/papers/2402.17764
Tr   quantization_configc                 (   > [         TU ]  " U40 UD6  g )N)super__init__)selfr   kwargs	__class__s      y/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/quantizers/quantizer_bitnet.pyr   BitNetHfQuantizer.__init__,   s    ,77    c                    [        5       (       d  [        S5      e[        R                  R	                  5       (       d  [
        R                  S5        g UR                  S5      nUc  [
        R                  S5        g [        U[        5      (       aC  [        U5      S:  a  SUR                  5       ;   d  SUR                  5       ;   a  [        S5      eg g )	NzOLoading a BitNet quantized model requires accelerate (`pip install accelerate`)zhYou don't have a GPU available to load the model, the inference will be slow because of weight unpacking
device_mapzYou have loaded a BitNet model on CPU and have a CUDA device available, make sure to set your model on a GPU device in order to run your model.r   cpudiskzYou are attempting to load a BitNet model with a device_map that contains a CPU or disk device.This is not supported. Please remove the CPU or disk device from the device_map.)r	   ImportErrortorchcudais_availableloggerwarning_onceget
isinstancedictlenvalues
ValueError)r   argsr   r   s       r   validate_environment&BitNetHfQuantizer.validate_environment/   s    &((oppzz&&((z ZZ-
I 
D)):"u
0A0A0C'CvQ[QbQbQdGd g  He *r   c                     SSK Jn  U R                  XR                  R                  UR
                  5      U l        U" UU R                  U R                  S9ng )Nr   )replace_with_bitnet_linear)modules_to_not_convertr   )integrationsr-   get_modules_to_not_convertr   r.   _keep_in_fp32_modules)r   modelr   r-   s       r   $_process_model_before_weight_loading6BitNetHfQuantizer._process_model_before_weight_loadingF   sR    
 	>&*&E&E++BBED_D_'
# +#'#>#> $ 8 8
r   
max_memoryreturnc                 `    UR                  5        VVs0 s H
  u  p#X#S-  _M     nnnU$ s  snnf )Ng?)items)r   r5   keyvals       r   adjust_max_memory#BitNetHfQuantizer.adjust_max_memoryW   s5    6@6F6F6HI6H(#c:o6H
I Js   *c                     g)NT r   s    r   is_serializable!BitNetHfQuantizer.is_serializable[   s    r   c                 t    U R                   R                  S:H  =(       a    U R                   R                  S:H  $ )Nautobitlinearonliner   linear_classquantization_moder?   s    r   is_trainableBitNetHfQuantizer.is_trainable^   s7     $$11_D G((::hF	
r   c                 t    U R                   R                  S:H  =(       a    U R                   R                  S:H  $ )zUFlag indicating whether the quantized model can carry out quantization aware trainingrC   rD   rE   r?   s    r   is_qat_trainable"BitNetHfQuantizer.is_qat_trainablee   s7     $$11_D G((::hF	
r   c                     SSK Jn  SSKJn  U R                  R
                  S:X  a,  U R                  R                  S:X  a  U" S/S/U" U 5      /S9/$ / $ )Nr   )WeightConverter)BitNetDeserializerC   offlineweight)source_patternstarget_patterns
operations)core_model_loadingrN   integrations.bitnetrO   r   rF   rG   )r   rN   rO   s      r   get_weight_conversions(BitNetHfQuantizer.get_weight_conversionsm   sb    8; $$11_D((::iG  %-J%-J 1$ 78  	r   )r.   )r2   r   )__name__
__module____qualname____firstlineno____doc__requires_calibration__annotations__r   r*   r3   r%   strintr;   r@   propertyboolrH   rK   rW   __static_attributes____classcell__)r   s   @r   r   r   !   s      ,,8.
 
"DcCi,@ T#sUXy.EY  
d 
 
 
$ 
 
 r   r   )typingr   baser   modeling_utilsr   utils.quantization_configr   utilsr	   r
   r   r   
get_loggerrY   r!   r   r>   r   r   <module>rl      sL    !  0= H H  
		H	%[ [r   