
    Z j                        S SK Jr  S SKJr  S SKJrJr  SSKJr  SSK	J
r
Jr  \R                  " \5      r\" 5       (       a
  S SKrS SKJr       S             SS jjr " S	 S
\5      r " S S\5      rg)    )annotations)Any)is_torch_availablelogging   )ConversionOps)get_module_from_nameshould_convert_moduleNc                   SSK Jn  Uc  / n[        U R                  5       5       H  u  px[	        U[
        R                  5      (       d  M&  [        Xq5      (       d  M8  UR                  S5      u  pnU	(       a  U R                  U	5      OU nU" U(       d  UR                  OSU(       d  UR                  OSU(       d  UR                  SLOSUUUSS9n[        XU5        M     U $ )a  
Replace nn.Linear modules with empty SINQLinear modules.

Args:
    model: The model to modify
    modules_to_not_convert: List of module names to skip
    quant_config: SINQ quantization config dict (None for pre-quantized models)
    compute_dtype: Computation dtype for the quantized layers
    device: Device string for the quantized layers
    pre_quantized: Whether loading a pre-quantized checkpoint

Returns:
    The modified model with SINQLinear modules
r   )
SINQLinearN.FT)in_featuresout_featuresbiasquant_configcompute_dtypedeviceuse_unpack_kernel)sinq.sinqlinear_hfr   listnamed_modules
isinstancennLinearr
   
rpartitionget_submoduler   r   r   setattr)modelmodules_to_not_convertr   r   r   pre_quantizedr   	full_namemoduleparent_path_
child_nameparent
sinq_layers                 o/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/integrations/sinq.pyreplace_with_sinq_linearr)       s    , .%!#!%"5"5"78	&")),,$YGG%.%9%9#%>"
5@$$[1e2?**T4A,,t2?&++T)U%'"

 	J/' 9* L    c                  @    \ rS rSrSrS r   S       SS jjrSrg)	SinqQuantizeS   a  
Param-level ConversionOp for SINQ (from FP weights).

At load time, for each `Linear.weight` that should be quantized:
  - The SINQLinear module already exists (created in _process_model_before_weight_loading)
  - We just call quantize() on it with the loaded weight tensor
c                    Xl         g Nhf_quantizerselfr1   s     r(   __init__SinqQuantize.__init__\       (r*   Nc                    [        [        UR                  5       5      5      u  pg[        U[        5      (       a  US   OUn[        X#5      u  pU	R                  U5        Ub  UR                  U5        SU	l        0 $ )Nr   T)	nextiteritemsr   r   r	   quantizediscard_is_hf_initialized)r3   
input_dictr   full_layer_namemissing_keyskwargsr$   valuesweight_tensorr"   tensor_names              r(   convertSinqQuantize.convert_   so     j..012	%/%=%=q	625J&#  1$(!	r*   r0   )NNNr>   zdict[str, Any]r   ztorch.nn.Module | Noner?   z
str | Nonereturnzdict[str, torch.Tensor]__name__
__module____qualname____firstlineno____doc__r4   rE   __static_attributes__ r*   r(   r,   r,   S   sG    ) )-&*" & $	 
! r*   r,   c                  >    \ rS rSrSrS r  S       SS jjrSrg)	SinqDeserializev   a  
ConversionOp for loading *pre-quantized* SINQ checkpoints.

Checkpoint layout (what `SINQLinear.state_dict` produces) is, per module:
    <prefix>.W_q
    <prefix>.bias
    <prefix>.meta

WeightConverter in the quantizer is configured so that:
  - we group ".W_q", ".meta", ".bias" as input_dict
  - conceptually treat them as belonging to "<prefix>.weight"
  - and call this SinqDeserialize.convert to load the state into the existing SINQLinear.

The returned dict is {} because we load directly into the module.
c                    Xl         g r/   r0   r2   s     r(   r4   SinqDeserialize.__init__   r6   r*   Nc                   [        UR                  5       5       H#  u  pV[        U[         5      (       d  M  US   X'   M%     UR                  S5      nUR                  S5      nUR                  S5      n	Ub  Uc?  [	        [        UR                  5       5      5      n[        U[         5      (       a  US   nX60$ [        X#5      u  pUUS.nU	b  XS'   U
R                  U5        SU
l	        0 $ )Nr   z.W_qz.metaz.bias)W_qmetar   T)
r   r:   r   getr8   r9   rB   r	   load_state_dictr=   )r3   r>   r   r?   rA   kvrW   rX   r   r"   r$   states                r(   rE   SinqDeserialize.convert   s     ))+,DA!T"" !!
 - nnV$~~g&~~g& ;$,T*++-./A!T""aD#''(@	 
  &Mu%$(!	r*   r0   )NNrG   rI   rP   r*   r(   rR   rR   v   sD     ) )-&*	#"# &# $	# 
!# #r*   rR   )NNNzcuda:0F)r   torch.nn.Moduler   zlist[str] | Noner   zdict | Noner   ztorch.dtyper   strr    boolrH   r_   )
__future__r   typingr   transformers.utilsr   r   core_model_loadingr   quantizers.quantizers_utilsr	   r
   
get_loggerrJ   loggertorchtorch.nnr   r)   r,   rR   rP   r*   r(   <module>rk      s    #  : . U 
		H	%
 04 $!%00,0 0 	0
 0 0 0f =  F7m 7r*   