
    Z j/                     $	   S SK r S SKrS SKJr  S SKJr  S SKr\R                  R                  R                  \R                  R                  R                  \R                  R                  R                  \R                  R                  R                  \R                  R                  R                  \R                  R                  R                  \R                  R                  R                  \R                  R                  R                   \R                  R                  R"                  \R                  R                  R$                  \R                  R                  R&                  \R                  R                  R(                  \R                  R                  R*                  \R                  R                  R,                  S.r S*S\R0                  S\S\S\R4                  S-  S	\R0                  4
S
 jjr	 S*S\R0                  S\S\S\R4                  S-  S	\R0                  4
S jjr
S\R0                  S\S	\R0                  4S jrS\R0                  S	\R0                  4S jrS\R0                  S	\R0                  4S jrS\R0                  S	\R0                  4S jrS+S\R0                  S\S	\R0                  4S jjrS,S\R0                  S\S\R4                  S-  S	\R0                  4S jjrS,S\R0                  S\S\R4                  S-  S	\R0                  4S jjr    S-S\R0                  S\S\S\S\R4                  S-  S	\R0                  4S jjr    S-S\R0                  S\S\S\S\R4                  S-  S	\R0                  4S jjr     S.S\R0                  S\S\S\S\S\R4                  S-  S	\R0                  4S jjr  S/S\R0                  S\S\R4                  S-  S	\R0                  4S jjr S0S\R0                  S\S\S\R4                  S-  S	\R0                  4
S jjrS\R0                  S \R0                  S	\R0                  4S! jrS1S" jrS# rS$ r S%r!\S& 5       r"\S' 5       r#\S( 5       r$\S) 5       r%g)2    N)defaultdict)contextmanager)uniform_normal_	constant_ones_zeros_eye_dirac_xavier_uniform_xavier_normal_kaiming_uniform_kaiming_normal_trunc_normal_orthogonal_sparse_tensorab	generatorreturnc                 F    [        U SS5      (       d  [        S   " XX#S9$ U $ )N_is_hf_initializedFr   )r   r   r   getattrTORCH_INIT_FUNCTIONS)r   r   r   r   s       l/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/initialization.pyr   r   *   s+     6/77#J/qVVM    meanstdc                 F    [        U SS5      (       d  [        S   " XX#S9$ U $ )Nr   Fr   )r   r    r   r   )r   r   r    r   s       r   r   r   2   s+     6/77#I.vc__Mr   valc                 D    [        U SS5      (       d  [        S   " XS9$ U $ )Nr   Fr   )r"   r   )r   r"   s     r   r   r   :   s'    6/77#K0AAMr   c                 H    [        U SS5      (       d  [        S   " U 5      $ U $ )Nr   Fr   r   r   s    r   r   r   @   s'    6/77#G,V44Mr   c                 H    [        U SS5      (       d  [        S   " U 5      $ U $ )Nr   Fr	   r   r%   s    r   r	   r	   F   s'    6/77#H-f55Mr   c                 H    [        U SS5      (       d  [        S   " U 5      $ U $ )Nr   Fr
   r   r%   s    r   r
   r
   L   s'    6/77#F+F33Mr   groupsc                 D    [        U SS5      (       d  [        S   " XS9$ U $ )Nr   Fr   )r(   r   )r   r(   s     r   r   r   R   s'    6/77#H-fDDMr   gainc                 F    [        U SS5      (       d  [        S   " XUS9$ U $ )Nr   Fr   r*   r   r   r   r*   r   s      r   r   r   X   s+    6/77#$56vT]^^Mr   c                 F    [        U SS5      (       d  [        S   " XUS9$ U $ )Nr   Fr   r,   r   r-   s      r   r   r   ^   s+    6/77#$45fS\]]Mr   modenonlinearityc                 H    [        U SS5      (       d  [        S   " XX#US9$ U $ )Nr   Fr   r   r/   r0   r   r   r   r   r/   r0   r   s        r   r   r   d   s5     6/77#$67d
 	
 Mr   c                 H    [        U SS5      (       d  [        S   " XX#US9$ U $ )Nr   Fr   r2   r   r3   s        r   r   r   r   s5     6/77#$56d
 	
 Mr   c           	      H    [        U SS5      (       d  [        S   " XX#XES9$ U $ )Nr   Fr   )r   r    r   r   r   r   )r   r   r    r   r   r   s         r   r   r      s.     6/77#O4VCXYooMr   c                 F    [        U SS5      (       d  [        S   " XUS9$ U $ )Nr   Fr   r,   r   r-   s      r   r   r      s,    
 6/77#M26PYZZMr   sparsityc                 F    [        U SS5      (       d  [        S   " XX#S9$ U $ )Nr   Fr   )r7   r    r   r   )r   r7   r    r   s       r   r   r      s+     6/77#I.vcggMr   otherc                     [        U SS5      (       d0  [        R                  " 5          U R                  U5      sS S S 5        $ U $ ! , (       d  f       U $ = f)Nr   F)r   torchno_gradcopy_)r   r9   s     r   r=   r=      s@    6/77]]_<<& _M _Ms   A
Ac                    [         R                  R                  R                  U 5      u  p4US:X  a  UnOUS:X  a  UnOUS:X  a  X4-   S-  nSW-  nUS:X  a"  [	        U [
        R                  " U5      S-  S9  g US	:X  a  [        U [
        R                  " U5      S9  g US
:X  a'  [
        R                  " SU-  5      n[        X* U5        g [        SU 35      e)Nfan_infan_outfan_avg         ?truncated_normalg۶%?)r    normaluniform   zinvalid distribution )
r;   nninit_calculate_fan_in_and_fan_outr   mathsqrtr   r   
ValueError)r   r/   distributionr?   r@   denomvariancebounds           r   _variance_scalingrR      s    hhmmAA&IOFx				!Q&U{H))f$))H"58K"KL		!DIIh/0		"		!h,''0?@@r   c                 @    [        U SS5      (       d  [        U SSS9  U $ )Nr   Fr?   rD   r/   rN   r   rR   r%   s    r   lecun_normal_rV      s$    6/77&x>PQMr   c                 @    [        U SS5      (       d  [        U SSS9  U $ )Nr   Fr?   rE   rT   rU   r%   s    r   default_flax_embed_init_rX      s#    6/77&xhGMr   )
ztorch.nn.initztorch.nn.modules.activationztorch.nn.modules.transformerztorch.nn.modules.linearztorch.nn.modules.lossztorch.nn.modules.batchnormztorch.nn.modules.convztorch.nn.modules.normalizationztorch.nn.modules.rnnztorch.nn.modules.sparsec            	   #   J  #    [        [        5      n  [         H  nU[        R                  ;   d  M  [        R                  U   n[
        R                  5        H<  n[        X#5      (       d  M  [        X#5      X   U'   [        X#[        5       U   5        M>     M     Sv   U R                  5        H*  u  p$UR                  5        H  u  p5[        X#U5        M     M,     g! U R                  5        H*  u  p$UR                  5        H  u  p5[        X#U5        M     M,     f = f7f)av  
Guard the `torch.nn.init` primitive functions to behave exactly like the functions in this file, i.e. be
protected against the `_is_hf_initialized` flag to avoid re-init if the param was already loaded.

Usually, all models are using the init from `transformers` which are already guarded, but just to make extra sure
and for remote code, we also use this context manager.
N)r   dictTORCH_MODULES_TO_PATCHsysmodulesr   keyshasattrr   setattrglobalsitems)	originalsmodule_namemodule	func_name	functionsfuncs         r   guard_torch_init_functionsri      s      D!I11Kckk)[1!5!:!:!<Iv117>v7Q	))479Y3GH "= 2 	 "+!2F#,??#4	40 $5 "3!2F#,??#4	40 $5 "3s(   D#C  8C  .3C  !?D# A D  D#c            	   #     #    SSK Jn   S n[        [        5      n [         Hu  nU[
        R                  ;   d  M  [
        R                  U   n[        R                  5        H1  n[        XE5      (       d  M  [        XE5      X$   U'   [        XEU5        M3     Mw     U R                  nXl        Sv   UR                  5        H*  u  pGUR                  5        H  u  pX[        XEU5        M     M,     X`l        g! UR                  5        H*  u  pGUR                  5        H  u  pX[        XEU5        M     M,     WU l        f = f7f)aS  
Disable weight initialization both at the torch-level, and at the transformers-level (`init_weights`).
This is used to speed-up initializing an empty model with deepspeed, as we do not initialize the model on meta device
with deepspeed, but we still don't need to run expensive weight initializations as we are loading params afterwards.
   PreTrainedModelc                      g N argskwargss     r   
empty_func#no_init_weights.<locals>.empty_func       r   N)modeling_utilsrm   r   rZ   r[   r\   r]   r   r^   r_   r   r`   init_weightsrb   )	rm   rt   rc   rd   re   rf   original_init_weightsrg   rh   s	            r   no_init_weightsrz      s     0 D!I=1Kckk)[1!5!:!:!<Iv117>v7Q	))4:> "= 2 !0 < <'1$ "+!2F#,??#4	40 $5 "3 (=$	 "+!2F#,??#4	40 $5 "3 (=$s)   E C6 8C6 7:C6 1AE 6AD==E c               #   p   #    SSK Jn   S n U R                  nXl        Sv   X l        g! WU l        f = f7f)a  
Disable weight tying during loading with `from_pretrained`. This is needed as we want to have access to ALL
weights in the state_dict during `from_pretrained`, and otherwise tying them would remove them from it, as it's
called in `post_init` when instantiating.
rk   rl   c                      g ro   rp   rq   s     r   rt   "no_tie_weights.<locals>.empty_func$  rv   r   N)rw   rm   tie_weights)rm   rt   original_tie_weightss      r   no_tie_weightsr     s:      0;.::&0# ';#&:#s   
6* 6	36c               #      ^#    [         R                  mU4S jn U [         l         Sv   T[         l        g! T[         l        f = f7f)av  
During meta-device model initialisation, ``torch.linspace`` produces meta
tensors that have no data.  Custom models loaded from the Hub (remote code)
often call ``.item()`` on these tensors to compute scalar hyperparameters
(e.g. stochastic-depth / drop-path schedules).  Native transformers models
already pass ``device="cpu"`` explicitly for such calls (see e.g.
``modeling_swin.py``, ``modeling_pvt_v2.py``), but remote-code models
written before v5 do not.

This context manager patches ``torch.linspace`` to default to
``device="cpu"`` when no explicit device is requested, matching the best
practice already used throughout transformers.  Calls that supply an
explicit ``device`` argument (e.g. ``device=self.logits.device``) are left
untouched.  ``torch.arange`` is intentionally NOT patched because it is
used in RoPE computations where the device must match model parameters.
c                  8   > UR                  SS5        T" U 0 UD6$ )Ndevicecpu)
setdefault)rr   rs   original_linspaces     r   _safe_linspace5meta_device_safe_creation_ops.<locals>._safe_linspaceE  s#    (E* $1&11r   N)r;   linspace)r   r   s    @r   meta_device_safe_creation_opsr   1  s7     $ 2 $EN+**s   "A6 AAA)        rC   N)rk   )rC   N)r   r?   
leaky_reluN)r   rC   g       g       @N)rk   N)g{Gz?N)r?   rE   )&rK   r\   collectionsr   
contextlibr   r;   rH   rI   r   r   r   r   r	   r
   r   r   r   r   r   r   r   r   r   Tensorfloat	Generatorintstrr=   rR   rV   rX   r[   ri   rz   r   r   rp   r   r   <module>r      s    
 # %  &&xx}}$$((XX]]  hhmm""HHMMhhmm""xx}}44hhmm2266xx}}44XX]]0088==,,xx}}$$ & _cLL"-2EJ__W[E[
\\ dhLL %27JO//\`J`
\\ell  5<< %,, 5<< 5<< ELL  %,, 5<<  U\\ ELL  Z^H^ jojvjv 5<< u uY]G] iniuiu  $(,LL  	
 % \\  $(,LL  	
 % \\  (,
LL


 

 	

 
 %
 \\
 (,LL
 % \\	 cgLL$)05IN[_I_
\\%,, u||  A,  1 14 != !=H ; ;* + +r   