
    Z jj0                    J   S r SSKJr  SSKrSSKJr  SSKJr  SSKJ	r	  SSK
Jr  SS	KJrJr  \(       a  SS
KJr  SSKJr  0 r\\" SS9 " S S5      5       5       r " S S5      r\" S5      rSS jr        SS jrSSS jjr\R4                  " 5       rSS jrSSS.S jjrg) z
Contains the logic for automatic additional output capture with our forward decorators.
This mostly describe the hooks used and the logic to make capture thread/context safe.
    )annotationsN)
ContextVar)	dataclasswraps)TYPE_CHECKING   )is_torchdynamo_compilingrequires)nn   PreTrainedModel)torch)backendsc                  N    \ rS rSr% SrS\S'   SrS\S'   SrS	\S
'   SrS	\S'   Sr	g)OutputRecorder'   a  
Configuration for recording outputs from a model via hooks.

Attributes:
    target_class (Type): The class (e.g., nn.Module) to which the hook will be attached.
    index (Optional[int]): If the output is a tuple/list, optionally record only at a specific index.
    layer_name (Optional[str]): Name of the submodule to target (if needed), e.g., "transformer.layer.3.attn".
    class_name (Optional[str]): Name of the class to which the hook will be attached. Could be the suffix of class name in some cases.
ztype[nn.Module]target_classr   intindexN
str | None
layer_name
class_name )
__name__
__module____qualname____firstlineno____doc____annotations__r   r   r   __static_attributes__r       t/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/utils/output_capturing.pyr   r   '   s,     "!E3N!J
!!J
!r#   r   c                  0    \ rS rSrSrS rS rS rS rSr	g)	CompileableContextVar:   a  
Convenience wrapper around a ContextVar for usage with `torch.compile`.
This behaves exactly as a `ContextVar`, except when compilation is triggered in which case it behaves as a simple
global variable. This is useful as `torch.compile` cannot trace the `get` method of `ContextVar`. This however means
that the access to the underlying variable is not thread-safe when compilation is triggered.
c                >    [        US S9U l        S U l        SU l        g )N)defaultF)r   context_var
global_var	compiling)selfnames     r$   __init__CompileableContextVar.__init__B   s    %dD9r#   c                p    U R                   (       a  U R                  $ U R                  R                  5       $ N)r,   r+   r*   get)r-   s    r$   r3   CompileableContextVar.getG   s(    >>??"##''))r#   c                r    [        5       (       a  Xl        SU l        g U R                  R	                  U5      $ )NT)r
   r+   r,   r*   set)r-   values     r$   r6   CompileableContextVar.setN   s1    #%%#O!DN##''..r#   c                    U R                   (       d  Uc  S U l        SU l         g U R                  R                  U5        g )NF)r,   r+   r*   reset)r-   tokens     r$   r:   CompileableContextVar.resetV   s/    >>U]"DO"DN""5)r#   )r,   r*   r+   N)
r   r   r   r   r    r/   r3   r6   r:   r"   r   r#   r$   r&   r&   :   s    
*/*r#   r&   output_collectorc                8   ^^ UU4S jnU R                  U5        g)zaInstall the forward hook needed to capture the output described by `key` and `index` in `module`.c                L  > [         R                  5       nUb  TUR                  5       ;  a  g TS:X  a)  [        UT   5      S:X  a  UT   R	                  US   5        [        U[        5      (       d  UT   R	                  U5        g UT   b  UT   R	                  UT   5        g g )Nhidden_statesr   )_active_collectorr3   keyslenappend
isinstancetuple)moduleargsoutputcollected_outputsr   keys       r$   output_capturing_hook<install_output_capturing_hook.<locals>.output_capturing_hooke   s    -113$3D3I3I3K(K/!c*;C*@&AQ&Fc"))$q'2&%((c"))&1E]&c"))&-8 'r#   N)register_forward_hook)rG   rK   r   rL   s    `` r$   install_output_capturing_hookrO   b   s    9   !67r#   c                   SSK Jn  U R                  5        H6  u  pE[        XS5      (       d  [	        XQ SU 3U5        M(  [        XQ SU 3S9  M8     U H  u  pgUR                  b  [        XR                  5      (       d1  UR                  c  M;  UR                  UR                  5      (       d  M]  UR                  b  UR                  U;  a  M|  [        XUR                  5        M     g)a  
Recursively install all output capturing hooks on all submodules of `parent_module`.
Note that we need to use this recursive approach instead of simply iterating over all modules, because we want
to respect the `capture_tasks` of all individual submodels (`PreTrainedModel` instances) in the graph. That is, once
we reach a submodel in the graph, its children should use this submodel's `capture_tasks`, but other parts of the graph
should not.
r   r   .)prefixN)modeling_utilsr   named_childrenrE   recursively_install_hooks"install_all_output_capturing_hooksr   r   endswithr   rO   r   )parent_modulemodule_namecapture_tasksr   r.   rG   rK   specss           r$   rU   rU   v   s     1 &446&22%fQtf.E}U /vQtf>UV 7 $
*z-I[I[/\/\([-A-A%BRBR-S-S+0@0@0S)-ekkJ $r#   c                   [         R                  [        U R                  5      5      =(       d    0 n/ nUR	                  5        H  u  pE[        U[        5      (       d  U/nU Hp  n[        U[        5      (       dF  SU;   a  SOSn[        U[        5      (       d  SOUn[        U[        5      (       d  UOSn	[        XUS9nUR                  XF45        Mr     M     Ub  UOSn[        XU5        [        U SS5        g)	z
Install the output recording hooks on all the modules in `model`. This will take care of correctly dispatching
the `_can_record_outputs` property of each individual submodels in case of composite models.
r@   r   r	   N)r   r   r    !_output_capturing_hooks_installedT)_CAN_RECORD_REGISTRYr3   str	__class__itemsrE   listr   rD   rU   setattr)
modelrR   capture_flagsrZ   rK   layer_specsr[   r   r   r   s
             r$   rV   rV      s     ),,S-ABHbMM)//1+t,,&-K Ee^44,3)3E3)?)?TU
,6uc,B,Bu&LZde  #. ! 2 )VrFe];E6=r#   c                    [        U SS5      (       a  g[           [        U SS5      (       a
   SSS5        g[        U 5        SSS5        g! , (       d  f       g= f)z
Check if the model already has output capturing hooks installed, and install them if it is not already the
case.
Note that this is thread-safe, in case 2 (or more) threads want to install them concurrently.
r^   FN)getattr_hook_installation_lockrV   )re   s    r$   maybe_install_capturing_hooksrk      sM     u95AA	  5=uEE	 
!	  	+51 
!	 	 s   AA
AT)tie_last_hidden_statesc               *   ^ U4S jnU b  U" U 5      $ U$ )a  
Decorator to intercept specific layer outputs through hooks. The hooks are installed only once and lazily,
the first time output capture is requested with the `output_xxx` kwargs/config.
The implementation is fully context/thread safe, except when using `torch.compile`, as dynamo is unable to trace
through `ContextVar` methods.

Args:
    tie_last_hidden_states (`bool`, *optional*, defaults to `True`):
        Whether to overwrite `out.hidden_states[-1]` with the `out.last_hidden_state`.
        This is true for all language models and should be toggled off only if
        `out.hidden_states[-1]` has to be the hidden state before last layer norm, which
        is needed for some vision models (e.g. CLIP, SigLIP)
c                4   >^  [        T 5      U U4S j5       nU$ )Nc                  > UR                  S[        U R                  SS5      5      n[        R	                  [        U R                  5      5      =(       d    0 nU Vs0 s H4  nSU 3UR	                  SU 3[        U R                  SU 3S5      5      _M6     nnSU;   a*  UR	                  S[        U R                  SS5      5      US'   SU;   a*  UR	                  S[        U R                  SS5      5      US	'   UR                  5        VVs0 s H!  u  pWU(       d  M  UR                  SS
5      / _M#     nnn[        U5      S:  a  [        U 5        [        R                  U5      n	 T" U /UQ70 UD6n
[        R                  U	5        U GH  nUS:X  a  T(       d  Oo[        U
S5      (       a'  X   S S X'   X   R                  U
R                   5        O7[        U
S5      (       a&  X   S S X'   X   R                  U
R"                  5        [%        X   5      X'   M  US:X  aj  ['        XK   [(        5      (       aA  [        XK   5      S:X  a0  [%        X   SS S2   5      X'   [%        X   SS S2   5      U
SU-   '   M  [%        X   5      X'   GM  [%        X   5      X'   GM     USL a  U
R+                  5       n
U
$ s  snf s  snnf ! [        R                  U	5        f = f)Nreturn_dictToutput_Fcross_attentionsoutput_attentionsoutput_cross_attentionsmask_decoder_attentionsoutput_mask_decoder_attentionsr]   r   r@   vision_hidden_stateslast_hidden_state
attentionsr   r	   cross_)popri   configr_   r3   r`   ra   rb   replacerC   rk   rA   r6   r:   hasattrrD   rw   ry   rF   rE   rc   to_tuple)r-   rH   kwargsrp   capturable_flagskrecordable_keysvrJ   output_tokenoutputsrK   funcrl   s               r$   wrapper4capture_outputs.<locals>.wrapped_fn.<locals>.wrapper   s    !**]GDKKX\4]^K  477DNN8KLRPR *)A !vzzGA3-PWXYWZm]b9cdd)  
 "%55=CZZ'>QSX)Y> 9: ),<<DJJJ'>QSX)YE @A KZJ_J_Ja gJa$!ef!=9b!92!=Ja g$%)-d3,001BCL6t5d5f5 "''5 )/)1 *@AA1B1G1L).).55g6R6RS *=>>1B1G1L).).55g6O6OP#():)?#@GLL(!"2"7>>3GWG\C]abCb',->-CADqD-I'J278I8NqtRSt8T2U3/',->-C'D#():)?#@GL) ), e#!**,No !h "''5s   ;J9J>J>)K Kr   )r   r   rl   s   ` r$   
wrapped_fn#capture_outputs.<locals>.wrapped_fn   s!    	t=	 
=	~ r#   r   )r   rl   r   s    ` r$   capture_outputsr      s#    AF $r#   )rG   	nn.ModulerK   r`   r   r   returnNone)rX   r   rY   r`   rZ   z list[tuple[str, OutputRecorder]]r   r   r2   )re   r   rR   r   r   r   )re   r   r   r   )r    
__future__r   	threadingcontextvarsr   dataclassesr   	functoolsr   typingr   import_utilsr
   r   r   r   rS   r   r_   r   r&   rA   rO   rU   rV   Lockrj   rk   r   r   r#   r$   <module>r      s   
 #  " !    < 0   	:" "  ""!* !*J **<= 8(KK+.K?_K	K@>: $..* 2&T Tr#   