
    S jW                       % S SK Jr  S SKrS SKJr  S SKJrJrJrJrJ	r	  S SK
JrJrJr  S SKrS SKJr  \(       a  S SKJr  SS	KJr  / S
Qr\" S5      r\" S5      r\" \R0                  S5      (       d]  \" S5      \R0                  R2                  S'   \" S5      \R0                  R2                  S'   \" S5      \R0                  R2                  S'   S SKJrJrJr  SS jrS S jr " S S\5      r  " S S5      r!\	S\S\"4   4   r#S\$S'   \   S!           S"S jj5       r%\   S!           S#S jj5       r%   S!           S$S jjr%g)%    )annotationsN)Callable)OptionaloverloadTYPE_CHECKING	TypeAliasUnion)	ParamSpecSelfTypeVar)Tensor)_POOL_HANDLE   )_dummy_type)is_current_stream_capturinggraph_pool_handleXPUGraphgraphmake_graphed_callables_R_P_XpuStreamBase	_XPUGraph_xpu_graph_pool_handle_xpu_isCurrentStreamCapturing)r   r   r   c                     [        5       $ )zReturn True if XPU graph capture is underway on the current XPU stream, False otherwise.

If a XPU context does not exist on the current device, returns False without initializing the context.
)r        a/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/torch/xpu/graphs.pyr   r   )   s    
 )**r   c                 P    [         R                  R                  [        5       5      $ )zBReturn an opaque token representing the id of a graph memory pool.)torchxpur   r   r   r   r   r   r   1   s    99!!"8":;;r   c                     ^  \ rS rSrSrSSU 4S jjjrSSU 4S jjjrSU 4S jjrSU 4S jjrSU 4S jjr	SU 4S jjr
SU 4S	 jjrSU 4S
 jjrSU 4S jjrSU 4S jjrSU 4S jjrSrU =r$ )r   6   a  Wrapper around a XPU graph.

Arguments:
    keep_graph (bool, optional): If ``keep_graph=False``, the
        executable command graph will be instantiated on GPU at the end of
        ``capture_end`` and the underlying modifiable command graph will be
        destroyed. Note that the executable command graph will not be
        instantiated at the end of ``capture_end`` in this
        case. Instead, it will be instantiated via an explicit called
        to ``instantiate`` or automatically on the first call to
        ``replay`` if ``instantiate`` was not already called. Calling
        ``instantiate`` manually before ``replay`` is recommended to
        prevent increased latency on the first call to ``replay``.

c                "   > [         TU ]  X5      $ N)super__new__)cls
keep_graph	__class__s     r   r(   XPUGraph.__new__G   s    ws//r   c                    > [         TU ]  US9  g)a  Begin capturing XPU work on the current xpu stream.

Typically, you shouldn't call ``capture_begin`` yourself.
Use :class:`~torch.xpu.graph`, which call ``capture_begin`` internally.

Arguments:
    pool (optional): Token (returned by :func:`~torch.xpu.graph_pool_handle` or
        :meth:`other_Graph_instance.pool()<torch.xpu.XPUGraph.pool>`) that hints this graph may share memory
        with the indicated pool.
poolN)r'   capture_begin)selfr/   r+   s     r   r0   XPUGraph.capture_beginJ   s     	4(r   c                "   > [         TU ]  5         g)zEnd XPU graph capture on the current stream.

After ``capture_end``, ``replay`` may be called on this instance.

Typically, you shouldn't call ``capture_end`` yourself.
Use :class:`~torch.xpu.graph`, which call ``capture_end`` internally.
N)r'   capture_endr1   r+   s    r   r4   XPUGraph.capture_endW   s     	r   c                "   > [         TU ]  5         g)a  Instantiate the XPU graph. Will be called by
``capture_end`` if ``keep_graph=False``, or by ``replay`` if
``keep_graph=True`` and ``instantiate`` has not already been
explicitly called. Does not destroy the xpu modify command graph returned
by ``raw_xpu_graph``.
N)r'   instantiater5   s    r   r8   XPUGraph.instantiatea   s     	r   c                "   > [         TU ]  5         g)z+Replay the XPU work captured by this graph.N)r'   replayr5   s    r   r;   XPUGraph.replayj   s    r   c                "   > [         TU ]  5         g)z1Delete the graph currently held by this instance.N)r'   resetr5   s    r   r>   XPUGraph.resetn   s    r   c                    > [         TU ]  5       $ )zReturn an opaque token representing the id of this graph's memory pool.

This id can optionally be passed to another graph's ``capture_begin``,
which hints the other graph may share the same memory pool.
)r'   r/   r5   s    r   r/   XPUGraph.poolr   s     w|~r   c                    > [         TU ]  5       $ )z.Enable debugging mode for XPUGraph.debug_dump.)r'   enable_debug_moder5   s    r   rC   XPUGraph.enable_debug_modez   s    w(**r   c                "   > [         TU ]  U5      $ )z
Arguments:
    debug_path (required): Path to dump the graph to.

Calls a debugging function to dump the graph if the debugging is
enabled via XPUGraph.enable_debug_mode()
)r'   
debug_dump)r1   
debug_pathr+   s     r   rF   XPUGraph.debug_dump~   s     w!*--r   c                    > [         TU ]  5       $ )zuReturns the underlying xpuGraph_t. ``keep_graph`` must be True.

XPU doesn't provide APIs to manipulate this object.
)r'   raw_xpu_graphr5   s    r   rJ   XPUGraph.raw_xpu_graph   s    
 w$&&r   c                    > [         TU ]  5       $ )a  Returns the underlying xpuGraphExec_t. ``instantiate`` must have been called if ``keep_graph`` is True, or ``capture_end`` must have been called if ``keep_graph`` is False. If you call ``instantiate()`` after ``raw_xpu_graph_exec()``, the previously returned xpuGraphExec_t will be destroyed. It is your responsibility not to use this object after destruction.

XPU doesn't provide APIs to manipulate this object.
)r'   raw_xpu_graph_execr5   s    r   rM   XPUGraph.raw_xpu_graph_exec   s    
 w)++r   r   )F)r*   boolreturnr   r&   )r/   Optional[_POOL_HANDLE]rP   NonerP   rR   rP   r   )rG   strrP   rR   )rP   int)__name__
__module____qualname____firstlineno____doc__r(   r0   r4   r8   r;   r>   r/   rC   rF   rJ   rM   __static_attributes____classcell__)r+   s   @r   r   r   6   sN     0 0) )+.', ,r   r   c                  X    \ rS rSr% SrSrS\S'     S
     SS jjrSS jrSS jr	S	r
g)r      a^  Context-manager that captures XPU work into a :class:`torch.xpu.XPUGraph` object for later replay.

Arguments:
    xpu_graph (torch.xpu.XPUGraph): Graph object used for capture.
    pool (optional): Opaque token (returned by a call to :func:`~torch.xpu.graph_pool_handle()` or
        :meth:`other_Graph_instance.pool()<torch.xpu.XPUGraph.pool>`) hinting this graph's capture
        may share memory from the specified pool.
    stream (torch.xpu.Stream, optional): If supplied, will be set as the current stream in the context.
        If not supplied, ``graph`` sets its own internal side stream as the current stream in the context.

.. note::
    For effective memory sharing, if you pass a ``pool`` used by a previous capture and the previous capture
    used an explicit ``stream`` argument, you should pass the same ``stream`` argument to this capture.

NOptional[torch.xpu.Stream]default_capture_streamc                D   U R                   R                  c-  [        R                  R	                  5       U R                   l        Uc  SOU4U l        Ub  UOU R                   R                  U l        U R                  c  [        S5      eU R                  U l        Xl	        g )Nr   zcapture_stream must not be None)
r+   ra   r!   r"   Streamr/   capture_streamAssertionError
stream_ctx	xpu_graph)r1   rg   r/   streams       r   __init__graph.__init__   s     >>00849II4D4D4FDNN1 ,BTG 		 (Fdnn.S.S 	 & !BCC--"r   c                    [         R                  R                  5         [         R                  R                  5         U R                  R                  5         U R                  R                  " U R                  6   g r&   )	r!   r"   synchronizeempty_cacherf   	__enter__rg   r0   r/   )r1   s    r   rn   graph.__enter__   sH    				!!#$$dii0r   c                j    U R                   R                  5         U R                  R                  " U6   g r&   )rg   r4   rf   __exit__)r1   argss     r   rq   graph.__exit__   s$    ""$  $'r   )rd   r/   rf   rg   )NN)rg   r   r/   rQ   rh   r`   rS   )rr   objectrP   rR   )rW   rX   rY   rZ   r[   ra   __annotations__ri   rn   rq   r\   r   r   r   r   r      sH      :>6=
 (,-1	## %# +	#.1(r   r   torch.nn.Module.r   _ModuleOrCallablec                    g r&   r   	callablessample_argsnum_warmup_itersallow_unused_inputr/   s        r   r   r      s     r   c                    g r&   r   ry   s        r   r   r      s     %(r   c                4   [         R                  " 5       (       a%  [         R                  " 5       (       a  [        S5      eSn[	        U [
        5      (       d+  SnU 4n [        R                  " [
        [        S4   U5      4nO-[        R                  " [
        [
        [        S4   S4   U5      n/ n[        X5       GH  u  p[	        U[         R                  R                  5      (       a  [        UR                  5      S:X  a2  [        UR                  5      S:X  a  [        UR                  5      S:X  d  [        S5      e[!        S UR#                  5        5       5      (       d  [        S5      e[         R$                  R&                  R(                  " U	6 n
UR+                  [        U
5      5        [!        S	 U
 5       5      (       a  GM  [-        S
5      e   U V	s/ s H  n	[        U	5      PM     nn	U  Vs/ s HG  n[	        U[         R                  R                  5      (       a  [        UR/                  5       5      OSPMI     nn[1        [        U 5      5       Vs/ s H  nX}   X   -   PM     nn[1        [        U 5      5       Vs/ s H!  n[         R2                  R5                  5       PM#     nn[1        [        U 5      5       Vs/ s H!  n[         R2                  R5                  5       PM#     nnUc
  [7        5       OUn[         R2                  R9                  5         [         R2                  R;                  [         R2                  R=                  5       5         [        XU5       H  u  nn	nSu  nnn[1        U5       H  n[         R$                  R&                  R?                  U" U	6 5      n[        S U 5       5      n[        U5      S:  d  MR  [         R@                  RC                  U[        S U 5       5      [        S U 5       5      SUS9nM     UUU4 H  nAM     M     SSS5        [         R2                  R9                  5         / n/ n[        XU5       H  u  nn	n[         R2                  RE                  UUS9   U" U	6 nSSS5        [         R$                  R&                  RG                  W5      u  nnUR+                  [        U5      5        UR+                  U5        M     / n/ n [        [I        U5      [I        U5      [I        U5      5       GH  u  nn!n"[        S U! 5       5      n#[        S U! 5       5      nSn[        U5      S:  ah  [         R2                  RE                  U"US9   [         R@                  RC                  U[        S U 5       5      [        S U# 5       5      SUS9nSSS5        / n$Sn%U HC  n&U&RJ                  (       a  Ub  U$R+                  UU%   5        U%S-  n%M2  U$R+                  S5        ME     [        U$5      n$UR+                  U#5        U R+                  U$5        GM"     URM                  5         U RM                  5                             SS jn'/ n([O        U 5       H  u  nnU'" UU   UU   X   X   UU   X   UU   UU   U U   5	      n)[	        U[         R                  R                  5      (       aF            SS jn*U*" UURP                  U)URR                  5      Ul)        U(R+                  U5        M  U(R+                  U)5        M     U(       a  U(S   $ [        U(5      $ s  sn	f s  snf s  snf s  snf s  snf ! , (       d  f       GNC= f! , (       d  f       GN= f! , (       d  f       GN= f)a  Accept callables (functions or :class:`nn.Module<torch.nn.Module>`\ s) and returns graphed versions.

Each graphed callable's forward pass runs its source callable's
forward XPU work as a XPU graph inside a single autograd node.

The graphed callable's forward pass also appends
a backward node to the autograd graph. During backward, this node runs the
callable's backward work as a XPU graph.

Therefore, each graphed callable should be a drop-in replacement for its source callable
in an autograd-enabled training loop.

See :ref:`Partial-network capture<partial-network-capture>` for detailed use and constraints.

If you pass a tuple of several callables, their captures will use the same memory pool.

Arguments:
    callables (torch.nn.Module or Python function, or tuple of these): Callable or callables to graph.
        If you pass a tuple of callables, their order in the tuple must be the same order they'll run
        in the live workload.
    sample_args (tuple of Tensors, or tuple of tuples of Tensors): Samples args for each callable.
        If a single callable was passed, ``sample_args`` must be a single tuple of argument Tensors.
        If a tuple of callables was passed, ``sample_args`` must be tuple of tuples of argument Tensors.
    num_warmup_iters (int): The number of warmup iterations. Currently, ``DataDistributedParallel`` needs
        11 iterations for warm up. Default: ``3``.
    allow_unused_input (bool): If False, specifying inputs that were not used when computing outputs
        (and therefore their grad is always zero) is an error. Defaults to False.
    pool (optional): Token (returned by :func:`~torch.xpu.graph_pool_handle` or
        :meth:`other_Graph_instance.pool()<torch.xpu.XPUGraph.pool>`) that hints this graph may share memory
        with the indicated pool.
.. note::
    The ``requires_grad`` state of each Tensor in ``sample_args`` must match the state
    that's expected for the corresponding real input in the training loop.

.. warning::
    This API is in beta and may change in future releases.

.. warning::
    ``sample_args`` for each callable must contain only Tensors. Other types are not allowed.

.. warning::
    Returned callables do not support higher order differentiation (e.g., double backward).

.. warning::
    In any :class:`~torch.nn.Module` passed to :func:`~make_graphed_callables`, only parameters
    may be trainable. Buffers must have ``requires_grad=False``.

.. warning::
    After you pass a :class:`torch.nn.Module` through :func:`~make_graphed_callables`,
    you may not add or remove any of that Module's parameters or buffers.

.. warning::
    :class:`torch.nn.Module`\s passed to :func:`~torch.xpu.make_graphed_callables` must not have module hooks
    registered on them at the time they are passed. However, registering hooks on modules *after* passing them
    through :func:`~torch.xpu.make_graphed_callables` is allowed.

.. warning::
    When running a graphed callable, you must pass its arguments in the same order and format
    they appeared in that callable's ``sample_args``.

.. warning::
    The automatic mixed precision is supported in :func:`~torch.xpu.make_graphed_callables` only with disabled
    caching. The context manager `torch.amp.autocast()` must have `cache_enabled=False`.
z_make_graphed_callables does not support the autocast caching. Please set `cache_enabled=False`.FT.r   zModules must not have hooks registered at the time they are passed. However, registering hooks on modules after passing them through make_graphed_callables is allowed.c              3  <   #    U  H  oR                   S L v   M     g7f)FNrequires_grad.0bs     r   	<genexpr>)make_graphed_callables.<locals>.<genexpr>I  s     EA%/s   zIn any :class:`~torch.nn.Module` passed to :func:`~make_graphed_callables`, only parameters may be trainable. All buffers must have ``requires_grad=False``.c              3  V   #    U  H  n[        U[        R                  5      v   M!     g 7fr&   )
isinstancer!   r   )r   args     r   r   r   Q  s     HKS:c5<<00Ks   ')zfIn the beta API, sample_args for each callable must contain only Tensors. Other types are not allowed.r   N)NNNc              3  J   #    U  H  oR                   (       d  M  Uv   M     g 7fr&   r   r   os     r   r   r   q  s     $K1??QQ   #	#c              3  J   #    U  H  oR                   (       d  M  Uv   M     g 7fr&   r   r   is     r   r   r   u  s      %';!AA';r   c              3  r   #    U  H-  oR                   (       d  M  [        R                  " U5      v   M/     g 7fr&   r   r!   
empty_liker   s     r   r   r   x  s(      +9@AOO/E,,Q//s   77)outputsinputsgrad_outputsonly_inputsallow_unusedr.   c              3  r   #    U  H-  oR                   (       a  [        R                  " U5      OS v   M/     g 7fr&   r   r   s     r   r   r     s'      $
FT??EQ<ns   57c              3  J   #    U  H  oR                   (       d  M  Uv   M     g 7fr&   r   r   s     r   r   r     s     J1//QQr   c              3  J   #    U  H  oR                   (       d  M  Uv   M     g 7fr&   r   r   s     r   r   r     s      T,@qOO,@r   c              3  .   #    U  H  oc  M  Uv   M     g 7fr&   r   r   s     r   r   r     s     &W2EQqq2Es   	   c	           	        ^ ^^^^^^^^^
  " UU UUUUU4S jS[         R                  R                  5      m
SU
UU4S jjn	U	$ )Nc                     > \ rS rSr\SUUUU4S jj5       r\\R                  R                  R                  SU UU4S jj5       5       r
Srg)Omake_graphed_callables.<locals>.make_graphed_autograd_function.<locals>.Graphedi  c                *  > [        T5       HB  nTU   R                  5       X   R                  5       :w  d  M,  TU   R                  X   5        MD     TR                  5         [	        T[
        5      (       d  [        S5      e[        S T 5       5      $ )Nzstatic_outputs must be a tuplec              3  @   #    U  H  oR                  5       v   M     g 7fr&   detachr   s     r   r   jmake_graphed_callables.<locals>.make_graphed_autograd_function.<locals>.Graphed.forward.<locals>.<genexpr>  s     @AXXZZs   )rangedata_ptrcopy_r;   r   tupleRuntimeError)ctxr   r   	fwd_graphlen_user_argsstatic_input_surfacestatic_outputss      r   forwardWmake_graphed_callables.<locals>.make_graphed_autograd_function.<locals>.Graphed.forward  s     }-A+A.779VY=O=O=QQ,Q/55fi@ .   "!.%88&'GHH@@@@r   c                  > [        U5      [        T5      :w  a#  [        S[        T5       S[        U5       35      e[        TU5       H?  u  p#Uc  M
  UR                  5       UR                  5       :w  d  M.  UR	                  U5        MA     TR                  5         [        T[        5      (       d  [        S5      e[        S T 5       5      $ )Nz	Expected z gradients but got z"static_grad_inputs must be a tuplec              3  L   #    U  H  nUb  UR                  5       OUv   M     g 7fr&   r   r   s     r   r   kmake_graphed_callables.<locals>.make_graphed_autograd_function.<locals>.Graphed.backward.<locals>.<genexpr>  s(       0 #$-AHHJQ6/s   "$)lenr   zipr   r   r;   r   r   )r   gradsggrad	bwd_graphstatic_grad_inputsstatic_grad_outputss       r   backwardXmake_graphed_callables.<locals>.make_graphed_autograd_function.<locals>.Graphed.backward  s     u:%8!99&#C(;$<#==PQTUZQ[P\]   ##6>GA}::<4==?:GGDM  ?   "!"4e<<&'KLL  0  r   r   N)r   rt   r   r   rP   tuple[Tensor, ...])r   rt   r   r   rP   r   )rW   rX   rY   rZ   staticmethodr   r!   autogradfunctiononce_differentiabler   r\   )r   r   r   r   r   r   r   s   r   Graphedr     sN    A A A ^^$$88  9 r   r   c                    > [         R                  R                  R                  " U 6 nTR                  " [        U5      T-   6 n[         R                  R                  R                  UT5      $ r&   )r!   utils_pytreearg_tree_leavesapplyr   tree_unflatten)	user_argsflatten_user_argsoutr   module_paramsoutput_unflatten_specs      r   functionalizedVmake_graphed_callables.<locals>.make_graphed_autograd_function.<locals>.functionalized  sU     % 3 3 C CY O--%(9":]"JLC;;&&55c;PQQr   )r   rt   rP   rt   )r!   r   Function)r   r   r   r   r   r   r   r   r   r   r   s   ````````` @r   make_graphed_autograd_function>make_graphed_callables.<locals>.make_graphed_autograd_function  s4    !	 !	enn-- !	F	R 	R r   c                $   ^ ^^^ SU UUU4S jjnU$ )Nc                 D   > TR                   T:X  a  T" U 0 UD6$ T" U 0 UD6$ r&   )training)r   user_kwargsfuncgraph_training_stategraphedorig_fwds     r   new_fwdEmake_graphed_callables.<locals>.make_graphed_forward.<locals>.new_fwd  s0    }}(<<&	A[AA'BkBBr   )r   z_P.argsr   z	_P.kwargsrP   r   r   )r   r   r   r   r   s   ```` r   make_graphed_forward4make_graphed_callables.<locals>.make_graphed_forward  s    C C r   )r   r   r   r   r   ztuple[torch.nn.Parameter, ...]r   rV   r   ztorch.utils._pytree.TreeSpecr   r   r   r   r   ztuple[Optional[Tensor], ...]r   r   rP   zCallable[..., object])
r   rv   r   rO   r   Callable[_P, _R]r   r   rP   r   )*r!   is_autocast_enabledis_autocast_cache_enabledr   r   r   typingcastr   r   nnModuler   _backward_hooks_forward_hooks_forward_pre_hooksallbuffersr   r   r   append	TypeError
parametersr   r"   r   r   rl   rh   rc   tree_leavesr   r   r   tree_flattenreversedr   reverse	enumerater   r   )+rz   r{   r|   r}   r/   just_one_callable_sample_argsflatten_sample_argscrr   flatten_argper_callable_len_user_argsper_callable_module_paramsr   "per_callable_static_input_surfaces_
fwd_graphs
bwd_graphsmempoolr   r   grad_inputsr   outputs_gradvper_callable_static_outputs"per_callable_output_unflatten_specr   func_outputsflatten_outputsspec per_callable_static_grad_outputsper_callable_static_grad_inputsr   r   r   r   grad_idxr   r   retr   r   s+                                              r   r   r      sH   N   ""u'F'F'H'Hm
 	
  i'' L	E&#+$6DF{{5vs{);S)@#A;Oy/a))A%%&!+(()Q.,,-2"a  EEEE"1 
 kk))994@""5#56HKHHH^ ) 06 9L!L8K#d)8K!L "A ",Auxx!?!?allnRG  " s9~&*&A 	!;!>>& ' *
 16c)n0EF0E1%))$$&0EJF05c)n0EF0E1%))$$&0EJF%)\!tG 
II			%))**,	-03%G1
,D$, 2B.K,+,++--99$+F$$K$KK|$q("'.."5"5 ,$ %';%   &+ +9@+ & %)%7 #6 
#K	 - |[9 :'1
 
.. 
II #%)+&!$Yj!IdIYY__YW_5;L 6 !& 3 3 @ @ N#**5+AB*11$7 "J (*$&(#;>34,-<7ni
 $ $
FT$
 
 JJJ|q 9#nn11(  T,@ TT!&&W2E&W!W $!3 2  :  'C  [%<"))+h*?@A"))$/ ( ##56(//0CD'../ABA<F %,,.#++-444 64 	4
  <4 14 +4 :4 /4 
4l $&CY'40qMqM&)&).q1.1'*,Q/+A.

 dEHHOO,,%&* * +	
 " 0dmmWdllDL JJtJJwE (H 1v:{ "M"*
 GF 
.	-< 65, :9sL   ]8A]]	(]	(]A3]$A]$]63A^$
]36
^	
^	)rP   rO   rT   )   FN)rz   rw   r{   r   r|   rV   r}   rO   r/   rQ   rP   rw   )rz   tuple[_ModuleOrCallable, ...]r{   ztuple[tuple[Tensor, ...], ...]r|   rV   r}   rO   r/   rQ   rP   r  )rz   7Union[_ModuleOrCallable, tuple[_ModuleOrCallable, ...]]r{   z9Union[tuple[Tensor, ...], tuple[tuple[Tensor, ...], ...]]r|   rV   r}   rO   r/   rQ   rP   r  )&
__future__r   r   collections.abcr   r   r   r   r   r	   typing_extensionsr
   r   r   r!   r   	torch.xpur   _utilsr   __all__r   r   hasattr_C__dict__torch._Cr   r   r   r   r   r   r   rt   rw   ru   r   r   r   r   <module>r     s   "  $ F F 6 6   &   T]t_uxx)**%0%=EHHk"2=>V2WEHH./9D':EHH56
 V U+<
^,y ^,B5( 5(p  %%6f8M%MN 9 N 
 $#' #  	
 !  
 
 $#'(,(/( ( 	(
 !( #( 
( $#'pFpJp p 	p
 !p =pr   