
    N j@                    H   % S SK Jr  S SKrS SKrS SKrS SKrS SKrS SKJr  S SK	J
r
  S SKJrJr  S SKJrJr  S SKrS SKrS SKJr  S SKJr  S SKJs  Jr  S SKJr  S SKJr  S S	KJr  S
SK J!r!J"r"J#r#  S
SK$J%r%  S
SK&J'r'J(r(J)r)  \(       a  S SK*J+r+J,r,J-r-  S SK.J/r/  S SK0J1r1  \" S5      r2\" S5      r3\Rh                  " \55      r6S&S jr7\S'S j5       r8\#S(S j5       r9 S)         S*S jjr:    S+S jr;\#S,S j5       r< " S S\Rz                  5      r>\#      S-S j5       r?\#S.S j5       r@S/S jrA\R                  R                  rC\CR                  \CR                  \CR                  \CR                  \CR                  \CR                  \CR                  \CR                  \CR                  \CR                  \CR                  \CR                  \CR                  \CR                  R                  \CR                  R                  \CR                  \CR                  \CR                  \CR                  \CR                  \CR                  \CR                  1r[\" \[5      r[\#S,S j5       r\      S0S jr]      S1S  jr^S q_S!\`S"'   S2S# jra            S3S$ jrb S4       S5S% jjrcg)6    )annotationsN)contextmanager)partial)AnyTYPE_CHECKING)	ParamSpecTypeVar)SymInt)get_decompositions)bind_symbols   )aot_function
aot_modulemake_boxed_compiler)strip_overloads)default_partition
draw_graph#min_cut_rematerialization_partition)Callable	GeneratorSequence)Node)IntLikeType_P_Rc                    U R                   R                  S[        R                  R                  R
                  S9 H,  n[        R                  R                  R                  Ul        M.     U R                  5         U $ )Ncall_functionoptarget)	graph
find_nodestorchopsaten_to_copytor    	recompile)fx_gnodes     k/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/torch/_functorch/compilers.py_canonicalizer,   /   s[    

%%599>>#:#: &  iinn'' 	NNK    c               #     #    [         R                  R                  S5      n  S v   [         R                  R                  U 5        g ! [         R                  R                  U 5        f = f7f)NF)r#   _C_jit_set_autocast_mode)old_jit_autocast_flags    r+   _disable_jit_autocastr2   8   sL      "HH;;EB? 	''(=>''(=>s    A+A  A+!A((A+c                p   [        5          [        U 5        U R                  R                  S[        R
                  R                  R                  S9 Ht  n[        UR                  5      S:X  d  M  [        UR                  5      S:X  d  M9  SUR                  ;   d  MK  [        R
                  R                  R                  Ul        Mv     U R                  R                   H]  n0 nUR                  R                  5        H4  u  pE[        U[        R                   5      (       a  UR"                  nXSU'   M6     X2l
        M_     U R                  R%                  5         U R'                  5         [        R(                  R+                  U 5      n[        R,                  R/                  UR                  5        [        R(                  R1                  UR3                  5       5      n[        R(                  R5                  U5      n[7        S U 5       5      (       d  U" U6   SSS5        U$ ! , (       d  f       W$ = f)z
Compiles the :attr:`fx_g` with Torchscript compiler.

.. warning::
    This API is experimental and likely to change.

Args:
    fx_g(fx.GraphModule): The input Fx graph module to be compiled.

Returns:
    Torch scripted model.
r   r   r   dtypec              3  j   #    U  H)  n[        U[        R                  R                  5      v   M+     g 7fN)
isinstancer#   _subclasses
FakeTensor).0ts     r+   	<genexpr>ts_compile.<locals>.<genexpr>n   s&     M1:a!2!2!=!=>>s   13N)r2   r   r!   r"   r#   r$   r%   r&   lenargskwargsr'   r    nodesitemsr7   devicetypelintr(   jitscriptr/   _jit_pass_remove_mutationfreezeevaloptimize_for_inferenceany)r)   inpsr*   
new_kwargskvfs          r+   
ts_compilerR   C   s    
	 JJ))uyy~~'>'> * 
D 499~"s4;;'71'<DKKAW#iinn//	
 JJ$$DJ))+a..A !1 , %K % 	

IIT" 	**1773IIQVVX&II,,Q/MMMMtH; 
!< H= 
!	 < Hs   A"H&1H&H&E>H&&
H5c                D    [        U R                  5        [        XUS9  U $ )N)
clear_meta)printcoder   )r)   _namerT   s       r+   _draw_graph_compilerY   s   s     
$))tj1Kr-   c                0    [        [        [        U S95      $ )NrX   )r   r   rY   r[   s    r+   draw_graph_compiler\   {   s     w':FGGr-   c                    U $ )z
Returns the :attr:`fx_g` Fx graph module as it is. This is a no-op compiler
and can be used to check accuracy.

.. warning::
    This API is experimental and likely to change.

 r)   rW   s     r+   nopr`      s	     Kr-   c                  T   ^  \ rS rSrSSS.       SU 4S jjjrS	U 4S jjrSrU =r$ )
DebugInterpreter   NTinitial_envenable_io_processingc               Z   > [        U R                  /UQ76 U l        [        TU ]  " X1US.6$ )Nrd   )r   modulesymbol_mappingsuperrun)selfre   rf   r?   	__class__s       r+   rk   DebugInterpreter.run   s<     +KK
 

 w{AU
 	
r-   c                .  >^ ^
^^^ SU 4S jjmS	U4S jjmS
U4S jjm
SU
U4S jjn[         TT ]  U5      nSUR                  ;   a  [        R                  " UR                  S   5      u  pE[        R                  " U5      u  pg[        U5      [        U5      :w  a"  [        [        U5       S[        U5       35      e[        [        [        U5      5      XF5       H5  u  mp[        U	[        R                  5      (       d  M'  U" XUU 4S j5        M7     U$ )Nc                  > [        U [        5      (       d  U $ [        R                  " U R                  R
                  R                  TR                  5      5      nUR                  (       d  [        SU 35      e[        U5      $ )Nzexpected r to be a number, got )r7   r
   sympyexpandr*   exprxreplaceri   	is_numberAssertionErrorint)nirrl   s     r+   subst_symint/DebugInterpreter.run_node.<locals>.subst_symint   s_    b&))	RWW\\2243F3FGHA;;$'Fqc%JKKq6Mr-   c                .   > [        U4S jU  5       5      $ )Nc              3  4   >#    U  H  nT" U5      v   M     g 7fr6   r^   )r:   rx   rz   s     r+   r<   HDebugInterpreter.run_node.<locals>.subst_symint_tuple.<locals>.<genexpr>   s     8Cbb))Cs   )tuple)nisrz   s    r+   subst_symint_tuple5DebugInterpreter.run_node.<locals>.subst_symint_tuple   s    8C888r-   c                  > T" U R                  5       5      S:  ae  [        U R                  5       HL  nT" U R                  U5      5      UR                  U5      :w  d  M/  T" U R	                  U5      5      S:  d  ML    g   g)Nr   r   FT)numelrangendimstridesize)abidxrz   s      r+   check_significant_strides<DebugInterpreter.run_node.<locals>.check_significant_strides   sb    AGGI&* =C$QXXc]3qxx}D(59$ ) r-   c           
     z  > [        U5      (       d  [        S[        U5       35      eU R                  UR                  :w  a,  [        U" 5        SU R                   SUR                   35      eT" U R	                  5       5      UR	                  5       :w  aK  [        U" 5        SU R	                  5        ST" U R	                  5       5       SUR	                  5        35      eT" X5      nU(       dK  [        U" 5        SU R                  5        ST" U R                  5       5       SUR                  5        35      eg )Nz"expected desc to be callable, got z:  != z aka )callablerv   rD   r4   r   r   )nvrvdescsame_stridesr   r   s       r+   check(DebugInterpreter.run_node.<locals>.check   s   D>>$'I$t*%VWWxx288#$xr"((4z%JKK!"''),	9$vhb51CBGGI1N0OtTVT[T[T]S^_  5R<L$vhbU3Ebiik3R2SSWXZXaXaXcWde   r-   valr   c                 (   > ST  STR                    3$ )Nzoutput z where ri   )irl   s   r+   <lambda>+DebugInterpreter.run_node.<locals>.<lambda>   s    s'$:M:M9N&Or-   )rx   r   returnrw   )r   ztuple[IntLikeType, ...]r   ztuple[int, ...])r   torch.Tensorr   r   r   bool)r   r   r   r   r   zCallable[[], str]r   None)rj   run_nodemetapytreetree_flattenr>   rv   zipr   r7   r#   Tensor)rl   nr   ry   n_vals_n_specr_vals_r_specr   r   r   r   rz   r   rm   s   `         @@@@r+   r   DebugInterpreter.run_node   s    		9		 	 GQAFF?$11!&&-@OF$11!4OF 6{c&k)$F}DV%FGG s6{!3VD	2!"ell33bOP E r-   r   )r?   r   re   zdict[Node, Any] | Nonerf   r   r   r   )r   r   r   r   )__name__
__module____qualname____firstlineno__rk   r   __static_attributes____classcell__)rm   s   @r+   rb   rb      sE     /3%)	

 ,
 #	

 

 
5 5r-   rb   c                ,    [        U 5      R                  $ )z
Returns a (slow) interpreter over the FX graph module that also checks
various debugging properties (e.g., that tracing strides matched real
strides.)
)rb   rk   r_   s     r+   	debug_nopr      s     D!%%%r-   c                    [        U 5        [        R                  R                  U 5      n[        R                  R	                  UR                  5       5      nU$ r6   )r   r#   rF   rG   rI   rJ   )r)   rW   rQ   s      r+   simple_ts_compiler      s=    D		A		"AHr-   c                "    [        U [        5      $ r6   )r   r   )rQ   s    r+   nnc_jitr      s    ,--r-   c                0    [        U R                  5        U $ r6   )rU   rV   r_   s     r+   print_compiler     s    	$))Kr-   c                    [         [         [        [        S.nUR                  U5        [	        U [
        R                  R                  5      (       a  [        U 40 UD6$ [        U 40 UD6$ )a:  
Wrapper function over :func:`aot_function` and :func:`aot_module` to perform
memory efficient fusion. It uses the
:func:`min_cut_rematerialization_partition` partitioner to perform efficient
recomputation. It uses NVFuser to compile the generated forward and backward
graphs.

.. warning::
    This API is experimental and likely to change.

Args:
    fn (Union[Callable, nn.Module]): A Python function or a ``nn.Module``
        that takes one or more arguments. Must return one or more Tensors.
    **kwargs: Any other overrides you want to make to the settings

Returns:
    Returns a ``Callable``  or ``nn.Module`` that retains the eager behavior
    of the original :attr:`fn`, but whose forward and backward graphs have
    gone through recomputation optimizations, and the graphs have been
    compiled with nvfuser.

fw_compilerbw_compilerpartition_fndecompositions)
rR   r   default_decompositionsupdater7   r#   nnModuler   r   )fnr@   configs      r+   memory_efficient_fusionr     sZ    6 "!;0	F MM&"ehhoo&&"'''B)&))r-   c                    U R                  S5        [        SU Vs/ s H  o"R                  UR                  4PM     sn S35        SSKJn  U" 5       R                  5       " U6   [        X5      $ s  snf )NfooaQ  
##############################################################
# To minimize FX graph, copy and paste the below and run it  #
##############################################################

import torch
import torch.fx as fx
from functorch.compile import minifier, check_nvfuser_subprocess, check_nvfuser_correctness_subprocess

inps = a?  
inps = [torch.ones(shape, dtype=dtype, device='cuda') for (shape, dtype) in inps]
from foo import FxModule
mod = FxModule().cuda()

with torch.jit.fuser("fuser2"):
  # check_nvfuser_subprocess can be replaced with check_nvfuser_correctness_subprocess
  minifier(fx.symbolic_trace(mod), inps, check_nvfuser_subprocess)
r   )FxModule)	to_folderrU   shaper4   r   r   cudarR   )r)   rM   r   r   s       r+   debug_compiler   9  so     	NN5		 &**T''177	T*+ ,	( JOOtd!! 	+s   !A0
rw   graph_indexc                P   / n[        U S5       n[        R                  " U5      n/ nU H  n[        U5      S:X  a  UnU" [        R                  " 5       5      nOUu  pWpn
U	[
        R                  [
        R                  [
        R                  [
        R                  [
        R                  [
        R                  [        [        1;   a  [
        R                  " SSXyU
S9nO[
        R                  " XyU
S9nUR                  U5        M     SSS5        U$ ! , (       d  f       U$ = f)zR
Return a random input for the given inputs meta generated from _save_fx_default.
rbr   r   )r4   rC   N)openpickleloadr>   randomr#   rw   int32int64r   uint8floatrandintrandappend)input_data_pathinputsrQ   inputs_metar   rD   input_r   _strider4   rC   s              r+   
get_inputsr   [  s     "$F	ot	$kk!nD4yA~fmmo.6:3WVIIKKKKJJIIKK	 	 #]]1aFSF"ZZ6JFMM&!'   
%. M/ 
%	$. Ms   C=D
D%c           	        ^ ^^^	^
 SSK Jn  S	U	4S jjm	        S
U UUU	4S jjm
      SU
4S jjn      SU
4S jjn      SU
4S jjnU" UUUUU[        S9$ )a  
The forward, backward, and joint computation graph will be stored in
{folder_name}/{current_name}/{current_name}_forward_{graph_index},
{folder_name}/{current_name}/{current_name}_backward_{graph_index}, and
{folder_name}/{current_name}/{current_name}_joint_{graph_index} respectively.
The input shape of the graphs will be stored in the .input files.
These files can be loaded with pickle,
and is a list of format (type, shape, stride, dtype, device).
In the case of type = int or float, it is just (type,).
For joint graph input, it is a nested list [[],[]]
where the two inner lists have the same format.
If dump_example_input is True, example_inputs will be stored in .pt file.
Since each function might produce multiple graphs,
the graph_index is used to distinguish difference graphs
r   )aot_module_simplifiedc                  > / n[        U 5      S:  a6  [        U S   [        5      (       a  UT" U S   5      -  nUT" U S   5      -  nU$ U  H  n[        U5      [        L d  [        U5      [
        L a  UR                  [        U5      45        MD  UR                  [        U5      UR                  UR                  5       UR                  UR                  45        M     U$ )Nr   r   )r>   r7   r   rD   rw   r   r   r   r   r4   rC   )r?   
input_metaargget_input_metas      r+   r   (_save_fx_default.<locals>.get_input_meta  s    
t9q=ZQ77.a11J.a11JCCyC49#5!!49,/!!#Y		3::<CJJO	  r-   c                2  > [        U R                  R                  5      S:X  a,  [        R                  [        R
                  STU[        5        g [        R                  " U 5      nUR                  R                  [        R                  R                  R                  5       5        UR                  5         T	" U5      n[        R                  " T ST 3SS9  UR!                  T ST ST SU S[         3	5        [#        T ST ST SU S[         ST SU S[         S3S5       n[$        R&                  " XE5        S S S 5        T(       a8  [        R(                  " UT ST ST SU S[         ST SU S[         S	35        g g ! , (       d  f       NN= f)
Nr   z!No nodes in graph {%s}_{%s}_{%s}./T)exist_okrW   z.inputwbz.pt)r>   r!   rA   logloggingWARNINGr   copydeepcopyset_codegenr#   fxCodeGenr(   osmakedirsr   r   r   dumpsave)

gm_to_saver?   	type_namegmr   rQ   current_namedump_example_inputfolder_namer   s
         r+   graph_saver_helper,_save_fx_default.<locals>.graph_saver_helper  s    z%%&!+GG3 ]]:&
UXX^^3356
#D)

{m1\N3dC
m1\N!L>9+Q{mT	
 m1\N!L>9+Q{mSTUaTbbcdmcnnop{o|  }C  D
 KK
&	

 JJ-qa~Qyk;-WXYeXffghqgrrst  tA  AD  E 
 
s   )F
Fc                   > T" XS5        U $ )Nforwardr^   r   example_inputsr   s     r+   graph_saver_forward-_save_fx_default.<locals>.graph_saver_forward  s     	2y9	r-   c                ,   > T" XS5        [         S-  q U $ )Nbackwardr   )r   r  s     r+   graph_saver_backward._save_fx_default.<locals>.graph_saver_backward  s     	2z:q	r-   c                ,   > T" XS5        [        X5      $ )Njoint)r   )r   
joint_argsr   s     r+   graph_saver_joint+_save_fx_default.<locals>.graph_saver_joint  s     	273 00r-   r   )r?   r   r   z	list[Any])r   fx.GraphModuler?   r   r   strr   r   )r   r  r  list[torch.Tensor]r   r  )r   r  r  r  r   z%tuple[fx.GraphModule, fx.GraphModule])functorch.compiler   r   )r   r   r   r   r  r   r  r
  r  r   r   s   ```      @@r+   _save_fx_defaultr  z  s    , 8!"!*-!:=!	! !F,>	,>	11(:1	.1 !
'(&- r-   c                (    Sq [        [        XU5      $ )aK  
Dump the forward, backward, and joint computation graph.
Example Usage:
save_fx_func = graph_dumper_aot(current_name, folder_name, dump_example_input = False)
optimize_ctx = torchdynamo.optimize(
    save_fx_func
)
with torch.enable_grad():
    with optimize_ctx:
        result = forward_and_backward_pass(model, example_inputs)
r   )r   r   r  )r   r   r   s      r+   graph_dumper_aotr    s     K#\@RSSr-   )r)   r  r   r  )r   zGenerator[None, None, None])r)   r  rM   zSequence[Any]r   torch.jit.ScriptModule)T)
r)   r  rW   r   rX   r  rT   r   r   r  )rX   r  r   z5Callable[[fx.GraphModule, list[Any]], fx.GraphModule])r)   r  rW   r   r   r  )r)   r  rW   r   r   zDCallable[[DebugInterpreter, Any, dict[Node, Any] | None, bool], Any])r)   r  rW   r   r   r  )rQ   Callable[..., Any]r   r  )r   Callable[_P, _R] | nn.Moduler@   r   r   r  )r)   r  rM   zSequence[torch.Tensor]r   r  )r   r  r   r  )r   r  r   r  r   r   r   ztorch.fx.GraphModuler  r  r   z	nn.Module)F)r   r  r   r  r   r   r   z Callable[[bool, nn.Module], Any])d
__future__r   r   r   r   r   r   
contextlibr   	functoolsr   typingr   r   typing_extensionsr   r	   rq   r#   torch.fxr   torch.nnr   torch.utils._pytreeutils_pytreer   r
   torch._decompr   %torch.fx.experimental.symbolic_shapesr   aot_autogradr   r   r   compile_utilsr   partitionersr   r   r   collections.abcr   r   r   torch.fx.noder   torch.typesr   r   r   	getLoggerr   r   r,   r2   rR   rY   r\   r`   Interpreterrb   r   r   r   r$   r%   detachgelu_backwardleaky_relu_backwardsigmoid_backwardthreshold_backwardhardtanh_backwardhardsigmoid_backwardhardswish_backwardtanh_backwardsilu_backwardelu_backwardcudnn_batch_normcudnn_batch_norm_backwardmasked_fillScalarr   elu
leaky_reluhardtanh	hardswishhardsigmoidconj_physicalis_same_sizer   r   r   r   r   __annotations__r   r  r  r^   r-   r+   <module>rF     s1   "   	   %  % 0     $ $  , > G G *  =="' t_T]!
 ? ? , ,` AE
 (+9=H
H:H 	 	Er~~ EP &
& &I& &  . yy~~KK""HHOOMMNN- 4 ,,BC   
$*$$*$* "$*N"
" 6""> S >fff f 		f
 'f fV EJTT$'T=AT%Tr-   