
    N j}                   J   % S SK Jr  S SKrS SKrS SKrS SKrS SKrS SKrS SKrS SK	r	S SK
r
S SKJrJrJrJrJr  S SKJrJr  S SKJr  S SKJr  S SKJrJrJrJrJrJrJrJrJrJ r J!r!J"r"  S SK#J$r$J%r%J&r&J'r'J(r(J)r)  S S	K*J+r+  S SK,r,S S
K,J-r-J.r.J/r/  S SK0J1s  J2s  J3r4  S SK5J6s  J7r8  S SK9r:S SK;r:S SK<J7s  J=r>  S SK?J@r@  S SKAJBrB  S SKCJDrD  S SKEJFrF  S SKGJHrH  S SKIJJrJ  S SKKJLrLJMrMJNrNJOrOJPrP  S SKQJRrRJSrSJTrTJUrUJVrVJWrWJXrXJYrYJZrZ  S SK[J\r\  S SK]J^r^  S SK_J`r`  S SKaJbrbJcrcJdrdJere  S SKfJgrg  SSKhJiriJjrj  SSKkJlrlJmrmJnrnJoroJprp  SSKjJqrqJrrrJsrsJtrtJuru  SSKvJwrw  SSKxJyryJzrzJ{r{J|r|  SSK}J~r~  SSKJrJr  SS K7JrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJrJr  SS!KJrJrJr  \(       a+  S S"KJr  S S#KQJr  S S$K[Jr  SS%KJr  SS&KJr  SS'KJr  SS(K7Jr  O\rS)\S*'    S SKr\GRT                  rS+r\'" S-5      r\!" S.5      r\!" S/5      r\!" S05      r\"\\-4   rS)\S1'   \"\\\-4   rS)\S2'   \"\:GRl                  GRn                  \:GRl                  GRp                  4   rS)\S3'   \GRt                  " \5      r\R*                  " \	GRz                  S4S59r\:GR2                  GR|                  r \"\S6\\S64   S7S8\\\"\\\S64   S6S7S84         4   rS)\S9'   GS-S: jr\GR                  " S+S;9 " S< S=5      5       rGS.S> jrGS/S? jrGS0S@ jrGS0SA jr      GS1SB jr/ SCQr/ SDQr GS2     GS3SE jjrGS4SF jr GS2     GS3SG jjr\GS5GS6SH jj5       r\GS5GS7SI jj5       r GS5     GS8SJ jjr    GS9SK jr    GS:SL jrGS;SM jrGS;SN jrGS<SO jr        GS=SP jr      GS>SQ jrGS?SR jrGS@SS jrST r " SU S85      r\" S,S;9 " SV SW5      5       r\ " SX SY\5      5       rGSASZ jr\ " S[ S\\5      5       r\ " S] S^\5      5       r\" S_5      \" S`5      \" Sa5      \" Sb5      \" Sc5      \" Sc5      \" Sd5      Se.rSf\Sg'    GS5       GSBSh jjr\ " Si Sj\5      5       rS\." S 5      4       GSCSk jjr\\\-   \\-   /\4   rS)\Sl'    " Sm Sn\5      r " So Sp\5      r " Sq Sr\5      r\ " Ss St\5      5       r\ " Su Sv\5      5       r\ " Sw Sx\5      5       rGSDSy jrGSDSz jr     GSE             GSFS{ jjr      GSGS| jrGSHS} jr\ " S~ S\5      5       r\ " S S\5      5       r\ " S S\5      5       r\ " S S\5      5       r\ " S S\5      5       r\ " S S\5      5       r\ " S S\5      5       r\ " S S\5      5       r " S S\5      r\ " S S\5      5       r\ " S S\5      5       r\ " S S\5      5       r      GSIS jrGSJS jr " S S5      r\ " S S\5      5       r " S S\5      Gr  " S S\5      Gr " S S\5      Gr " S S\5      Gr " S SG\ 5      Gr\ " S S\5      5       Gr " S S\5      Gr\" S,S;9 " S S\\m5      5       Gr\" S,S;9 " S SG\\5      5       Gr " S SG\5      Gr	 " S SG\	5      Gr
 " S SG\	5      Gr\ " S S\5      5       Gr\ " S S\5      5       Gr\" S,S;9 " S SG\5      5       Gr " S SG\5      Gr " S SG\5      Gr\"\\G\\G\\"\\\G\4      4   Gr " S S5      Gr " S SG\5      Gr " S SG\5      Gr " S SG\5      Gr " S SG\5      Gr " S SG\5      Gr " S SG\5      Gr    GSKS jGr\" S,S;9 " S SG\5      5       Gr " S SG\5      Gr " S SG\5      Gr\" S,S;9 " S SG\5      5       Gr\" S,S;9 " S SG\5      5       Gr  " S SG\ 5      Gr! " S SG\5      Gr" " S SG\5      Gr# " S SG\5      Gr$ " S SG\$5      Gr% " S SG\$5      Gr& " S SG\5      Gr' " S SG\5      Gr( " S SG\5      Gr) " S SG\5      Gr* " S SG\5      Gr+ " S SG\+5      Gr, " S SG\"5      Gr- " S SG\5      Gr. " S SG\5      Gr/ " S SG\ 5      Gr0 " S SG\5      Gr1 " S SG\5      Gr2 " S SG\5      Gr3 " S SG\5      Gr4\" S,S;9 " S GS 5      5       Gr5 " GS GSG\"5      Gr6\" S,S;9 " GS GSG\65      5       Gr7 " GS GSG\65      Gr8\ " GS GS\5      5       Gr9 " GS	 GS
G\5      Gr:\GR                   " GS GS\5      5       Gr; " GS S6G\;5      Gr< " GS GSG\;5      Gr=\" S,S;9 " GS GS\5      5       Gr>GSLGS jGr?\" S,S;9 " GS GSG\5      5       Gr@\" S,S;9 " GS GSG\5      5       GrA    GSMGS jGrB\" S,S;9 " GS GSG\5      5       GrC " GS GSG\65      GrD " GS GS\5      GrE\ " GS GSG\E5      5       GrF\ " GS  GS!G\E5      5       GrG " GS" GS#G\65      GrH " GS$ GS%G\H5      GrI " GS& GS'G\H5      GrJ " GS( GS)G\H5      GrKGSNGS* jGrLGSNGS+ jGrMGSOGS, jGrNg! \ a    SrS,r GNf = f(P      )annotationsN)Callable	GeneratorIterableIteratorSequence)AbstractContextManagernullcontext)Enum)partial)AnycastClassVarLiteralOptionaloverloadSupportsFloatSupportsIntTYPE_CHECKING	TypeAliasTypeVarUnion)assert_neverNeveroverride	ParamSpecSelfTypeIs)patch)ExprIntegerSymbol)identity)GraphModuleSerializer)can_auto_functionalize)metricsget_free_symbols)is_opaque_type)compute_required_storage_lengthis_boolean_dtypeis_float_dtypemake_channels_last_strides_for
StrideType)	&_remove_effect_token_unbacked_bindingscompute_unbacked_bindingsfree_symbolsfree_unbacked_symbolsIterateExprsrebind_unbackedresolve_unbacked_bindingsShapeEnvSymTypes)Node
OrderedSet)_disable_current_modes)CleanDivFloorDivModModularIndexing)SymT   )configdependencies)BackendFeatureCodegenSymbolget_scheduling_for_deviceindex_prevent_reorderingKernel)Depextract_free_symbols#extract_input_node_reduction_rangesextract_read_writesvar_builder)LoopBody)OpCounterCSEOpCountResultReductionType	StoreMode)benchmarker)DevicePropertiesReductionHint)argsortargsort_symcache_on_selfcache_on_self_and_argsceildivconvert_shape_to_inductorconvert_shape_to_symintdeveloper_warningdo_bench_using_profilingdtype_from_sizeget_dtype_sizeget_kernel_metadataGPU_ALIGN_BYTESir_dataclass
is_dynamicis_gpu	sympy_dotsympy_index_symbolsympy_index_symbol_with_prefixsympy_product
sympy_substensor_is_aligned)opsOpsValueV)FakeScriptObject)SympyBoolean)Argument)CUTLASSTemplate)PythonWrapperCodegen)GraphLowering)IndentedBufferr   rr   TF_P_T_U_V_IntLike_NumLike_OpOverloadsz  prefix	TensorBoxr"   IRNode_NodeOrNodesc                .    [        U [        [        45      $ N)
isinstanceintr!   xs    c/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/torch/_inductor/ir.py
_is_staticr      s    a#w((    )frozenc                  R    \ rS rSr% S\S'   S\S'   S\S'   S\S	'   S
\S'   S\S'   Srg)GraphPartitionSignature   OrderedSet[sympy.Symbol]symbol_inputsz5dict[str, Union[IRNode, sympy.Expr, TorchBindObject]]input_nodeslist[IRNode]output_nodeszdict[str, bool]input_deallocationboolskip_cudagraph	list[str]constant_names N__name__
__module____qualname____firstlineno____annotations____static_attributes__r   r   r   r   r      s/     ,+ GF (' r   r   c                &   ^ SU4S jjmT" U 5        g )Nc                  > U c  g [        U [        [        45      (       a  U  H  nT" U5        M     g [        U [        5      (       a   U R	                  5        H  nT" U5        M     g [        U [
        [        [        [        [        R                  R                  R                  [        [        [        [         4	5      (       d   S[#        U 5       S35       eg )NzFound zE, which is not a supported top level IR node. See [Note: Inductor IR])r   listtupledictvalues
ExpandViewDynamicScalarAssertScalarr   sympylogicboolalgBooleanr    r   EffectfulKernelShapeAsConstantBuffertype)nodesnode_check_tensorboxs     r   r   %validate_ir.<locals>._check_tensorbox   s     =e}-- & t$$ & ' ! KK''//#)
   e%jk r   )r   Optional[_NodeOrNodes]returnNoner   )node_or_nodesr   s    @r   validate_irr      s    < ]#r   c                b   ^  [        T [        5      (       d   [        T 5      5       eSU 4S jjnU$ )Nc                 0   > [        [        T5      " U 0 UD6$ r   )getattrrl   )argskwargsnames     r   fnops_wrapper.<locals>.fn  s    sD!42622r   )r   objectr   r   r   rm   )r   strr   )r   r   s   ` r   ops_wrapperr   
  s+    dC  ,$t*, 3 Ir   c           
     f   ^ [        [        U [        [        U 5      5      5      5      mSU4S jjnU$ )Nc                   > [        U 5      [        T5      :X  d   e[        [        U 5      5       Vs/ s H
  oTU      PM     sn$ s  snf r   lenrange)indexi	inv_orders     r   reindex inverse_reorder.<locals>.reindex  sC    5zS^+++-23u:->?->il#->???   Ar   Sequence[_T]r   r   )r   zipr   r   )orderr   r   s     @r   inverse_reorderr     s*    Sc%j 123I@ Nr   c                   ^  SU 4S jjnU$ )Nc                   > [        U 5      [        T5      :X  d   e[        [        U 5      5       Vs/ s H
  oTU      PM     sn$ s  snf r   r   )r   r   r   s     r   r   same_reorder.<locals>.reindex  sB    5zSZ''').s5z):;):AeAh):;;;r   r   r   )r   r   s   ` r   same_reorderr     s    < Nr   c                   ^ ^ SU U4S jjnU$ )Nc                    > T" T" U 5      5      $ r   r   )r   reindex1reindex2s    r   r    fuse_reindexing.<locals>.reindex)  s    ((r   )r   r   r   zSequence[_V]r   )r   r   r   s   `` r   fuse_reindexingr   %  s    ) ) Nr   )   r      rA   )   r   r   r   rA   c                j    Ub  [        S U  5       5      (       a  [        U 5      nU$ [        X5      nU$ )z)
Convert strides to fill order (argsort)
c              3  b   #    U  H%  n[        U[        [        R                  45      v   M'     g 7fr   r   r   r   r!   .0ss     r   	<genexpr>!get_fill_order.<locals>.<genexpr>9  s#     QS
1sEMM.B C CS   -/)allrV   rW   )seq	shape_env
sorted_idxs      r   get_fill_orderr   3  s=     CQSQQQ$+CL
  !0
r   c                    [        U 5       VVs0 s H  u  pX!_M	     nnn[        [        U 5      5       Vs/ s H  oCU   PM	     nnU$ s  snnf s  snf )zx
Convert stride order to fill order
For channel last format,

stride order = [3, 0, 2, 1] and fill order = [1, 3, 2, 0]
)	enumerater   r   )r   idxposlookupr   
fill_orders         r   stride_order2fill_orderr   A  sR     (1'78'783ch'7F8%*3u:%67%6)%6J7 97s
   AAc                    [        X5      n[        [        U 5      5       Vs/ s H  nSPM     nn[        U5       H	  u  pVXTU'   M     U$ s  snf )z!
Convert strides to stride order
r   )r   r   r   r   )r   r   r   _outr   elems          r   get_stride_orderr   M  sL     !/s >JCHo
&o1oC
&Z(D	 )J 's   A
c                    g r   r   r   guard_shapes     r   ir_node_to_tensorr   Z  s    BEr   c                    g r   r   r   s     r   r   r   ^  s    LOr   c                   U c  g U(       d%  [         R                  R                  R                  nO[        nU R                  5        Vs/ s H
  o2" U5      PM     nn[        U 5      (       a0  U R                  5       R                   Vs/ s H
  o2" U5      PM     nnO[        R                  U5      nU R                  5       nU R                  5       n[        U5      n[        U5      n[         R                  R                  R                  R                  5          [         R"                  " XEXgS9R%                  5       nS S S 5        U$ s  snf s  snf ! , (       d  f       W$ = f)N)sizestridedtypedevice)rn   graphsizevars	size_hintr#   get_sizeis_storage_and_layout
get_layoutr  FlexibleLayoutcontiguous_strides	get_dtype
get_devicer\   r   suppress_guardstorchempty_stridedzero_)	r   r   shape_fnr   r  r  r  r  ts	            r   r   r   b  s    	y 77##--!".AHQKD.Q'(||~'<'<='<!(1+'<=2248KKME\\^F"4(D$V,F	
			#	#	3	3	5E

%' 	
 
6 H / > 
6	5 Hs   	EE$E
E c                D    [        U [        5      (       a
  U (       d  S /$ U $ r   )r   r   values    r   may_convert_to_optionalr    s!     %u vLr   c                @   [        U [        5      (       d  U c  U $ [        U [        R                  5      (       a  U R                  $ [        U [
        [        45      (       a  [        U R                  5       5      $ [        SU  S[	        U 5      R                   S35        g )Nzget_device_type(: ))r   r   r  r  r   r   
OutputSpecget_device_typer  r   r   r   s    r   r  r    sz     !SQY	Au||	$	$vv	A
+	,	,q||~..#A3ba)9)9(:!<=r   c                    [        U 5      nUS;   a  [        [        U S35      S:X  a  ggUb  [        U5      =nc  gSSKJn  [        U[        5      (       d   [        U5      5       e[        X#5      $ )N)cpucuda_backendtritonTFrA   )TritonScheduling)	r  r   rB   rF   codegen.tritonr$  r   r   
issubclass)r   r  device_schedulingr$  s       r   	is_tritonr(    sy    QF  6fXX./8;!:6!BBK0'..G5F0GG.'::r   c                    [        U 5      S:H  $ )Nr   )r  r   s    r   is_cpur*    s    1&&r   c                j  ^ [        U [        5      (       aM  U R                  5       b<  [        U R	                  5       5      (       d  [        U R                  5       5      (       a  g[        R                  " U4S jU R	                  5       S S  5       6 n[        R                  " [        R                  " U R	                  5       S   S5      [        R                  " U R                  5       S   S5      5      n[        R                  " X#5      n[        R                  R                  R                  U5      $ )NFc              3  f   >#    U  H&  n[         R                  " [        UT5      S 5      v   M(     g7f)r   N)r   Eqr>   )r   r   	alignments     r   r   -is_aligned_realized_tensor.<locals>.<genexpr>  s(     	F2EQ%((3q)$a
(
(2Es   .1rA   )r   r   maybe_get_strider2   
get_strider	  r   AndOrr-  Lern   r  r  guard_or_false)r   r.  aligned_stridesaligned_last_dim
is_aligneds    `   r   is_aligned_realized_tensorr:    s    q&!!' 00 ..ii	F!,,."2E	FO xx#Q'!**,r2BA)F ?=J 77**:66r   c                   [        U5      [        U 5      :X  a  [        U 5      [        U5      :X  d   e[        X U5       H  u  p4n[        R                  R                  R                  US5      (       a  M7  [        R                  R                  R                  XE5      (       a  Mg  [        R                  R                  R                  U5      [        R                  R                  R                  U5      :w  d  M    g   g)zH
Returns true if the strides are equal, ignoring dimensions of size 1 .
rA   FT)r   r   rn   r  r  statically_known_leqstatically_known_equalssymbolic_hint)strides1strides2shapedims1s2s         r   significant_strides_equalrE    s     u:X&3x=CM+III5H57700a88ww77
 
gg,,R0AGG4D4D4R4RSU4VV 6 r   c                t   [        U 5      (       d  U $ [        S [        XR                  5       5       5       5      (       a  U $ [	        XR                  5       U R                  5       5      (       d  U $ [        U 5      u  p#/ UR                  Qn[        U R                  5       5       H<  u  pV[        R                  R                  R                  US5      (       d  M6  X   XE'   M>     [        UR                  UR                  UR                   UUR"                  UR$                  5      n['        [)        X'S95      $ )a  
Tries to match the strides of the tensor to those in the meta_strides. Strides of insignificant
dimensions - size 0 or 1 - will be updated.

If there are real stride differences (NHWC vs NCHW), or the tensor is not realized, then the input will be returned
c              3  x   #    U  H0  u  p[         R                  R                  R                  X5      v   M2     g 7fr   rn   r  r  r=  r   rC  rD  s      r   r   2try_match_insignificant_strides.<locals>.<genexpr>  s1      7FB 	
00887   8:rA   datalayout)r
  r   r   r2  rE  r	  as_storage_and_layoutr  r   rn   r  r  r<  FixedLayoutr  r  r  offset	is_pinnedr   ReinterpretView)tensorstridesstorage
old_layout
new_strider   r   
new_layouts           r   try_match_insignificant_stridesrZ    s    !((
 '#4#4#67   $W.?.?.A6??CTUU/7G%:$$%J&//+,7700A66#JJM - J _'EFFr   c                    U R                   R                  SS9S   n[        UR                  5       VVs/ s H  u  p#UPM	     snnUR                  S'   SSKJn  U" U 5        g s  snnf )Noutputopr   user_visible_output_idxs)record_original_output_strides)r  
find_nodesr   r   metatorch._inductor.compile_fxr`  )gmoutput_noder   r   r`  s        r   gm_original_output_stridesrf    sd    ((%%%215K#K$4$454554K/0 J"2&4s   A#c                    [        5       nU  H9  nU[        UR                  5       SS9-  nU[        UR                  5       SS9-  nM;     [	        U5      $ )NFunbacked_only)r:   r(   r	  r2  r   )inputssym_varsinps      r   get_symbolic_inputsrm  	  sP    !+H$S\\^5II$S^^%5UKK  >r   c                   [        U [        5      (       a  U R                  n [        U [        5      (       a  U R	                  5       n [        U [
        5      (       a  U R                  n [        U [        5      (       a  U R                  5       $ S $ r   )r   r   rM  BaseViewunwrap_view
StorageBoxBufferget_namer   s    r   try_get_namert    sc    !YFF!XMMO!Z  FF%a001::<:d:r   c                     \ rS rSr% Sr\" 5       rS\S'   \R                  " SS9r
S\S'   \R                  " SS9rS	\S
'   \R                  " SS9rS\S'   \R                  " SS9rS\S'   \\R                   SKS j5       5       r\SLS j5       rSMS jrSNS jrSOS jrSPS jrSQS jrSRS jrSOS jrSSSTS jjr SU       SVS jjrSWS jrSXS jrSYS jrSZS jrS[S jr S\S jr!S]S  jr"S^S! jr#S_S" jr$\%S`S# j5       r&SaS$ jr'S]S% jr(SbS& jr)ScSdS( jjr*SeS) jr+SfS* jr,S]S+ jr-SgS, jr.ShS- jr/SiS. jr0S_S/ jr1SjS0 jr2SbS1 jr3S]S2 jr4ScSkS3 jjr5SlS4 jr6SNS5 jr7SmS6 jr8SNS7 jr9 Sn     SoS8 jjr:SpS9 jr;SqS: jr< Sn     SrS; jjr=SsS< jr>StS= jr?SuS> jr@SvS? jrA Sn   SwS@ jjrBSbSA jrCS^SB jrDS]SC jrES]SD jrFSxSE jrGSySF jrHSjSG jrISySH jrJ\K(       a  \%SWSI j5       rLSJrMg'SJrMg')zr   i  zBase class for all intermediate representation (IR) nodes in TorchInductor.

Note:
    This is an abstract base class. Most methods raise NotImplementedError
    and must be overridden by concrete subclasses.
zClassVar[OrderedSet[Any]]_current_originsF)initOrderedSet[Any]originsOptional[list[str]]	tracebackOptional[torch.fx.Node]origin_nodedict[str, Any]r   c              #     #    [         R                  nX-  [         l         S v   U[         l        g ! U[         l        f = f7fr   )r   rv  )ry  olds     r   current_originsIRNode.current_origins.  s4      %%"%-	*&)F#cF#s   A1 A>Ac                L    [        U [        [        [        [        [
        45      $ r   )r   ComputedBufferInputsKernelInputBufferrS  TemplateBuffer)r   s    r   is_realized_nodeIRNode.is_realized_node8  s&    	
 		
r   c                0    [         R                  XU5        g r   )r   __setattr__)selfattrr  s      r   _post_init_setattrIRNode._post_init_setattrE  s     	4u-r   c                   [        U R                  5      nU R                  SU5        U R                  S[        R                  (       a  [
        R                  " 5       OS 5        U R                  SS 5        U R                  S0 5        g )Nry  r{  r}  r   )r:   rv  r  rB   debug_ir_tracebackr{  format_stack)r  ry  s     r   __post_init__IRNode.__post_init__K  sk    T223	73V5N5N//1TX	
 	t4r2r   c                B    [        S U R                  5        5       5      $ )Nc              3  8   #    U  H  oR                   v   M     g 7fr   r   r   deps     r   r   (IRNode.get_read_names.<locals>.<genexpr>V       ?.>s((.>   r:   	get_readsr  s    r   get_read_namesIRNode.get_read_namesU      ?dnn.>???r   c                    U R                   $ r   )r{  r  s    r   get_tracebackIRNode.get_tracebackX  s    ~~r   c                    U R                   $ r   r}  r  s    r   get_origin_nodeIRNode.get_origin_node[      r   c                    g r   r   r  s    r   get_defining_opIRNode.get_defining_op^      r   c                   [        5       nU R                  n[        U [        5      (       a-  U R	                  5       nU R
                  (       a  [        U/5      nU H  n[        US5      (       a.  UR                  (       a  UR                  UR                  5        MB  [        R                  R                  R                  R                  S0 5      R                  UR                  / 5      n[        U[        5      (       d  M  U HQ  n[        R                  R                  R                   R                  US 5      nU(       d  M@  UR                  U5        MS     GM     U$ )Nstack_trace	postToPre)r:   ry  r   ExternKernelr  r}  hasattrr  addr  	_inductordebug _inductor_post_to_pre_grad_nodesgetr   r   #_inductor_pre_grad_node_stack_trace)r  stack_tracesry  r}  r   pre_grad_nodes	node_namer  s           r   get_stack_tracesIRNode.get_stack_tracesa  s    )3,,dL))..0K$k]3Dt]++0@0@  !1!12 OO))JJNN# c$))R(  ".$77!/I--QQUU%t  
 #{$((5 "0 , r   c                6   S[        U SS5       3nU(       a  [        U5      S:  a  US S  S3nU R                  5       (       d  U/$ / nU R                  5        H8  nUR                  S5        X4R	                  S5      -  nUR                  S	5        M:     U/U-   $ )
Nzorigins=ry   @   =   z...zstack_traces = {
})r   r   r  appendsplit)r  shortenry  stack_trace_strr  s        r   common_reprIRNode.common_repr  s    WT9b9:;s7|b( "c*G$$&&9002K""#560066O""3' 3 y?**r   c                .   [        U5      [        U R                  U5      5      -   n[        [        [        U5      5      nU(       a5  [	        SR                  U5      5      n[        U 5      R                   SU S3$ [        U 5      R                   SU S3$ )Nz,
z(
z
)(r  )r   r  mapr   indentjoinr   r   )r  linesr  	multiline	new_liness        r   
str_helperIRNode.str_helper  s     Ud4#3#3G#<==Se_%uzz%01I4j))*#i[<<4j))*!E7!44r   c                    U R                   $ r   r  r  s    r   r  IRNode.get_dtype      zzr   c                D     U R                  5       $ ! [         a     g f = fr   )r  NotImplementedErrorr  s    r   maybe_get_dtypeIRNode.maybe_get_dtype  s&    	>>##" 		    
c                2    [        S[        U 5       S35      e)Nz#get_layout() is not implemented by !r  r   r  s    r   r  IRNode.get_layout  s    !$GT
|ST"UVVr   c                D     U R                  5       $ ! [         a     g f = fr   )r  r  r  s    r   maybe_get_layoutIRNode.maybe_get_layout  &    	??$$" 		r  c                "    U R                  5       $ r   )r  r  s    r   get_output_specIRNode.get_output_spec  s      r   c                D     U R                  5       $ ! [         a     g f = fr   )r  r  r  s    r   maybe_get_output_specIRNode.maybe_get_output_spec  s(    	''))" 		r  c                >    [        U R                  5       [        5      $ )z4True for single tensor output (excludes MultiOutput))r   r  Layoutr  s    r   has_tensor_outputIRNode.has_tensor_output  s    $446??r   c                2    [        S[        U 5       S35      e)Nz!get_size() is not implemented by r  r  r  s    r   r	  IRNode.get_size  s    !$Ed4j\QR"STTr   c                D     U R                  5       $ ! [         a     g f = fr   )r	  r  r  s    r   maybe_get_sizeIRNode.maybe_get_size  %    	==?"" 		r  c                "    U R                  5       $ r   r	  r  s    r   rA  IRNode.shape  s    }}r   c                4    [        U R                  5       5      $ r   )ri   r	  r  s    r   	get_numelIRNode.get_numel  s    T]]_--r   c                    [         R                  R                  R                  [        R
                  " U R                  5       S5      5      $ Nr   rn   r  r  statically_known_truer   r-  r  r  s    r   is_zero_elementsIRNode.is_zero_elements  0    ww55ehht~~?OQR6STTr   c                0    [        S[        U 5       35      e)a  
If the IRNode refers to data which has not been materialized (e.g.,
it is a Pointwise/Reduction that could potentially have more
compute fused into it), realize the IRNode into physical memory,
ending the possibility of fusing into it, but allowing, e.g., multiple
users to access the data without having to recompute.

Check StorageBox.realize for a particularly notable implementation.

TODO(ezyang): I think, in principle, every IRNode should have an
implementation of this, and most of the time no-op is OK, but you
really do have to audit each IRNode for this, so for now, raise
an error if it's not implemented.  Note that some code in graph.py
will catch this thrown error and suppress it with a warning.
zrealize NYI on r  r  s    r   realizeIRNode.realize  s      "ODJ<"@AAr   Nc                0    [        S[        U 5       35      e)Nzcodegen_reference NYI on r  r  writers     r   codegen_referenceIRNode.codegen_reference  s    !$=d4j\"JKKr   c                    g r   r   r  s    r   r  IRNode.get_device  r  r   c                0    U R                  5       nUc   eU$ r   )r  r  r  s     r   get_device_or_errorIRNode.get_device_or_error  s    "!!!r   c                    gNFr   r  s    r   has_exceeded_max_readsIRNode.has_exceeded_max_reads      r   c                >    [        [        U 5      R                  5      er   r  r   r   r  s    r   make_loaderIRNode.make_loader      !$t*"5"566r   c                >    [        [        U 5      R                  5      er   r  r  s    r   make_indexerIRNode.make_indexer  r  r   c                >    [        [        U 5      R                  5      er   r  r  s    r   r2  IRNode.get_stride  r  r   c                D     U R                  5       $ ! [         a     g f = fr   )r2  r  r  s    r   r1  IRNode.maybe_get_stride  r  r  c                >    [        [        U 5      R                  5      er   r  r  s    r   rs  IRNode.get_name  r  r   c                D     U R                  5       $ ! [         a     g f = fr   )rs  r  r  s    r   maybe_get_nameIRNode.maybe_get_name  r  r  c                z     U R                  5       [        R                  R                  ;   $ ! [         a     gf = fr  )rs  rn   r  graph_inputsr  r  s    r   is_input_bufferIRNode.is_input_buffer  s4    	==?agg&:&:::" 		s   *- 
::c                    gr  r   r  	thresholds     r   has_large_inner_fnIRNode.has_large_inner_fn  r  r   c                    g r   r   r  userss     r   
mark_reuseIRNode.mark_reuse      r   c                    g r   r   r  s    r   realize_hintIRNode.realize_hint  r5  r   c                >    [        [        U 5      R                  5      er   r  r  s    r   rp  IRNode.unwrap_view  r  r   c                >    [        [        U 5      R                  5      er   r  r  s    r   freeze_layoutIRNode.freeze_layout  r  r   c                >    [        [        U 5      R                  5      er   r  r  r   allow_paddings      r   freeze_layout_with_stride_order&IRNode.freeze_layout_with_stride_order       "$t*"5"566r   c                >    [        [        U 5      R                  5      er   r  r  r   s     r   freeze_layout_with_fill_order$IRNode.freeze_layout_with_fill_order!  r  r   c                >    [        [        U 5      R                  5      er   r  r  r  s     r   freeze_layout_with_same_order$IRNode.freeze_layout_with_same_order$  r  r   c                >    [        [        U 5      R                  5      er   r  r  exact_stridesr@  s      r    freeze_layout_with_exact_strides'IRNode.freeze_layout_with_exact_strides'  rC  r   c                >    [        [        U 5      R                  5      er   r  r  s    r   get_read_writesIRNode.get_read_writes,  r  r   c                6    U R                  5       R                  $ r   rR  readsr  s    r   r  IRNode.get_reads/      ##%+++r   c                4    [        U R                  5       5      $ r   )r   r  r  s    r   	num_readsIRNode.num_reads2  s    4>>#$$r   c                >    [        [        U 5      R                  5      er   r  r  s    r   get_storage_numelIRNode.get_storage_numel5  r  r   c                >    [        [        U 5      R                  5      er   r  r  ri  s     r   get_free_symbol_usesIRNode.get_free_symbol_uses8  rC  r   c                >    [        [        U 5      R                  5      er   r  r  s    r   get_reduction_typeIRNode.get_reduction_type=  r  r   c                >    [        [        U 5      R                  5      er   r  r  s    r   get_reduction_sizeIRNode.get_reduction_size@  r  r   c                    gr  r   r  s    r   	is_externIRNode.is_externC  r  r   c                    gr  r   r  s    r   is_no_opIRNode.is_no_opF  r  r   c                >    [        [        U 5      R                  5      er   r  r  s     r   constant_to_deviceIRNode.constant_to_deviceI  r  r   c                >    [        [        U 5      R                  5      er   r  r  s    r   get_mutation_namesIRNode.get_mutation_namesL  r  r   c                >    [        [        U 5      R                  5      er   r  r  s    r   get_operation_nameIRNode.get_operation_nameO  r  r   c                >    [        [        U 5      R                  5      er   r  r  s    r   get_inputs_that_alias_output#IRNode.get_inputs_that_alias_outputR  r  r   c                    g r   r   r  s    r   r  IRNode.dtypeW  s    (+r   r   )ry  zOrderedSet[Node]r   zGenerator[None, None, None]r   r   r   r   )r  r   r  r   r   r   r   r   r   OrderedSet[str])r   rz  r   r|  r   zOptional[Operation]T)r  r   r   Sequence[str])TT)r  zSequence[object]r  r   r  r   r   r   r   torch.dtype)r   zOptional[torch.dtype]r   r  )r   zOptional[Layout]r   r  )r   zOptional[OutputSpec]r   r   r   Sequence[Expr])r   Optional[Sequence[_IntLike]])r   z.Union[_IntLike, sympy.Rel, Sequence[_IntLike]]r   r    r   Optional[str]r   r  zOptional[IndentedBuffer]r   r   r   Optional[torch.device]r   torch.devicer   $Callable[[Sequence[Expr]], OpsValue]r    Callable[[Sequence[Expr]], Expr]r   Sequence[_IntLike]r   r   r-  Optional[int]r   r   r2  r   r   r   r   r   Fr   Sequence[int]r@  r   r   r   r   r  r   r   r  r  r   r   rN  r  r@  r   r   r   r   dependencies.ReadWritesr   zOrderedSet[Dep]r   r   r   rz   ri  r   r   r   r  r  r   r   r   r  )Nr   r   r   r   __doc__r:   rv  r   dataclassesfieldry  r{  r}  r   staticmethod
contextlibcontextmanagerr  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r	  r  propertyrA  r  r   r  r	  r  r  r  r  r  r2  r1  rs  r%  r)  r.  r3  r7  rp  r<  rA  rF  rJ  rO  rR  r  rZ  r]  ra  rd  rg  rj  rm  rp  rs  rv  ry  r   r  r   r   r   r   r   r     s^    3=,/>  +00e<G_<%0%6%6E%BI"B+6+<+<%+HK(H"-"3"3"?K?*  * 

 

.3@ B+  PT
5%
504
5HL
5	
5W!@U  .UB$L
777777 ;@7"7377	7
77 HM7/7@D7	7
7,%7 %*7!7	!7
777777 	+ 
+ r   c                      \ rS rSrSS jrSS jrSS jrSS jrSS jrSS jr	SS jr
SS	 jrSS
 jrSS jrSS jrSS jrSS jr S   S S jjrS!S jrSrg)"	Operationi[  c                    S U l         g r   operation_namer  s    r   r  Operation.__post_init__]  s
    -1r   c                    [         er   r  r  s    r   r  Operation.get_device`      !!r   c                @    [        U S5      (       d   eU R                  $ Nr}  )r  r}  r  s    r   r  Operation.get_origin_nodec  s!    t]++++r   c                @    [        U S5      (       d   eU R                  $ )Nry  )r  ry  r  s    r   get_originsOperation.get_originsg  s    tY''''||r   c                8    U R                   c   eU R                   $ r   r  r  s    r   rv  Operation.get_operation_namek  s     ""..."""r   c                    gr  r   r  s    r   rj  Operation.is_externo  r  r   c                    gr  r   r  s    r   rm  Operation.is_no_opr  r  r   c                    [         er   r  r  s    r   rR  Operation.get_read_writesu  r  r   c                &    XR                  5       ;   $ r   )r  r  r   s     r   
is_user_ofOperation.is_user_ofx  s    **,,,r   c                B    [        S U R                  5        5       5      $ )Nc              3  8   #    U  H  oR                   v   M     g 7fr   r  r  s     r   r   +Operation.get_read_names.<locals>.<genexpr>|  r  r  r  r  s    r   r  Operation.get_read_names{  r  r   c                6    U R                  5       R                  $ r   rU  r  s    r   r  Operation.get_reads~  rX  r   c                    [         er   r  r  s    r   get_outputsOperation.get_outputs  r  r   c                    [        5       $ r   r9   r  s    r   get_unbacked_symbol_defs"Operation.get_unbacked_symbol_defs  
    |r   c                    [        5       $ )a  
When unbacked_only=True:
Returns the unbacked symbols which are required to be in scope in
order to successfully perform codegen for this buffer.  For example,
a buffer that corresponds to an extern kernel call that takes i0 as
an argument would return {i0} here.  This is used to generate necessary
dependencies that ensure we actually bind i0 in codegen before you
try to use it.

Note that this is NOT transitive; in particular, if this buffer takes
in as input another buffer with dynamic shape (e.g., (i0,)), we will
not report it here, because you will already have a dependency
on that buffer, which will eventually have a dependency on i0 if
necessary.

When unbacked_only=False:
Similar to `unbacked_only=True` but including all free symbols
instead of only free unbacked symbols.
r9   r`  s     r   ra  Operation.get_free_symbol_uses  s    , |r   c                    g)z
Gets extra global memory size needed by this buffer.
Some algorithms (e.g. group gemm) may require extra global memory in the generated code.
r   r   r  s    r   get_workspace_sizeOperation.get_workspace_size  s    
 r   r  Nr~  r  r  )r   rx  r  r  r  )r   r   r   r   r  r  r   list[Buffer]r   r   r  r  r  )r   r   r   r   r  r  r  r  rv  rj  rm  rR  r  r  r  r  r  ra  r  r   r   r   r   r  r  [  sc    2" #"-@," %*!	!0r   r  c                  v   \ rS rSr% S\S'   S\S'   S\S'   S\S	'   \" S 5       S    S!S
 jj5       rS"S jrS#S jr\r	S$S jr
S%S jrS&S jrS&S jr\S'S j5       r\\R$                  4S(S jj5       r\S)S j5       rS*S jr\S#S j5       rS+S,S jjrS S-S jjrS.S jrS/S jrS0S jrS&S jrS1S jrS2S jrSr g)3Loopsi  r  r  r  r  Callable[..., Any]inner_fnr  rangesc                   ^ [        5       R                  " / U4S jU R                   5       QU R                  T5      P76 $ )Nc              3  <   >#    U  H  n[        UT5      v   M     g 7fr   r'   r   eri  s     r   r   -Loops.get_free_symbol_uses.<locals>.<genexpr>  s     F+Qq-00+   )r:   unionr  inner_fn_free_symbolsr`  s    `r   ra  Loops.get_free_symbol_uses  s>     |!! 
F$++F
&&}5
 	
r   c                   U R                  SU R                  R                   S3[        U R                  5      U R                  5       /U Vs/ s H  o" S[        X5       3PM     sn-   SU R                  < 3/-   5      $ s  snf )N'=origin_node=)r  r  r   r   r  inner_fn_strr   r}  )r  namesr   s      r   _to_strLoops._to_str  s    DKK$$%Q'DJJ!!#
 <AA54q,-.5AB d..1234
 	
 Bs   B
c                $    U R                  S5      $ Nr  r  r  s    r   __str__Loops.__str__      ||K((r   c                    U R                   $ r   r  r  s    r   r  Loops.get_device      {{r   c                    U R                   $ r   r  r  s    r   r  Loops.get_origin_node  r  r   c                    U R                   $ r   r  r  s    r   r	  Loops.get_size  r  r   c                    U R                   $ r   r  r  s    r   get_pointwise_sizeLoops.get_pointwise_size  r  r   c                    UR                  SS 5      nUR                  SS 5      nU " U0 UD6nUR                  SU5        UR                  SU=(       d    UR                  5        [        R	                  U5      $ )Nr}  r{  )popr  r{  r   create)clsr   r   r}  tbrs         r   r  Loops.create  sm    jj5ZZT*   	
]K8	["*;<""r   c                    [        U 5       VVs/ s H0  u  p#US:X  a  [        R                  R                  O
[	        X5      PM2     snn$ s  snnf NrA   )r   r   SZerorh   )r  r~   nr   s       r   _indexLoops._index  sI     "&)
) FEGGLL(Fv(QQ)
 	
 
s   7A
c                |   [        [        R                  " 5       5      n[        R                  " U5         [        R
                  " [        SS5         U R                  " U R                  5       6   UR                  5       sS S S 5        sS S S 5        $ ! , (       d  f       O= f S S S 5        g ! , (       d  f       g = fNallow_indexingT)
rO   rn   MockHandlerset_ops_handlerr   r   r  r  inner_fn_argsgetvalue)r  	opcounters     r   inner_fn_opcountLoops.inner_fn_opcount  sy     1	i(LL)94@MM4--/0%%' A@ )(@@@ )((s#   B--B?	B-
B 	B--
B;c                :    U R                  U R                  5      4$ r   )r  r  r  s    r   r!  Loops.inner_fn_args  s    DKK(**r   c                t    [         R                  R                  " U R                  /U R	                  5       Q76 $ r   )rn   KernelFormatterHandlerir_to_stringr  r!  r  s    r   r  Loops.inner_fn_str  s3    ''44MM
 ..0
 	
r   Nc                z    Uc  Sn[        U[        R                  5      nU R                  5       R                  U:  $ r  )maxrB   realize_opcount_thresholdr$  num_opsr,  s     r   r.  Loops.has_large_inner_fn  s9    I	6#C#CD	$$&..::r   c                `    U R                  U R                  5      n[        U R                  X!S9$ Nrh  )r  r  rJ   r  )r  ri  r   s      r   r  Loops.inner_fn_free_symbols  s%    DKK(#DMM5VVr   c                   [         R                  " [        SS5         U R                  5       (       aJ  [	        U R                  5       U R                  5       U R                  5       5      R                  sS S S 5        $ [	        U R                  5       U R                  5       5      R                  sS S S 5        $ ! , (       d  f       g = fr  )	r   r   r  rd  rL   r  r	  rg  rV  r  s    r   r  Loops.get_reads  s    \\.*:DA&&((*$$&MMO++- % BA +$$&MMO % BAAs   AB8=1B88
Cc                H    [        U R                  5       R                  5      $ r   )r:   r$  read_buffersr  s    r   r  Loops.get_read_names  s    $//1>>??r   c                H    [        U R                  5       R                  5      $ r   )r   r$  r7  r  s    r   rZ  Loops.num_reads  s    4((*7788r   c                2    [        S[        U 5       S35      e)Nz+get_reduction_size() is not implemented by r  r  r  s    r   rg  Loops.get_reduction_size      !9$t*QG
 	
r   c                2    [        S[        U 5       S35      e)Nz+get_reduction_type() is not implemented by r  r  r  s    r   rd  Loops.get_reduction_type  r=  r   c                2    [        S[        U 5       S35      e)Nz+constant_to_device() is not implemented by r  r  r  s     r   rp  Loops.constant_to_device!  r=  r   r   r  r  )r  r  r   r   r  r  r  r  )r   r   r   r   r   r   )r  r  r~   r@   r   r  )r   rP   r   zSequence[Sequence[_IntLike]]r   r  ri  r   r   OrderedSet[Symbol]r  r  r  r  r  )!r   r   r   r   r   rY   ra  r  r   __repr__r  r  r	  r  classmethodr  r  r@   INDEXr  rX   r$  r!  r  r.  r  r  r  rZ  rg  rd  rp  r   r   r   r   r  r    s      G$$)
!
	!
 %
	
) H  	# 	# :>** 
 
 ( (+ 
 

;W@9




r   r  c                   UR                   (       a   [        R                  " [        S5      U5      $ [        R                  " SU5      $ )Nnanr   )is_floating_pointrl   constantfloat)r   r  s     r   nop_loader_fnrM  '  s1    ||E%L%00||Au%%r   c                  d    \ rS rSrS
S jrSS jr\rSS jrSS jr        SS jr	SS jr
Srg	)	Pointwisei.  c                t    U R                  5       (       a  [        [        U R                  S9$ U R                  $ Nr  )r   r   rM  r  r  r  s    r   r  Pointwise.make_loader0  s,      ""=

;;}}r   c                $    U R                  S5      $ r  r  r  s    r   r   Pointwise.__str__7  r  r   c                    / $ r   r   r  s    r   rg  Pointwise.get_reduction_size<  s    	r   c                    g r   r   r  s    r   rd  Pointwise.get_reduction_type?  r  r   c                |    U R                  5       n[        R                  " U=(       d    SU" U5      U" U5      5      $ Nunnamed)r  rl   storer  output_nameindexervarsloaders        r   store_outputPointwise.store_outputB  s2     !!#yy1	74=&,OOr   c                    U R                  5       n[        R                  " [        SU5      " U5      n[	        UU R
                  UU R                  S9$ FMove this to a given device. Requires that all reads are to constants.override_devicer  r  r  r  )r  r   r   ConstantBufferrO  r  r  r  r  ra  s      r   rp  Pointwise.constant_to_deviceK  sI    !!#n.?HP**;;	
 	
r   r   Nr  r  )r   zSequence[sympy.Expr]r  )r^  r  r_  !Callable[[Sequence[Expr]], Never]r`  r  r   r   r  )r   r   r   r   r  r   rE  rg  rd  rb  rp  r   r   r   r   rO  rO  .  sR    ) HP"P 3P 	P
 
P	
r   rO  c                  R    \ rS rSr% S\S'   SrS\S'   S
S jr        SS jrS	rg)ScatteriW  r  output_indexerNrR   scatter_modec           	         U R                  5       n[        R                  " [        SU5      " U5      n[	        UU R
                  UU R                  U R                  U R                  S9$ )rf  rg  )r  r  r  r  ro  rp  )	r  r   r   ri  rn  r  r  ro  rp  rj  s      r   rp  Scatter.constant_to_device\  s[    !!#n.?HP**;;..**
 	
r   c                    U R                  5       nUc  Sn[        R                  " UU" U R                  U5      5      U" U5      U R                  S9$ )Nr[  )mode)r  rl   r\  ro  rp  r]  s        r   rb  Scatter.store_outputi  sT     !!##KyyD''-.4L""	
 	
r   r   r  )r^  r  r_  rl  r`  r  r   r   )	r   r   r   r   r   rp  rp  rb  r   r   r   r   rn  rn  W  sB    44"L)"

"
 3
 	

 

r   rn  
logical_ormaximumminimummulr  bitwise_xor)anyr-  minprodsumdotxor_sumz"dict[str, Callable[..., OpsValue]]REDUCTION_COMBINE_FNc                   ^ ^^ T [         ;   a	  [         T    $ T S;   a        SUUU 4S jjnU$ T S:X  a        SS jnU$ [        ST  35      e)Nargmaxargminc                  > U u  p#Uu  pETS:X  a  [         R                  " X$5      nO[         R                  " X$5      n[         R                  " X$5      n[	        T5      (       a  [         R
                  " X"5      n[         R
                  " XD5      n	[         R                  " U[         R                  " X5      5      n[         R                  " U[         R                  " X5      5      nT(       a  [         R                  " X55      O[         R                  " X55      n
[         R                  " U[         R                  " Xz5      5      n[         R                  " XbU5      [         R                  " XcU5      4$ )Nr  )	rl   ltgteqr,   nerv  logical_andwhere)aba_valuea_indexb_valueb_indexmaskequala_isnanb_isnantiearg_break_ties_leftr  reduction_types              r   argmax_combine_fn3get_reduction_combine_fn.<locals>.argmax_combine_fn  s     !G G)vvg/vvg/FF7,Ee$$&&2&&2~~dCFF7,DEucoog.OP ' w(VVG- 
 >>$(CDD		$1		$1 r   welford_combinec                \    U u  p#nUu  pVnXR-
  nXG-   n	Xy-  n
X(U
-  -   X6-   X-  U-  U
-  -   U	4$ r   r   )r  r  a_meana_m2a_weightb_meanb_m2b_weightdelta
new_weight	w2_over_ws              r   welford_combine_fn4get_reduction_combine_fn.<locals>.welford_combine_fn  s]     &'"F(%&"F(OE!,J -I**emh6BB r   zunknown reduction_type=)r  tuple[object, object]r  r  r   tuple[OpsValue, OpsValue])r  #tuple[OpsValue, OpsValue, OpsValue]r  r  r   r  )r  r  )r  r  r  r  r  s   ```  r   get_reduction_combine_fnr    s     --#N33	/	/	$	)>	&	 	: ! 	,	,	2	2	 1	  "! "$;N;K"LMMr   c                  J  ^  \ rS rSr% S\S'   S\S'   S\S'   S\S	'   S!S
 jr\r\" S 5      S"S#U 4S jjj5       rS$S jr	S%S jr
          S&S jrS'S jrS(S jrS"S#S jjrS)S jr\ S*                   S+S jj5       r\          S,S j5       r\\R*                  S4                   S-S jj5       r\      S.S j5       r\      S.S j5       r\        S/S j5       r\      S0S j5       r\ S*               S1S jj5       r\            S2S j5       r\                        S3S j5       r\ S*                     S4S jj5       r\                      S5S j5       rS r U =r!$ )6	Reductioni  r  reduction_rangesrQ   r  r  	src_dtyperU   reduction_hintc                $    U R                  S5      $ )N)r  r  r  r  r  s    r   r   Reduction.__str__  s    ||LMMr   c                |   >^ [         TU ]  T5      [        5       R                  " U4S jU R                   5       6 -  $ )Nc              3  <   >#    U  H  n[        UT5      v   M     g 7fr   r'   r  s     r   r   1Reduction.get_free_symbol_uses.<locals>.<genexpr>  s     P:OQq-00:Or  )superra  r:   r  r  r  ri  	__class__s    `r   ra  Reduction.get_free_symbol_uses  s7    w+M:Z\=O=OP$:O:OP>
 
 	
r   c                    U R                   $ r   )r  r  s    r   rg  Reduction.get_reduction_size  s    $$$r   c                    U R                   $ r   )r  r  s    r   rd  Reduction.get_reduction_type      """r   c           	         [         R                  " U R                  U R                  U R                  U R                  X45      5      n[         R                  " U=(       d    SU" U5      U5        g rZ  )rl   	reductionr  r  r  r  store_reduction)r  r^  r_  r`  reduction_varsr  s         r   r  Reduction.store_reduction  sR     JJNNMM$/	
 	K49gdmUKr   c                X    [        U R                  5      [        U R                  5      -   $ r   )r   r  r  r  s    r   index_lengthReduction.index_length  s!    4;;#d&;&;"<<<r   c                    U R                  U R                  5      nU R                  U R                  [        R                  5      nX4$ r   )r  r  r  r@   R0_INDEX)r  r   rindexs      r   r!  Reduction.inner_fn_args  s6    DKK(T22DMMBr   c                    U R                  U R                  5      nU R                  U R                  [        R                  5      n[        U R                  X#US9$ r2  )r  r  r  r@   r  rJ   r  )r  ri  r   r  s       r   r  Reduction.inner_fn_free_symbols  sF    DKK(T22DMMB#MM5
 	
r   c                   U R                  5       n[        R                  " [        SU5      " U5      n[	        UU R
                  UU R                  U R                  U R                  U R                  [        R                  S9$ )rf  rg  r  r  r  r  r  r  r  r  )r  r   r   ri  r  r  r  r  r  r  rU   DEFAULTrj  s      r   rp  Reduction.constant_to_device  sk    !!#n.?HP**;;!22..nn(00	
 		
r   Nc	                n   [         R                  R                  R                  U5      n	[         R                  R                  R                  [	        U5      5      n
US:H  =(       dV    [         R                  R                  U [        R                  5      (       + =(       a    US;  =(       a    [        R                  n[        U	5      (       a  [        U
5      (       d  [        R                  S4$ US:X  a  [        R                  S4$ [        R                  " U 5      nUR                  nSnU(       a]  [         R"                  " [         R$                  R&                  U SS9n[         R"                  " [         R$                  R&                  U SS9nO      SS	 jnUnU
S:X  a  U" X5      nUS:X  a  [        R(                  U4$ Ub  [+        U[,        5      (       a  [.        R0                  " [2        S
S5         [5        U5      u  nnS S S 5        Wbj  Wbg  [         R                  R                  R                  [	        UU-   5      5      nU	U:X  a,  [6        R9                  SUUUUU5        [        R(                  S4$ [        R(                  U4$ X::  d  XS-  S-  :  a  [        R                  S4$ [;        U UUUUUS:w  a  UOSU[        R                  S9nSS jnU" U5      u  nnU(       a  U" U5      u  nn[=        U5      S:X  a  [        R                  S4$ [>        R@                  " URC                  5       URE                  5       5      u  u  nnnSnSnU H  n[         R                  R                  RG                  UU5      n[         R                  R                  RI                  UU[K        URM                  5       5      5      n [O        S U  5       5      n!U!(       a  US-  nM  US-  nM     UU:  a  [        R(                  U" X5      4$ [        RP                  U" X5      4$ ! , (       d  f       GN&= f)Nscanr  rA   r      T)inner_reductionFc                    gr  r   )reduction_numel_hint
numel_hints     r   inner_reduction_splits4Reduction.num_splits.<locals>.inner_reduction_splits7  s     r   r  zUse previous IRNode's range and reduction_ranges instead of split. current ranges: %s, current reduction ranges: %s, current split: %d, new ranges: %s, new reduction ranges: %sr0  r   r~  r  c           	     `  ^	 U R                  5       nUc   e[        S [        UU R                  5       U R	                  5       S9U S9nUR                  5       nUR                  c   eUR                   V s/ s H=  n [        U [        5      (       d  M  [        U [        R                  5      (       a  M;  U PM?     nn / nSn[        UR                  S S9 H  m	[        U	4S jU 5       5      (       d  M  UR                  T	R                  5        T	R                   ["        R$                  R&                  ;   d  Md  ["        R$                  R&                  T	R                      n[)        UR*                  SS 5      nUR-                  5         [)        UR*                  SS 5      U:w  d  M  SnM     XV4$ s  sn f )	Nr  r  r  r   rN  rM  Fc                    U R                   $ r   r  r   s    r   <lambda>@Reduction.num_splits.<locals>.get_read_indices.<locals>.<lambda>  s    affr   keyc              3  T   >#    U  H  oTR                   R                  ;   v   M     g 7fr   )r   r1   )r   r  mds     r   r   AReduction.num_splits.<locals>.get_read_indices.<locals>.<genexpr>  s     F:aBHH111:   %(r  T)r  r  r  r  r	  rR  
range_varsr   r    r   NumbersortedrV  r   r  r   r   rn   r  name_to_bufferr   rN  decide_layout)
r  r  cbread_writesr  indiceschangedbuforiginal_strider  s
            @r   get_read_indices.Reduction.num_splits.<locals>.get_read_indiceso  sn   \\^F%%%%!++-
 B ,,.K ))555 %///Aa& /9!U\\/J /  
 GG[..4DEF:FFFNN288,ww!''"8"88gg44RWW=*1#**h*M))+"3::x>/Q&*G F ##!s   4F+F+2F+r   c              3  B   #    U  H  oS :H  =(       d    US:  v   M     g7f)r   rA   Nr   r   s     r   r   'Reduction.num_splits.<locals>.<genexpr>  s     9AQ!a%   )r  r   r  r   r   r   )r  r  r   ztuple[Sequence[Expr], bool]))rn   r  r  r>  ri   has_featurerD   REDUCE_TO_SINGLE_ELEMENTrB   split_reductionsr   rU   r  rT   r  multi_processor_count	functoolsr   choicesreduction_split_factorINNERr   r   r   r   r  rK   logr  r  r   rC   index_vars_squeezer	  rg  simplify_with_rangesstride_hintsr   keysr   OUTER)"r  	dst_dtyper  r  r  r  r  reduction_numel
input_noder  r  should_splitpropsnum_smmin_elements_per_threadr  outer_reduction_splitsr  
new_rangesnew_reduction_rangesextracted_numel_hintr  r  r  r  r   r  ranges1	num_outer	num_innerr   jrU  outers"                                     r   
num_splitsReduction.num_splits
  s     !ww//==oNWW%%33M&4IJ
%/ 
##FN,S,STT (( '' 	 /00Z
5K5K ((!++U" ((!++ ''/,,"$@I@Q@Q		00&$A" AJ@Q@Q		00&%A"
&)  &<" ?*+?LEz$**E11%*Z*K*K\\.2BDI <JG", J
 ).B.N+,77+;+;+I+I%j3G&GH,( ,/CC		G #,!&0	  -22B66 &&-- ;aZ"_, ((!++--;v-E>5(00	
!	$F ,A.)!,JGQw<1 ((!++'3'F'FJJL!..0(
$NW 		A  55aAAgg&&33>4#7G
 999EQ	Q	  y  &&(>$)   !&&(>$)  U JIs   7P%%
P4c                  ^ ^^^^^ [         R                  R                  R                  T5      m[	        X#5      mSUUU4S jjmUS;   a4  [        T[        R                  T5      5      m      SUU 4S jjmU4S j$ T mT$ )z1Convert inner_fn from a reduction to an pointwisec                   >^  [         R                  " TU U4S j[        R                  " T Vs/ s H  n[	        U5      PM     sn6  5       5      $ s  snf )Nc              3  6   >#    U  H  nT" TU5      v   M     g 7fr   r   )r   r  r   value_fns     r   r   =Reduction._unroll_reduction_fn.<locals>.fn.<locals>.<genexpr>  s&      # UF++#s   )r  reduce	itertoolsproductr   )r   r   
combine_fnr  r  s   ` r   r   *Reduction._unroll_reduction_fn.<locals>.fn  sN    ##"+"3"3,<=,<q%(,<=# 
 >s   Ar  r  c                   > U Vs/ s H  n[         R                  " U5      PM     nnT" X5      [        R                  " T" U5      [        R
                  5      4$ s  snf r   )r   expandrl   
index_exprr  int64)r   r  r   flatten_indexr  s      r   r  0Reduction._unroll_reduction_fn.<locals>.value_fn  sO     4::6a%,,q/6:U+NN=#8%++F  ;s    Ac                   > T" U 5      S   $ r  r   )r   r   s    r   r  0Reduction._unroll_reduction_fn.<locals>.<lambda>  s    E1r   )r   r  r   r   )r   r  r  r  r   r  )rn   r  r  guard_int_seqr  _fixed_indexerr  r  )r  r  r  r  r  r&  r   r  s   ``  @@@@r   _unroll_reduction_fnReduction._unroll_reduction_fn  s     77++99:JK-nH
		 		 11* 112BCM
)3E*  .-HIr   c
                `  ^^^^^^ [         R                  R                  R                  [	        T5      5      mTS:X  a`  SU4S jjn
U
" S5      U
" S5      U
" S5      U
" S5      S.mTT;   d
   T S35       eSUUU4S jjn[
        R                  UUU[        U5      S9$ TS:X  a-  TS;   a	  SU4S	 jjnO	SUU4S
 jjn[
        R                  UTXS9$ [        T[        5      (       a  [         R                  R                  R                  T5      [        R                  :  aW  [	        U5      S:w  d  [        UR                  5      (       a.  TS:w  a(  [
        R                  UTU R                  TTTU5      US9$ U R!                  UTUTUTTTU	5	      u  pSU4S jjnU" U5      nU["        R$                  :X  a  UnUS:X  a\  U	c   e[&        R(                  " [*        SS5         [-        U	5      u  nnSSS5        Wc   eWc   eU R/                  UTUTUTUUTU5
      $ US:  a  U R1                  UTUTUTTUUU	5
      nSn[        R2                  R4                  (       a&  [        U[6        5      (       a      SS jnU" U5      nU(       am  [        UR8                  [:        5      (       d   [        UR8                  5       5       eUR8                  R<                  S   Ul        TUl         UUl!        TUl"        U$ [6        R                  [;        UTTUTTUUS95      nU$ ! , (       d  f       GN5= f)za
Create a reduction node. May split the reduction to multiple layers to expose
more parallelism.
r   c                $  > T[         R                  :X  a  [        U 5      $ TR                  (       a0  [        U [        5      (       d   [        U 5      5       e[        U 5      $ [        U [        5      (       d   [        U 5      5       e[        U 5      $ r   )	r  r   rJ  r   r   r   rL  r   r   )valr  s    r   py_cnst!Reduction.create.<locals>.py_cnst  sm    

*9$00%c=99D49D9 :%%c;77BcB7s8Or   rA   )r~  r  r}  r{  z* not supported for zero-dimension tensors!c                8   > [         R                  " TT   T5      $ r   rl   rK  )r   r  r  rtypes_to_initss    r   const_fn"Reduction.create.<locals>.const_fn  s    ||ON$CYOOr   rh  r!  c                2   > [         R                  " ST5      $ r  r4  )r   r  s    r   r   Reduction.create.<locals>.fn  s    <<955r   c                r   > T Vs/ s H  n[         R                  R                  PM     nnT" X5      $ s  snf r   r   r  r  )r   r   reduction_indexr  r  s      r   r   r9  !  s2    =M&N=Muww||=MO&N#E;; 'O   $4r  c                l   > [        T5      (       a  U $ U S:  a  [        U [        R                  5      $ U $ r  )r   r-  rB   min_num_split)r  r  s    r   _maybe_increase_split/Reduction.create.<locals>._maybe_increase_splitJ  s2    /**qy5&"6"677r   r0  Nr  Tc                L   U R                  5       n[        U5      S:w  a  g [        [        U5      5      nU[        R
                  R                  ;  a  g [        R
                  R                  U   n[        U[        5      (       d  g UR                  R                  5       c   eU$ r  )r  r   nextiterrn   r  r  r   r  rM  rd  )cur_node
read_namesbufnamer  s       r   _find_split_reduction/Reduction.create.<locals>._find_split_reduction  s     "*!8!8!:J:!+#"4
#34Gagg&<&<<#''009C%c>::#88668DDDJr   r  )r0  r   r   zUnion[bool, float, int])r   r   r   rm   )r  r   r   r   )rE  r   r   zOptional[ComputedBuffer])#rn   r  r  simplifyri   rO  r  r   r   r!   size_hint_or_throwrB   unroll_reductions_thresholdre   r   r,  r  rU   r  r   r   r  rK   !create_multilayer_existing_rangescreate_multilayerr#  mix_order_reductionr   rM  r  r  _split_size_original_inner_fn_original_ranges_original_reduction_ranges)r  r  r  r  r  r  r  r  r  r  r1  r6  r   hintr  r@  r  r  r   split_reductionrH  r  r5  s     ` ` ``             @@r   r  Reduction.create  s   $ ''**33MBR4STa$ qz"1:
qz	O "_4 !""LM4P P ##!F|	 $   a!556 6
< < ##Y $  
 00  33OD001v&!+vfkk/B/B%' ##11.	  $   nn

	 &e,
 ]222!NB;)))n.>E3V40
0 F )))'33388 $  QY'' C #O}}00ZY5O5O'-$ #8"< "/"6"6	BB O0012B />.B.B.S.STU.V+5=2390=M:J!!1-#-	
 
k FEs   !L
L-c           
        U S;   aL  [        U5      (       a  [        S5      $ [        U5      (       a  g[        R                  " U5      R
                  $ U S;   aL  [        U5      (       a  [        S5      $ [        U5      (       a  g[        R                  " U5      R                  $ [        U5      (       a  SOSn[        U5      (       a  SOSnUUUUUX"U4X"U4[        S5      U4S	.U    $ )
N)r-  r  z-infF)r|  r  infTr   rA   )r~  r}  r  r  r{  welford_reducer  online_softmax_reduce)r,   rL  r+   r  iinfor|  r-  )r  r  zeroones       r   default_accumulatorReduction.default_accumulator  s     ..e$$V}$!%(({{5)---..e$$U|#!%(({{5)---(//uQ&u--d1#40 $D1&+FmT%:	
 	 		r   c                :    U S:X  a  g[         R                  X5      $ )NrY  r   )r  r^  r  r  s     r   default_valueReduction.default_value  s!     --,,^CCr   c                    U S:X  a  U$ U S::  a*  US::  a$  U[         R                  :X  a  [         R                  $ U S::  a*  US::  a$  U[         R                  :X  a  [         R                  $ U$ )Nr0     i      )rU   r  
OUTER_TINY)r  r  r  s      r   _multilayer_second_step_hint&Reduction._multilayer_second_step_hint  sg     B;!!C<J#-.MDWDW2W +++TMc!-"5"55 +++r   c                   Uc  g[         R                  R                  R                  UR	                  5       U5      (       d  gUR                  5          [        U5        UR                  5       n[        USS 5       H8  u  pE[         R                  R                  R                  US5      (       d  M6  Us  $    g! [         a     gf = f)z
If we are reducing over the full tensor, and it is non-dense in the last dimension,
reindex so we reduce over the dense dimension. initially just handle complete
reduction case
Nr0  rA   )
rn   r  r  r=  r  r  rO  r  r2  r   )r  r  r  rU  r   r   s         r   $check_for_split_dense_dim_reindexing.Reduction.check_for_split_dense_dim_reindexing  s     ww77  "O
 
 	!*- '')gcrl+DAww771== ,  # 		s   B: :
CCc                  ^^^^^
^ U R                  TU5      n[        R                  UT/U5      m[        R                  R
                  R                  [        R                  " TU-  S5      5      (       + m
      SUUUU
UU4S jjn	U	$ )Nr   c                "  >^^ Uu  nU Gt mnTU-  U-   mSUU
UU4S jjnT(       ac  [        T5      n[        R                  " [        R                  " TU5      [        R                  " TU5      5      n[        R                  " XST	5      $ U" 5       $ )Nc                 $   > T" TT" T /5      5      $ r   r   )r  ra  	new_indexr   s   r   bodyCReduction._multilayer_wrap_loader.<locals>.wrapper_fn.<locals>.body(  s    i');<<r   )r   rm   )r_   rl   r  r$  masked)r   r<  reduction_blockrq  index_dtyper  r  rp  
block_sizedefaultra  	need_maskr  r   s         @@r   
wrapper_fn5Reduction._multilayer_wrap_loader.<locals>.wrapper_fn!  s     "1_*/'Y ?2_DG= = -o>vvNN7K8NN?K@ zz$g66vr   )r   Sequence[Symbol]r<  r{  r   rm   )	rk  Viewdynamic_reshape_indexerrn   r  r  r  r   r-  )r  ra  r  r  r  rv  rw  r  dense_indexry  rx  r   s    ` ` ``   @@r   _multilayer_wrap_loader!Reduction._multilayer_wrap_loader  s     >>Z
 ../
 ((>>HH_u,a0
 
		#	6F		 	( r   c                   ^^^ [        S T 5       5      (       d   ST< 35       e[        R                  U[        U5      [        U5      -   5      m      SUUU4S jjnU$ )Nc              3  *   #    U  H	  oS :H  v   M     g7f)rA   Nr   r   r  s     r   r   DReduction._multilayer_wrap_loader_existing_ranges.<locals>.<genexpr>@  s     3?a6?s   z8Only enabled for numel_hint == 1, found original_ranges=c           	        > U S [        T5       nU [        T5      S  nT" UT" [        U5      [        U5      -   5      5      $ r   )r   r   )merged_indexnew_reduction_indexoriginal_idxrp  ra  original_rangesr   s       r   ry  EReduction._multilayer_wrap_loader_existing_ranges.<locals>.wrapper_fnG  sQ     ((>#o*>?L$S%9%;<Ii(51D+EEF r   )r  r  r  r  r   rm   )r   r|  r}  r   )r  ra  r  original_reduction_rangesr  r  ry  r   s    ``    @r   '_multilayer_wrap_loader_existing_ranges1Reduction._multilayer_wrap_loader_existing_ranges7  s     3?333 	
G6HI	
3 ..%uZ'85AU;V'V
		(		!/		 		 		 r   c                  ^ U[         R                  [         R                  4;  a  UO[         R                  n[        R                  UUUUUUU	U5      nUR                  5         UR                  5       m      SU4S jjn[        R                  R                  R                  [        U5      5      nU R                  XU5      nXWS[        U5       :X  d   e[        R                  [	        UUUUU[        U5      S U	UUS95      $ )I
Break a large reduction up into multiple smaller reductions
recursively
c                   > T" / U QUQ5      $ r   r   )r   r<  intermediate_loaders     r   intermediate_fn;Reduction.create_multilayer_helper.<locals>.intermediate_fn|  s     ''A'A'ABBr   Nr  )r   r  r<  r  r   rm   )r  float16bfloat16rL  r  r  r  r  rn   r  r  optimization_hintri   rh  r   r   )r  r  r  r  ry  r  r  r  r  r  r  r  intermediate_dtypeintermediater  r  r  s                   @r   create_multilayer_helper"Reduction.create_multilayer_helperT  s$   0  ??  	
 !'' 	
 	*668	C%	C8J	C	C
 WW%%77o8VW
99~
 -Cs?/C"DDDD(&!+C,@,B!C-#-	
 	
r   c                    [        U5      n[        XS-
  -   U5      nU R                  Xr5      nU R                  UUUUUUU
5      nU R	                  UUUUUU/ UQUPU/UUU	5      $ )r  rA   )ri   r=   rb  r  r  )r  r  r  r  r  r  r  r  r  r  r  r  rv  rw  ry  s                  r   rN  Reduction.create_multilayer  s    & ((89o;UC
##N>00

 ++feL
 	
r   c                j    U R                  UUUUU5      nU R                  UUUUUU/ UQUQUU	SU
5      $ )r  r0  )r  r  )r  r  r  r  r  r  r  r  r  r  r  ry  s               r   rM  +Reduction.create_multilayer_existing_ranges  sc    $ @@% 

 ++%+o+
+ 
 	
r   r   r  r  rC  r  r  )
r^  r  r_  rl  r`  r  r  r{  r   r   r  r   zSequence[Sequence[Expr]]r  r   )r  r  r  r  r  r  r  zCallable[_P, OpsValue]r  r  r  r  r  z%Union[ReductionType, Literal['scan']]r  r    r  Optional[IRNode]r   tuple[ReductionHint, _IntLike])
r  z<Callable[[Sequence[_IntLike], Sequence[_IntLike]], OpsValue]r  r  r  r   r  r  r   z(Callable[[Sequence[_IntLike]], OpsValue])r  r  r  r  r  r  r  r  r  r  r  r  r  rQ   r  rU   r  r  r   r   r  r   r  r  r   #Union[_NumLike, Sequence[_NumLike]])r  rz   r  r   r  rU   r   rU   )r  rz   r  r  r   r  )ra  Callable[..., OpsValue]r  r  r  rz   r  rz   rv  rz   rw  r  r  r  r   Callable[..., object])ra  4Callable[[Sequence[Expr], Sequence[Expr]], OpsValue]r  r  r  r  r  Sequence[Integer]r  r  r   z@Callable[[Sequence[sympy.Expr], Sequence[sympy.Expr]], OpsValue])r  r  r  r  r  r  ry  r  r  r  r  r  r  
list[Expr]r  list[Integer]r  rQ   r  rz   r  rU   r   r   )r  r  r  r  r  r  r  r  r  r  r  r  r  rQ   r  rz   r  rU   r  r  r   r   )r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  rQ   r  rU   r   r   )"r   r   r   r   r   r   rE  rY   ra  rg  rd  r  r  r!  r  rp  r  r  r,  rF  rU   r  r  r^  rb  rh  rk  r  r  r  rN  rM  r   __classcell__r  s   @r   r  r    s   ((!!!!N HK(
 
 )

%#L"L 3L 	L
 )L 
L=


  (,ggg g )	g
 #g -g >g g %g 
(g gR )N),) ) 	)
 
2) )V  )6(=(='+OO O 	O
 %O O )O &O &O %O 
O Ob $/	, > DD$/D	,D D %(:G	   &4D	 >  (,('( -( "	(
 ( ( 5( %( 
( (T D ( $2	
 & 0 
J 8 =
=
 =
 	=

 '=
 (=
 $2=
 =
 ,=
 &=
 =
 &=
 
=
 =
~  (,+
+
 +
 	+

 %+
 +
 )+
 &+
 +
 &+
 %+
 
+
 +
Z $
$
 $
 	$

 %$
 ($
 $2$
 "$
 ,$
 &$
 &$
 
$
 $
r   r  c                    ^ ^^ SUU U4S jjnU$ )1A closure containing math to read a given elementc                   > Tb  [        U 5      [        T5      :X  d   e[        U 5      [        T5      :X  d   eTn[        U TT5       H  u  p#nUS:w  d  M  XU-  -   nM     U$ r  )r   r   )r   resultr   stszrQ  r  r  s        r   r_  _fixed_indexer.<locals>.indexer  sj    !c%jCK&???5zSY&&&ufd3KCRQw(* 4 r   )r   r  r   r   r   )r  r  rQ  r_  s   ``` r   r+  r+    s      Nr   INNER_FN_TYc                  z   ^  \ rS rSr% S\S'                     SU 4S jjr          SS jrSrU =r$ )	MultiOutputReductioni   r   output_indexc
                   >^ [        T5      (       a  T4m[        T5      S:X  a  TS   n
O      SU4S jjn
[        TU ]  UUU
UUUUUS9  Xl        g )NrA   r   c                4   >^ ^ [        U U4S jT 5       5      $ )Nc              3  4   >#    U  H  o" TT5      v   M     g 7fr   r   )r   r   r   reduction_idxs     r   r   @MultiOutputReduction.__init__.<locals>.loader.<locals>.<genexpr>  s     HiR]33i   )r   )r   r  	inner_fnss   ``r   ra  -MultiOutputReduction.__init__.<locals>.loader  s     HiHHHr   r  )r   r  r  r  r   ztuple[OpsValue, ...])callabler   r  __init__r  )r  r  r  r  r  r  r  r  r  r  ra  r  s      `       r   r  MultiOutputReduction.__init__  s     I"I y>Qq\FI#I4BI%I
 	-)) 	 		
 )r   c           	     N   [         R                  " U R                  U R                  U R                  U R                  X45      5      n[        U[        [        45      (       d   [        U5      5       eXPR                     n[         R                  " U=(       d    SU" U5      U5      $ rZ  )rl   r  r  r  r  r  r   r   r   r   r  r  )r  r^  r_  r`  r  r   r  s          r   r  $MultiOutputReduction.store_reduction(  s     JJNNMM$/	
 &5$-00>$v,>0(()"";#;)WT]ERRr   )r  )r  r  r  r  r  z)Union[INNER_FN_TY, Sequence[INNER_FN_TY]]r  r  r  r  r  rQ   r  r  r  rU   r  r   )
r^  r  r_  rl  r`  r  r  r{  r   r   )	r   r   r   r   r   r  r  r   r  r  s   @r   r  r     s    #)#) #) =	#)
 "#) ,#) &#) #) &#) #)JS"S 3S 	S
 )S 
S Sr   r  c                  j    \ rS rSr\\R                  S4                   SS jj5       rSrg)OnlineSoftmaxReductioni:  Nc
           	        ^^^^^^^ [        UUUUUUU4S j[        U5       5       5      n
U
 H  nUR                  5         M     U
$ )z.
Create the reduction disregarding splitting.
c              3  p   >#    U  H+  n[         R                  [        TTTTTS TTU5	      5      v   M-     g7f)rZ  N)r   r  r  )	r   
output_idxr  r  r  r  r  r  r  s	     r   r   0OnlineSoftmaxReduction.create.<locals>.<genexpr>K  sO      
 0
 $$+"
  0s   36)r   r   r  )r  r  r  r  r  r  r  
num_outputr  r  resultsr  s    `````` `   r   r  OnlineSoftmaxReduction.create;  sB       
 
 $J/
 
  AIIK r   r   )r  r  r  r  r  r  r  r  r  r  r  r  r  r   r  rU   r  r  r   Sequence[TensorBox])	r   r   r   r   rF  rU   r  r  r   r   r   r   r  r  :  s     )6(=(='+!! ! 	!
 %! ! )! ! &! %! 
! !r   r  c                      \ rS rSr\\R                  4               SS jj5       r\      SS j5       r	\                  S	S j5       r
Srg)
WelfordReductioni`  c                  ^^^^^ US;   d   e[         R                  R                  R                  [	        T5      5      nS
UUU4S jjn	US:X  a  U	" S5      n
U	" S5      nU	" S5      nXU4$ US:X  aD      SUUUU4S jjmUS:X  a  T" US   5      U	" S5      U	" S5      4$ [        U4S jU 5       5      $ [        R                  TTTUS   TTUUS9u  pU[        R                  :X  a  UnUS:  a  U R                  TTUTTUUU5      $ [        S	5       Vs/ s H)  n[        R                  [        TTUTTUTUU5	      5      PM+     nnU H  nUR                  5         M     U$ s  snf )N)rY  r  c                V   >^  SUU 4S jjn[         R                  TTU[        T5      S9$ )Nc                2   > [         R                  " TT5      $ r   r4  )r   r  r0  s    r   r  8WelfordReduction.create.<locals>.const.<locals>.inner_fnq  s    || r   rh  r   r  r   rm   rO  r  r   )r0  r  r  r  r  s   ` r   const&WelfordReduction.create.<locals>.constp  s7      ##!F|	 $  r   r   rA   c                V   >^  SU U4S jjn[         R                  TTU[        T5      S9$ )Nc                r   > T Vs/ s H  n[         R                  R                  PM     nnT" X5      $ s  snf r   r;  )r   r   r<  ra  r  s      r   r  7WelfordReduction.create.<locals>.copy.<locals>.inner_fn  s2    =M&N=Muww||=MO&N!#77 'Or=  rh  r  r  )ra  r  r  r  r  r  s   ` r   copy%WelfordReduction.create.<locals>.copy  s7    8 8 !''!%<	 (  r   rY  c              3  4   >#    U  H  nT" U5      v   M     g 7fr   r   )r   r   r  s     r   r   *WelfordReduction.create.<locals>.<genexpr>  s     :	"T"XX	r  )r  r  r   )r0  r   r   r   )ra  r  r   r   )rn   r  r  rJ  ri   r   r  r  rU   r  rN  r   r   r  r  r  )r  r  r  r  r  r  r  r  r  r  meanm2weightrT  r  r  r  r  r  s    `` ``            @r   r  WelfordReduction.createa  s    !FFFF''**33MBR4ST	 	 a8DqB1XFV##aL  !11IaL)58U1X==:	:::&  **aL)+ + 	
 ]222!N19(( 	 	2 $Ah
 '
  $""
 ' 	 
  AIIK %
s   0E#c                    g)N)r   r   r   r   ra  s     r   rb  WelfordReduction.default_value  s     r   c	                  ^ ^^^^^^ [        T5      m[        R                  R                  R	                  [
        R                  " TT-  S5      5      (       + n	U	(       aB  US:w  a<          S
U4S jjn
T R                  UTUS   [        U
SS9[        U
SS94UTSTUS9$ [        TTS-
  -   T5      m[        R                  UT[        UU UUU4S jU 5       5      / UQTPT/UU5      nU H  nUR                  5         M             SS jm[        R                  R                  R                  [        U5      5      nT R                  TX5      n[        R                  UT[        U4S	 jU 5       5      UT/SU5      $ )r  r   r  c                2   > [         R                  " UT5      $ r   r4  )r   r  r  r  s      r   rK  4WelfordReduction.create_multilayer.<locals>.constant  s     ||E511r   r  rA   )r  r  r  r  r  r  r  r  c           
   3  L   >#    U  H  nTR                  UTTTTS S9v   M     g7f)r   )rw  N)r  )r   ra  rv  r  r  r  r  s     r   r   5WelfordReduction.create_multilayer.<locals>.<genexpr>	  s>      
 (F ++$# ,  (s   !$c                    U" / U QUQ5      $ r   r   )r   r<  ra  s      r   intermediate_loader_fnBWelfordReduction.create_multilayer.<locals>.intermediate_loader_fn$	  s    
 4E4O455r   c              3  T   >#    U  H  n[        TUR                  5       S 9v   M     g7f))ra  N)r   r  )r   r   r  s     r   r   r  2	  s&      &A .q}}G&r  )r   r  r  r  r  r   r   rm   )r   r  r<  r  ra  r  r   rm   )ri   rn   r  r  r  r   r-  rN  r   r=   r  r  r   r  r  rh  )r  r  r  r  r  r  r  r  r  rx  rK  intermediatesr   r  rv  r  r  s   ` `  ` `      @@@r   rN  "WelfordReduction.create_multilayer  s     ((89((>>HH_u,a0
 
	 +<<2#24B2KN22
 ((aLHA.HA.
 !10- )   o;UC
(// 
 
 (
 
 feL#
& AIIK 	6!	6+	6 9	6 		6 WW%%//f0EF
99:
  && &  G
 	
r   r   N)r  r  r  r  r  Sequence[Callable[..., Any]]r  r  r  r  r  rQ   r  rU   r   r  r  )r  r  r  r  r  r  r  r  r  r  r  rQ   r  rz   r  rU   r   r  )r   r   r   r   rF  rU   r  r  r  rb  rN  r   r   r   r   r  r  `  s    )6(=(=vv v 0	v
 v (v &v &v 
v vp $/	, 
 Z
Z
 Z
 0	Z

 Z
 (Z
 &Z
 Z
 &Z
 
Z
 Z
r   r  c                    ^  \ rS rSr% S\S'   S\S'   S\S'   S\S'   S	\S
'   S\S'   S\S'   S\S'   \" S 5      S S!U 4S jjj5       rS"U 4S jjr          S#S jrS$S jr	S%S jr
S%S jrS%S jrS&S jrS'S jrS S!S jjr\\R$                  4SS.                   S(S jjj5       r\                  S)S j5       rSrU =r$ )*Scani>	  r  scan_rangesr  =Callable[[tuple[Any, ...], tuple[Any, ...]], tuple[Any, ...]]r  zFCallable[[Sequence[_IntLike], Sequence[_IntLike]], Sequence[_IntLike]]r   rU   r  r   r  tuple[torch.dtype, ...]dtypestuple[Callable[..., Any], ...]r  c                   >^ [         TU ]  T5      [        5       R                  " U4S jU R                   5       6 -  [        5       R                  " U4S jU R
                   5       6 -  $ )Nc              3  <   >#    U  H  n[        UT5      v   M     g 7fr   r'   r  s     r   r   ,Scan.get_free_symbol_uses.<locals>.<genexpr>T	       O>N"1m44>Nr  c              3  <   >#    U  H  n[        UT5      v   M     g 7fr   r'   r  s     r   r   r  W	       Hi"1m44ir  )r  ra  r:   r  r  r  r  s    `r   ra  Scan.get_free_symbol_usesL	  s]     G(7l  Od>N>NO l  HdiiH		
r   c                   > [        U R                  5      [        U R                  5      -   [        U R                  5      :X  d   e[        TU ]  5         g r   )r   r  r  r  r  r  r  r  s    r   r  Scan.__post_init__[	  =    4;;#d&6&6"773tyy>IIIr   c                "  ^ U R                  X45      m[        U4S jU R                   5       5      n[        R                  " U R
                  U R                  U5      n[        R                  " U=(       d    SU" T5      X`R                     5      $ )Nc              3  2   >#    U  H  o" T5      v   M     g 7fr   r   r   r  r   s     r   r   'Scan.store_reduction.<locals>.<genexpr>g	       D^x}}^   r[  )	r   r   r  rl   r  r  r  r\  r  )r  r^  r_  r`  	scan_varsr   r  r   s          @r   r  Scan.store_reduction_	  sk     ll4+DT^^DD$++t?yy$9gclF;L;L4M
 	
r   c                    g)Ncustomr   r  s    r   rd  Scan.get_reduction_typem	  s    r   c                    U R                   $ r   )r  r  s    r   rg  Scan.get_reduction_sizeq	  r  r   c                    U R                   $ r   r  r  s    r   r	  Scan.get_sizet	      yyr   c                    U R                   $ r   r  r  s    r   r  Scan.get_pointwise_sizew	  r  r   c                X    [        U R                  5      [        U R                  5      -   $ r   )r   r  r  r  s    r   r  Scan.index_lengthz	  !    4;;#d&6&6"777r   c                    U R                  U R                  5      nU R                  U R                  [        R                  5      nU R                  X5      nU4$ r   )r  r  r  r@   r  r   r  r   r  r   s       r   r!  Scan.inner_fn_args}	  C    DKK(T--t}}=ll5)vr   c                    U R                  U R                  5      nU R                  U R                  [        R                  5      nU R                  X#5      n[        U R                  XAS9$ r2  )r  r  r  r@   r  r   rJ   r  r  ri  r   r  r   s        r   r  Scan.inner_fn_free_symbols	  M    DKK(T--t}}=ll5)#DMM3TTr   T)can_fallback_to_atenc                 ^^^ / US T QUTS-   S  QmUT   /m[         R                  R                  U[        R                  5      (       d  S /[        U5      -  $ [        U5      S:  aB  [         R                  R                  U[        R                  5      (       d  S /[        U5      -  $ [         R                  R                  n
U
R                  [        T5      5      n[        U5      [        U5      :X  d   eU
R                  [        R                  " US5      5      (       a=  [        [        U5      5       Vs/ s H  n[        R                  UX,   X<   US9PM     sn$ U R!                  UUS   US   TTTUUS9u  p}["        nUS:  at  [$        R&                  R(                  S L =(       d    [*        =(       a	    [,        S:  =(       a    [        U5      S:H  nU(       d  U(       a  S /[        U5      -  $ SnO[.        nSUUU4S jjn[        [        U5      5       Vs/ s H/  n[0        R                  U" S	UX,   UX<   UUTTUUUUS.U	D65      PM1     nnU H  nUR3                  5         M     U$ s  snf s  snf )
NrA   rh  r   )r  r  r  axispointwise_rangesr  r  
scan_numelz3.3.0c                   > [        U5      [        T5      :X  d   e[        U 5      [        T5      :X  d   e/ U S T QUQU TS  Q$ r   r   )r   
scan_indexr#  r$  r  s     r   r   Scan.create.<locals>.reindex	  S    z?c+&6666u:%5!6666>U5D\>J>tu>>r   )r  r  r  r  r  r  r  r  r  r   r  r  )r   r  r(  r  r   r  r   )rn   r  r  rD   SCANr   TUPLE_REDUCTIONr  rJ  ri   r  r   r5  r   rO  r  r  r  r  versionhip
has_tritontriton_version	SplitScanr   r  )r  r  r  r  r  r#  r  r  r!  r   r  r%  r  r  	scan_typesupports_splitr   r  r  r$  r  s        `             @@r   r  Scan.create	  s    =T%4[<4q
+;<Dzlww""6>+>+>??6CK''v;?177#6#6N22$
 $
 6CK''77##&&}['AB
6{c)n,,, ))%((:q*ABB %*#f+$6 %7L   ! .&4	 !  %7  &)^^)q\-#!! &4 	&
" 	> !!T)Wj.V^w=V% v;!#  "' 6CK//!"J%		? 	?. !&c&k 2%
$ !3#  ! .!&4'+ +)##1!- " !3% 	 
* FNN  AP
s   ;$I3 6I8c	                L   ^^ SUU4S jjn	[         R                  UUUU	UUSUS9$ )Nc                ,   > T" / U S T QUQU TS  Q5      $ r   r   )r   r  r#  r  s     r   ry  #Scan.num_splits.<locals>.wrapper_fn	  s*    Fc%4jF=F3tu:FGGr   r  )r  r  r  r  r  r  r  r  )r   r  r  r  r   rm   )r  r  )
r  r  r  r  r#  r$  r  r  r%  ry  s
      ``     r   r  Scan.num_splits	  sA    	H 	H ###(!& $ 	
 		
r   r   r  rC  r~  )
r^  r  r_  z%Callable[[Sequence[_IntLike]], Never]r`  r  r	  r{  r   r   r  r  r  rB  )r  r  r  r  r  z+tuple[Callable[[Sequence[Expr]], Any], ...]r  r  r#  r   r  r  r  rU   r!  r   r   r   r   Sequence[Optional[TensorBox]])r  r  r  r  r  r  r#  r   r$  r  r  r  r  r  r%  r    r   r  )r   r   r   r   r   rY   ra  r  r  rd  rg  r	  r  r  r!  r  rF  rU   r  r  r  r   r  r  s   @r   r  r  >	  s   
MMSS!!##-- F#
 
 $
 
"
 7
 	

 $
 

 8U  )6(=(=` &*`` (` ?	`
 ` ` R` &` #` ` 
'` `D 

 
 7	

 
 (
 #
 R
 
 
(
 
r   r  c                      \ rS rSrSrg)r1  i	
  r   N)r   r   r   r   r   r   r   r   r1  r1  	
  s    r   r1  c                  n  ^  \ rS rSr% S\S'   S\S'   S\S'   S\S'   S	\S
'   S\S'   S\S'   S\S'   S\S'   \" S 5      SSU 4S jjj5       rS U 4S jjr          S!S jrS"S jr	S#S jr
S#S jrS#S jrS$S jrS%S jrSSS jjr\\R$                  4                   S&S jj5       rSrU =r$ )'Sorti
  r  sort_rangesr  z:Callable[[Sequence[Expr], Sequence[Expr]], Sequence[Expr]]r   rU   r  r   r  r  r  r  r  r   stable
descendingc                   >^ [         TU ]  T5      [        5       R                  " U4S jU R                   5       6 -  [        5       R                  " U4S jU R
                   5       6 -  $ )Nc              3  <   >#    U  H  n[        UT5      v   M     g 7fr   r'   r  s     r   r   ,Sort.get_free_symbol_uses.<locals>.<genexpr>$
  r  r  c              3  <   >#    U  H  n[        UT5      v   M     g 7fr   r'   r  s     r   r   rB  '
  r  r  )r  ra  r:   r  r=  r  r  s    `r   ra  Sort.get_free_symbol_uses
  s]     G(7l  Od>N>NO l  HdiiH		
r   c                   > [        U R                  5      [        U R                  5      -   [        U R                  5      :X  d   e[        TU ]  5         g r   )r   r  r=  r  r  r  r   s    r   r  Sort.__post_init__+
  r  r   c                6  ^ U R                  X45      m[        U4S jU R                   5       5      n[        R                  " U R
                  XPR                  U R                  5      n[        R                  " U=(       d    SU" T5      X`R                     5      $ )Nc              3  2   >#    U  H  o" T5      v   M     g 7fr   r   r  s     r   r   'Sort.store_reduction.<locals>.<genexpr>7
  r  r  r[  )
r   r   r  rl   sortr  r>  r?  r\  r  )r  r^  r_  r`  r  r   r  r   s          @r   r  Sort.store_reduction/
  so     ll40DT^^DD$++v{{DOOLyy$9gclF;L;L4M
 	
r   c                    g)NrJ  r   r  s    r   rd  Sort.get_reduction_type=
  s    r   c                    U R                   $ r   )r=  r  s    r   rg  Sort.get_reduction_size@
  r  r   c                    U R                   $ r   r  r  s    r   r	  Sort.get_sizeC
  r  r   c                    U R                   $ r   r  r  s    r   r  Sort.get_pointwise_sizeF
  r  r   c                X    [        U R                  5      [        U R                  5      -   $ r   )r   r  r=  r  s    r   r  Sort.index_lengthI
  r  r   c                    U R                  U R                  5      nU R                  U R                  [        R                  5      nU R                  X5      nU4$ r   )r  r  r=  r@   r  r   r  s       r   r!  Sort.inner_fn_argsL
  r  r   c                    U R                  U R                  5      nU R                  U R                  [        R                  5      nU R                  X#5      n[        U R                  XAS9$ r2  )r  r  r=  r@   r  r   rJ   r  r  s        r   r  Sort.inner_fn_free_symbolsR
  r   r   c	                  ^^^ / US T QUTS-   S  QmUT   /m[         R                  R                  U[        R                  5      (       d  S /[        U5      -  $ [         R                  R                  n
U
R                  [        T5      5      nSn[        R                  R                  =(       a%    U
R                  [        R                  " X5      5      nU(       d  S /[        U5      -  $ [        U5      [        U5      :X  d   eU
R                  [        R                  " US5      5      (       a=  [        [        U5      5       Vs/ s H  n[         R#                  UX.   X>   US9PM     sn$ SUUU4S jjn[        [        U5      5       Vs/ s H3  n[$        R#                  ['        SUX.   UX>   UUTTUUUUUS.U	D65      PM5     nnU H  nUR)                  5         M     U$ s  snf s  snf )NrA   re  rh  c                   > [        U5      [        T5      :X  d   e[        U 5      [        T5      :X  d   e/ U S T QUQU TS  Q$ r   r'  )r   
sort_indexr#  r$  r=  s     r   r   Sort.create.<locals>.reindex
  r*  r   )r  r  r  r  r  r  r  r=  r   r  r  r>  r?  )r   r  r\  r  r   r  r   )rn   r  r  rD   SORTr   r  rJ  ri   rB   r#  persistent_reductionsr  r   r5  r   rO  r  r   r<  r  )r  r  r  r  r  r#  r>  r?  r  r   r  
sort_numel
max_rblockis_persistent_kernelr  r   r  r  r$  r=  s        `            @@r   r  Sort.createX
  s    =T%4[<4q
+;<Dzlww""6>+>+>??6CK''77##&&}['AB
 
MM// Q..uxx
/OP 	 $6CK''6{c)n,,, ))%((:q*ABB %*#f+$6 %7L   ! .&4	 !  %7 	? 	?0 !&c&k 2'
& !3%  ! .!&4'+ +##1!-!) $ !3' 	 
, FNN  Q
s   $G&:G+r   r  rC  r~  )
r^  r  r_  r  r`  r  r  r  r   r   r  r  r  r  )r  r  r  r  r  z'tuple[Callable[[list[Expr]], Any], ...]r  r  r#  r   r>  r   r?  r   r  rU   r   r   r   r9  )r   r   r   r   r   rY   ra  r  r  rd  rg  r	  r  r  r!  r  rF  rU   r  r  r   r  r  s   @r   r<  r<  
  sC    
GG!!##--L F#	
 	
 $	
 
"
 2
 	

 '
 

 8U  )6(=(=LL (L ;	L
 L L L L &L L 
'L Lr   r<  c                :     [        U SS9  g! [         a     gf = f)NFfreezeT)rO  r  r   s    r   r
  r
  
  s&    a. s   
 
c                     [        U SS9u  pUR                  5       (       a  UR                  5         UR                  5       $ ! [         a     gf = fNFre  )rO  should_pad_stridespad_stridesis_contiguousr  )r   _bufferrN  s      r    is_contiguous_storage_and_layoutrm  
  sS    /%@ $$&& ##%% s   A A 
AAc           	        [        U [        5      (       a  [        U R                  UUUUUS9$ [        U [        5      (       a5  [        U R                  UUUUUS9u  pgX R                  R                  5       4$ [        U [        5      (       a  U(       as  U(       a6  U R                  5         U R                  5       R                  5       (       d   eO6Ub  U R                  X4S9  O#Ub  U R                  XTS9  OU R                  5         [	        U 5      U R                  5       4$ [        U [        5      (       a#  [        U R                  US9u  pXR                  4$ [        e)z
Try to simplify x into a StorageBox and a Layout.

allow_padding only affect how we apply stride_order. When allow_padding
is True, we have the freedom to add padding when applying the stride_order.
rf  want_contiguousstride_orderr@  rN  r@  re  )r   r   rO  rM  rq  r  rr  r<  rk  rA  rO  r  rS  rN  r  )	r   rf  rp  rq  r@  rN  r   rN  buffers	            r   rO  rO  
  sJ    !Y$FF+%''
 	
 !Z  )FF+%''
	 &&##%%%!V!||~335555)11  2  *22! 3  !!}alln,,!_%% *FF
	 xx
r   c                ^     [        U SS9u  p#UR                  U5      $ ! [         a     gf = frh  )rO  is_stride_orderedr  )r   rq  rl  rN  s       r   "is_stride_order_storage_and_layoutrv  
  s8    /%@''55 s    
,,c                   [        U [        [        45      (       a  [        U R                  5      $ [        U [
        5      (       a}  U R                  n[        R                  R                  R                  UR                  [        UR                  5      -  [        5      (       + n[        U R                  5      =(       d    U$ [        U [        5      (       a+  U R!                  5       [        R                  R"                  ;   $ gr  )r   r   rq  is_unalignedrM  rS  rN  rn   r  r  statically_known_multiple_ofrQ  r`   r  rb   rr  rs  unaligned_buffers)r   rN  has_unaligned_layouts      r   rx  rx    s    $J/00DII&&$((#$77#3#3#P#PMMN6<<88/$
  
 DII&>*>>$}}!''";";;; r   c                     \ rS rSr% S\S'   \" S 5      SSS jj5       rSS jrSS jrSS jr	\
S S j5       rS!S	 jrS"S
 jrS#S jrS$S jrS%S jrS&S jrS'S jrS(S jrS)S jrS*S jrS'S jrS'S jrS+S jrS,S jrS-S jrS.S jrSrg)/ro  i  r   rM  c                8    U R                   R                  U5      $ r   rM  ra  r`  s     r   ra  BaseView.get_free_symbol_uses  s    yy--m<<r   c                    [        SU  35      e)Nzmake_reindexer NYI on r  r  s    r   make_reindexerBaseView.make_reindexer  s    !$:4&"ABBr   c                p   ^^ U R                   R                  5       mU R                  5       mSUU4S jjnU$ )Nc                    > T" T" U 5      5      $ r   r   r   innerr   s    r   r_  &BaseView.make_indexer.<locals>.indexer#      &&r   )r   r  r   r    )rM  r  r  )r  r_  r  r   s     @@r   r  BaseView.make_indexer  s4    		&&(%%'	' 	' r   c                p   ^^ U R                   R                  5       mU R                  5       mSUU4S jjnU$ )Nc                    > T" T" U 5      5      $ r   r   r  s    r   ra  $BaseView.make_loader.<locals>.loader,  r  r   r  )rM  r  r  )r  ra  r  r   s     @@r   r  BaseView.make_loader(  s4    		%%'%%'	' 	' r   c                6    U R                   R                  5       $ r   )rM  r  r  s    r   r  BaseView.dtype1  s    yy""$$r   c                6    U R                   R                  5       $ r   rM  r  r  s    r   r  BaseView.get_layout5      yy##%%r   c                6    U R                   R                  5       $ r   rM  r  r  s    r   r  BaseView.get_device8  r  r   c                    g r   r   r  s    r   r  BaseView.get_origin_node;  r  r   c                6    U R                   R                  5       $ r   rM  rs  r  s    r   rs  BaseView.get_name>      yy!!##r   c                "    U R                  5       $ r   r  r  s    r   r  BaseView.get_pointwise_sizeA      }}r   c                8    U R                   R                  U5      $ r   rM  r3  r1  s     r   r3  BaseView.mark_reuseD      yy##E**r   c                6    U R                   R                  5       $ r   rM  r  r  s    r   r  BaseView.has_exceeded_max_readsG      yy//11r   c                6    U R                   R                  5       $ r   rM  r  r  s    r   r  BaseView.realizeJ      yy  ""r   c                8    U R                   R                  5         g r   rM  r7  r  s    r   r7  BaseView.realize_hintM  s    		 r   c                6    U R                   R                  5       $ r   rM  r]  r  s    r   r]  BaseView.get_storage_numelP      yy**,,r   c                6    U R                   R                  5       $ r   rM  rj  r  s    r   rj  BaseView.is_externS      yy""$$r   c                    [        U R                  [        5      (       d   [        U R                  5      5       eU R                  R	                  5       $ r   )r   rM  ro  r   is_module_bufferr  s    r   r  BaseView.is_module_bufferV  s9    $))X..?TYY?.yy))++r   c                6    U R                   R                  5       $ r   rM  r  r  s    r   r  BaseView.get_read_namesZ      yy''))r   c                    [         R                  " [        SS5         [        U R	                  5       U R                  5       5      R                  sS S S 5        $ ! , (       d  f       g = fr  )r   r   r  rL   r  r	  rV  r  s    r   r  BaseView.get_reads]  sD    \\.*:DA&  " e	 BAAs   2A
A'c                z    U n[        U[        5      (       a#  UR                  n[        U[        5      (       a  M#  U$ r   )r   ro  rM  )r  r   s     r   rp  BaseView.unwrap_viewd  s1    H%%A H%%r   c                    U R                  5       n[        R                  " [        SU5      " U5      n[	        UU R                  5       UU R                  5       S9$ re  )r  r   r   ri  rO  r  r	  rj  s      r   rp  BaseView.constant_to_devicej  sN    !!#n.?HP.."==?	
 	
r   r   Nr  rC  r   *Callable[[Sequence[Expr]], Sequence[Expr]]r  r  r  r  r  r  r  r  r  r  r  r~  r  r  r  r  r  )r   r   r   r   r   rY   ra  r  r  r  r  r  r  r  r  rs  r  r3  r  r  r7  r]  rj  r  r  r  rp  rp  r   r   r   r   ro  ro    s    
LJ'= (=C % %&&$+2#!-%,*	
r   ro  c                  `    \ rS rSr% S\S'   \S
S j5       r\SS j5       rSS jr	  SS jr
Srg	)r   iv  r  r  c                   [         R                  R                  nU Vs/ s H  n[        R                  " U5      PM     nnU R                  5       nS/[        U5      [        U5      -
  -  [        U5      -   n[        U5      [        U5      :X  d   e[        [        U5      5       H  nX   S:X  a  XE   c   eXE   X'   M  XE   b0  [         R                  R                  R                  XE   5      (       a  MQ  X   nXE   nUc   eUc   eXg-
  nUR                  USS9S:X  a  M}   SU R                  5        SU SU 35       e   U$ s  snf )zReplace `-1` with correct sizesNr0  r   fallbackzBroadcast failed in ExpandView(, z) on dimension )rn   r  r  r   r#  r	  r   r   r   is_size_one_or_falser  )	r   new_sizer  r   old_sizer   v1v2diffs	            r   _normalize_sizeExpandView._normalize_sizez  s^    77##-56XELLOX6::<6S]S]:;d8nL8}H---s8}%A{b {...&k$(8(8(M(M) )  [[~%~~%~w..!" /  	 6ajjl^2hZ_`^ab) &8 A 7s    Ec                   U R                  X5      n[        U5      (       Ga   [        U5      u  p4[        U5      [        UR                  5      -
  nUS:  d   e[
        R                  R                  /U-  n[        UR                  UR                  5       H^  u  pxUR                  [        R                  R                  R                  U5      (       d  UO[
        R                  R                  5        M`     [        UR                   UR"                  [%        U5      UUR&                  UR(                  5      n	[+        X9S9$ [-        XS9$ )Nr   rL  )rM  r  )r  r
  rO  r   r  r   r  r  r   r  r  rn   r  r  r  rP  r  r  r   rQ  rR  rS  r   )
r  r   r  rV  rW  skiprX  r  r  rY  s
             r   r  ExpandView.create  s
   &&q3 ##"7":Gx=3z#77D199'',,$.J #J$5$5z G!!77++@@FF  !H %!!  X!!$$J #CCq00r   c                    U R                   $ r   r  r  s    r   r	  ExpandView.get_size  r  r   c                   ^^ U R                  5       nU R                  R                  5       m[        U5      [        T5      -
  m    SUU4S jjnU$ )Nc                   > [        U TS  5      n [        U 5      [        T5      :X  d   e[        [        T5      5       H*  nTU   S:X  d  M  [        R                  R
                  X'   M,     U $ r  )r   r   r   r   r  r  )r   r   actualr  s     r   r   *ExpandView.make_reindexer.<locals>.reindex  s_     tu&Eu:V,,,3v;'!9>$ww||EH ( Lr   r   r  r   r  )r	  rM  r   )r  targetr   r  r  s      @@r   r  ExpandView.make_reindexer  sU     ##%6{S[(		!				 		 r   r   N)r   r   r  r  r   r  )r   r   r  r  r   ro  r  r  )r   r   r   r   r   r  r  rF  r  r	  r  r   r   r   r   r   r   v  sA    
# #J 1 14	3r   r   c                  `    \ rS rSr% S\S'   \S
S j5       r\SS j5       rSS jr  SS jr	Sr
g	)PermuteViewi  r  dimsc           
        U R                  U5      n[        U5      [        [        [        U5      5      5      :X  d   e[	        U5      (       a  [        U5      u  p4[        UR                  UR                  U Vs/ s H  oTR                  U   PM     snU Vs/ s H  oTR                  U   PM     snUR                  UR                  5      n[        X6S9$ [        XS9$ s  snf s  snf )NrL  )rM  r  )_map_neg_dimsr:   r   r   r
  rO  rP  r  r  r  r  rQ  rR  rS  r  )r  r   r  rV  rW  r   rY  s          r   r  PermuteView.create  s      &$:eCI.>#???? ##"7":G$!!  -12T#T2/34t!""1%t4!!$$J #CC-- 34s   :CC$c                X    U Vs/ s H  o"S:  a  UO[        U5      U-   PM     sn$ s  snf r  r'  )r  r  rB  s      r   r  PermuteView._map_neg_dims  s+    @DEaxSY_4EEEs   'c                   [        U R                  U R                  5      5      [        [        [	        U R                  5      5      5      :X  d   eU R
                  R                  5       nU R                   Vs/ s H  o!U   PM	     sn$ s  snf r   )r:   r  r  r   r   rM  r	  )r  r  r   s      r   r	  PermuteView.get_size  sq    $,,TYY78J#dii.!=
 
 	
 
 yy!!#!%+AQ+++s   8B	c                Z  ^ [        U R                  5       VVs0 s H  u  pX!_M	     snnm[        [        U R                  5      5       Vs/ s H  nTU   PM
     snm[	        T5      [	        [        [        U R                  5      5      5      :X  d   e    SU4S jjnU$ s  snnf s  snf )Nc                8   > T Vs/ s H  oU   PM	     sn$ s  snf r   r   )r   r   invs     r   r   +PermuteView.make_reindexer.<locals>.reindex  s     '**c!Hc***s   r  )r   r  r   r   r:   )r  r   r  r   r  s       @r   r  PermuteView.make_reindexer  s     !*$)) 45 4qt 45$S^454!s1v45#*U3tyy>-B"CCCC	+!	+	+
  65s   B"B(r   N)r   r   r  r  r   ro  )r  r  r   	list[int]r  r  )r   r   r   r   r   rF  r  r  r	  r  r   r   r   r   r  r    sB    
. .$ F F,	3r   r  c                  V    \ rS rSr\SS.SS jj5       r\    S	S j5       rS
S jrSr	g)SqueezeViewi  N)rB  c          	        [        U5      (       Ga]  [        U5      u  p4/ n/ nUbF  [        U[        5      (       d   [	        U5      5       eSU::  a  U[        UR                  5      :  d   e[        [        UR                  UR                  5      5       H  u  nu  pUcT  [        R                  R                  R                  U5      (       d$  UR                  U5        UR                  U	5        M]  M_  Xr:w  a$  UR                  U5        UR                  U	5        M  US:X  a  M   S5       e   [        UR                   UR"                  UUUR$                  UR&                  5      n
[)        X:S9$ Uce  [*        R-                  UUR/                  5        Vs/ s H5  n[        R                  R                  R                  U5      (       a  M3  UPM7     sn5      $ UR/                  5       U   S:X  d   e[*        R-                  U[        UR/                  5       5       VVs/ s H  u  p{Xr:w  d  M  UPM     snn5      $ s  snf s  snnf )Nr   rA   zexpected squeezed size to be 1rL  )r
  rO  r   r   r   r   r  r   r   r  rn   r  r  r  r  rP  r  r  rQ  rR  rS  r|  r  r	  )r  r   rB  rV  rW  r  rX  r   r  r  rY  r   s               r   r  SqueezeView.create  s    ##"7":GHJ!#s++6T#Y6+CxC#joo*>$>>>%.s:??JDUDU/V%W!>D;77++@@FF -"))&1 G x -"))&1#qyJ*JJy &X %!!  !!$$J #CC;;; ZZ\)77++@@C )  ::<$)));;q1::<1H"U1HAH11H"UVV #Vs   2H9

H9
H>+H>c                   ^^ U  Vs/ s H  oS:w  d  M
  UPM     nn[        U 5       VVs/ s H  u  p1US:w  d  M  UPM     snnm[        U 5      mSUU4S jjnX$4$ s  snf s  snnf )NrA   c                   > [        U 5      [        T5      :X  d   U  ST 35       e[        R                  R                  /T-  n[	        TU 5       H	  u  p#X1U'   M     [        U5      $ )N )r   r   r  r  r   r   )r   rp  r   r   lengthnot_ones       r   r   %SqueezeView.squeezer.<locals>.reindex<  sb    u:W-C%'/CC-%*WW\\NV$;Igu-!"# .##r   )r   r  r   ztuple[Expr, ...])r   r   )r  r   r  r   r   r  r  s        @@r   squeezerSqueezeView.squeezer4  sc      $.t!AvAt.!*4;AF1;T	$ 	$    /;s   	AAA A c                    [        S5      e)Nzuse SqueezeView.create())AssertionError)r  rM  s     r   r  SqueezeView.__init__E  s    788r   r   )r   r   rB  r  r   r   )r  r  r   z>tuple[list[int], Callable[[Sequence[Expr]], tuple[Expr, ...]]])rM  r   r   r   )
r   r   r   r   rF  r  r  r  r  r   r   r   r   r  r    sC    7; +W +WZ !!	G! ! 9r   r  c                  ~    \ rS rSr% S\S'   S\S'     SS jrSS jrSS jr\r\	        SS	 j5       r
SS
 jrSrg)GenericViewiI  r  r  r  r   c                    U R                   $ r   )r   r  s    r   r  GenericView.make_reindexerN  s     ||r   c                   [        [        U R                  5      5       Vs/ s H  n[        [        R
                  U5      PM     nn[        U R                  U5      5      nSSR                  [        [        U5      5       SU 3$ s  snf )Nzlambda r  r  )r   r   r  rh   r@   rG  r   r   r  r  r   )r  r  	index_old	index_news       r   reindex_strGenericView.reindex_strS  sv    CHTYYCX
CXa*4::q9CX 	 
 i01	3sI#6789+FF	
s   $Bc                z    U R                  U R                  SU R                   3SU R                  5        3/5      $ )Nsize=zreindex=)r  rM  r  r  r  s    r   r   GenericView.__str__Z  s=    YY%		{+x8H8H8J7K-LM
 	
r   c                $    U " U[        U5      US9$ )NrM  r  r   )r   )r  r   r  r   s       r   r  GenericView.createa  s     X@@r   c                    U R                   $ r   r  r  s    r   r	  GenericView.get_sizej  r  r   r   Nr  r  )r   r   r  r  r   r  r   ro  r  )r   r   r   r   r   r  r  r   rE  rF  r  r	  r   r   r   r   r  r  I  sp    
77	3
G

 HAA !A <	A
 
A Ar   r  c                      \ rS rSrSr\S
S j5       r\\SS j5       5       r	\      SS j5       r
\ S       SS jj5       r\ S       SS jj5       rS	rg)r|  in  z
This class handles tensor reshaping by computing appropriate index transformations
to map the new shape back to the original storage layout.
c                   [         R                  " U 5      n [         R                  " U5      n[        R                  R                  R
                  R                  nU" [         R                  " U S5      5      (       a  X-   n U $ r  )r   r#  rn   r  r  r   evaluate_exprLt)r   r  r  s      r   handle_negative_indexView.handle_negative_indexu  s[    ll3||D!((22@@#q)***C
r   c                  ^ ^^^^ [        T[        5      (       d   [        T5      5       eT R                  UR	                  5       T5      u  mm[
        R                  R                  R                  TT5      (       a  U$ [        [        T5      5      S:  =(       d    [        [        T5      5      S:  m[        U5      n        SS jm    SU UUUU4S jjnST;   a  SU4S jjnT " U[        T5      US9$ U(       a  T" UT[        R                  T5      5      $ [        U5      (       d  U" U5      $ [!        USS9u  pgUR"                  n[
        R                  R                  R%                  T5      n	[
        R                  R                  R%                  U5      n
[
        R                  R                  R%                  T5      nSSKJn  U" U	U
UTS	9nUb  U Vs/ s H@  n[+        US
5      (       a  UR,                  R.                  O[0        R2                  " U5      PMB     nn[5        UR6                  UR8                  TUUR:                  UR<                  5      n[?        UUS9$ U" U5      $ s  snf )Nr   c                    [        U SS9u  p4[        UR                  UR                  UUUR                  UR
                  5      n[        X5S9$ )NT)rp  rL  )rO  rP  r  r  rQ  rR  rS  )rl  r  rX  rV  rW  rY  s         r   create_reinterpret_view,View.create.<locals>.create_reinterpret_view  sT     #8T"RG$!!  !!$$J #CCr   c                   > T(       a2  [         R                  U 5      n T" U T[        R                  T5      5      $ TR	                  TT5      nT" U [        T5      US9$ )z
Handle the case where view is not possible with current strides.
For unbacked symbols, make contiguous; otherwise use dynamic_reshape_indexer.
r  )r  require_contiguousr  r  r}  r   )r   r   r  r  r  r  unbacked_symbols_in_sizess     r   "handle_unbacked_or_dynamic_reshape7View.create.<locals>.handle_unbacked_or_dynamic_reshape  s`     ) !33A6.x!B!B8!L  11(HEGADNGDDr   c                4   > [        S/[        T5      -  5      $ r  )r   r   )r   r  s    r   fake_reindex!View.create.<locals>.fake_reindex  s    aS3x=011r   r  Fre  )_compute_stridesize_obliviousr   rL  )rl  r   r  r  rX  r  r   rS  r   r   r   r   )r   r   r   ztuple[int, ...]) r   r   r   resolve_negative_sizer	  rn   r  r  statically_known_list_equalsr   r2   rm  r   r  r  r
  rO  r  to_symints_or_intstorch._subclasses.fake_implsr!  r  r   exprr   r!   rP  r  r  rQ  rR  rS  )r  r   r  rk  r  r  rV  rW  
old_strideold_size_symintold_stride_symintnew_size_symintr!  new_stride_symintr   rX  rY  r  r  r  s   ` `              @@@r   r  View.create~  ss    (H--=tH~=- 66qzz|XN( 77888LLH %h/014 8(23a7 	" 9;	D	D#1	D?M	D	D	E	E	E 	E( =2 ADNLII *8^>>xH 
 %Q''5a88 4AeD&&
 ''**==hGGG,,??
K''**==hG@ ,4	
 ( +*A  'q&11u}}Q7GG*  
 %!!  !!$$J #
CC 2!44!s    AI3c                P   U Vs/ s H,  n[         R                  R                  R                  U5      PM.     nnU  Vs/ s H,  n[         R                  R                  R                  U5      PM.     n n[	        U5      n[        [        U5      5       HI  nX   S:X  d  M  [        R                  R                  X'   [        [        U 5      [        U5      5      X'     O   [         R                  R                  R                  [        U 5      [        U5      5        X4$ s  snf s  snf )Nr0  )rn   r  r  rJ  r   r   r   r   r  Oner<   ri   check_equals)r  r  r   r   s       r   r%  View.resolve_negative_size  s     ;CC(QAGG$$--a0(C:BC(QAGG$$--a0(C>s8}%A{b #ggkk&}X'>h@WX	 & 	
%%mH&=}X?VW!! DCs
   3D3D#Nc                     U R                  XU5      nU$ ! [        [        4 a=    [        U5      /nU R                  X5      nU R                  XR5      n[	        Xg5      n U$ f = fr   )_dynamic_reshape_indexerr  
IndexErrorri   r   )r  r  r  	dense_dimr   flatr   r   s           r   r}  View.dynamic_reshape_indexer  sr    	:228yQG  
+ 	:!(+,D33HCH33DCH%h9G	:s    A	A#"A#c                  ^^ [         R                  R                  R                  n[	        [        U5      5       Vs/ s H  n[        [        R                  U5      PM     snm[        [        TU5      5      n[        U 5      nUSL=(       a&    U[        U5      S-
  :g  =(       a    [        U5      S:H  nU(       a'  Uc   eUR                  U5      nUR                  U5        / mU(       GaK  U(       GaC  UR                  5       n	UR                  5       u  pU	S:X  a=  TR                  [        R                  R                  5        UR                  X45        GOUS:X  a  UR                  U	5        GOU" U5      U" U	5      :X  a<  TR                  U
5        [         R                  R                  R!                  X5        GOfU" U5      U" U	5      :  a~  U" U5      U" U	5      :  a1  UR                  5       u  pX-  U
-   n
X-  nU" U5      U" U	5      :  a  M1  TR                  U
5        [         R                  R                  R!                  X5        OU" U5      U" U	5      :  a  [        R                  R"                  nU	nTR                  [%        XU5      5        X-  nU" U5      U" U	5      :  aG  UR                  5       nTR                  [%        XU5      5        X-  nX-  n	U" U5      U" U	5      :  a  MG  [         R                  R                  R!                  X5        O[&        eU(       a
  U(       a  GMC  U(       al  UR                  5       n	[         R                  R                  R!                  U	S5        TR                  [        R                  R                  5        U(       a  Ml  U(       aE  UR                  5       u  p[         R                  R                  R!                  US5        U(       a  ME  UbB  [        U5      S:X  a3  TR)                  5         TR                  5       nTR+                  UU5        OTR)                  5         [        T5      [        U 5      :X  d   e    SUU4S jjnU$ s  snf )z7
Perform a reshape entirely by modifying indexing math
NrA   c                   >^ [        U 5      [        T5      :X  d   [        U 5      [        T5      45       e[        [        TU 5      5      m[        U4S jT 5       5      $ )Nc              3  <   >#    U  H  n[        UT5      v   M     g 7fr   )rj   )r   r   replacementss     r   r   AView._dynamic_reshape_indexer.<locals>.reindex.<locals>.<genexpr>d  s     HiA|44ir  )r   r   r   r   )r   r=  r`  	view_exprs    @r   r   .View._dynamic_reshape_indexer.<locals>.reindex_  sO     u:T*CSZT,CC*D% 01LHiHHHr   r  )rn   r  r  r  r   r   rh   r@   VIEWr   r   r  r  r   r  r  r2  r1  r?   r  reverseinsert)r  r  r7  r  r   	stack_new	stack_oldreordering_dense_dimold_dimsize_oldvarsize_newvar2	size_new2divisormodulus
dense_exprr   r`  r?  s                     @@r   r5  View._dynamic_reshape_indexer  s    GG$$..	 CHHBV
BVQ*499a8BV
 T8,-	N	 T! #S^a//#H" 	
  (((mmI.GW%	I }}H%MMOMC1}  .  #1Q  *8$	((;;  %  --hA8$y'::)Ih,??&/mmoOD/C/C'3H  )Ih,??   %  --hA8$y'::''++"  w!GH!+)Ih,??'mmoG$$_S7%KL%/G'1H	  )Ih,??
   --hA$$= II@  }}HGG))(A6UWW\\* i
 %MMOMCGG))(A6 i  S]a%7"JY
39~X...	I!	I	I 	I [
s   $Q5r   )r   r    r  r    r   r    )r   r   r  r  r   r   )r  r  r  r  r   ztuple[list[Expr], list[Expr]]r   )r  r  r  r  r7  r  r   &Callable[[Sequence[_T]], Sequence[_V]])r  r  r  r  r7  r  r   r  )r   r   r   r   r  r  r  rF  r   r  r%  r}  r5  r   r   r   r   r|  r|  n  s    
   i5  i5V " ",:"	&" "  
 $(	$ % !	
 
0    $(X X X !X 
4	X Xr   r|  c                     ^  \ rS rSr% SrS\S'   SU 4S jjrSS jr\rSS jr	SS jr
SS	 jr\SS
 j5       rSS jrSS jrSS jrSS jrSS jrSS jr\" S 5       S   SS jj5       rS S!S jjrS"S jrSrU =r$ )#rS  ii  z*Pretend our storage has a different layoutr  rN  c                   > [         TU ]  5         [        U R                  [        5      (       a0  [
        R                  U SU R                  R                  5       5        g g )NrM  )r  r  r   rM  ro  r   r  rp  r   s    r   r  ReinterpretView.__post_init__o  sC    dii**tVTYY-B-B-DE +r   c                P    U R                  U R                  U R                  /5      $ r   )r  rM  rN  r  s    r   r   ReinterpretView.__str__t  s&    		
 	
r   c                6    U R                   R                  5       $ r   r  r  s    r   rs  ReinterpretView.get_name~  r  r   c                .    U R                   R                  $ r   )rN  r  r  s    r   r  ReinterpretView.get_device  s    {{!!!r   c                    g r   r   r  s    r   r  ReinterpretView.get_origin_node  r  r   c                .    U R                   R                  $ r   )rN  r  r  s    r   r  ReinterpretView.dtype  s    {{   r   c                @    [        U R                  R                  5      $ r   )r   rN  r  r  s    r   r	  ReinterpretView.get_size  s    DKK$$%%r   c                @    [        U R                  R                  5      $ r   )r   rN  r  r  s    r   r2  ReinterpretView.get_stride  s    DKK&&''r   c                   ^  SU 4S jjnU$ )Nc                Z  > TR                   R                  5       n[        R                  " TR	                  5       U" U 5      5      nTR                   R
                  TR                  R
                  :w  a6  [        R                  " UTR
                  TR                  R
                  5      $ U$ r   )rN  r  rl   loadrs  r  rM  to_dtype_bitcast)r   r_  
tmp_loaderr  s      r   ra  +ReinterpretView.make_loader.<locals>.loader  sp    kk..0G$--/75>BJ{{  DIIOO3++J

DIIOOTT!!r   r   r  r   rm   r   r  ra  s   ` r   r  ReinterpretView.make_loader  s    	" r   c                6    U R                   R                  5       $ r   )rN  r  r  s    r   r  ReinterpretView.make_indexer      {{''))r   c                    U R                   $ r   rN  r  s    r   r  ReinterpretView.get_layout  r  r   c                    g r   r   r  s    r   r<  ReinterpretView.freeze_layout  r5  r   c                    [        U R                  R                  U5      [        U R                  R                  U5      -  [        U R                  R                  U5      -  $ r   )r(   rN  r  r  rQ  r`  s     r   ra  $ReinterpretView.get_free_symbol_uses  sQ    
 T[[--}=t{{11=ABt{{11=AB	
r   c           	     p   [         R                  R                  R                  U R                  U R
                  R                  U R
                  R                  U R
                  R                  Ub  UR                  O#[         R                  R                  R                  U R
                  R                  S9$ rQ  )rn   r  wrapper_codecodegen_reinterpret_viewrM  rN  r  r  rQ  	writeliner  r  s     r   r	  !ReinterpretView.codegen_reference  s     ww##<<IIKKKKKK & 2F8L8L8V8V++## = 
 	
r   c                    gr  r   r  s    r   rZ  ReinterpretView.num_reads      r   r   r~  r  r  r  r  r  r  r  r  r  r  r   r  r  )r   r   r   r   r  r   r  r   rE  rs  r  r  r  r  r	  r2  r  r  r  r<  rY   ra  r	  rZ  r   r  r  s   @r   rS  rS  i  s    4NF

 H$" ! !&(	* -.$)
!
	!
 /

 r   rS  c                  n    \ rS rSr% SrS\S'   \SS j5       rSS jr\r	\
SS j5       rSS jrSS	 jrS
rg)	DtypeViewi  z(Pretend our storage has a different typer  target_dtypec                    [        U5      (       aX  [        U5      u  p4[        UR                  UUR                  UR
                  UR                  UR                  5      n[        X5S9$ [        XS9$ )NrL  )rM  r  )
r
  rO  rP  r  r  r  rQ  rR  rS  r  )r  r   	new_dtyperV  rW  rY  s         r   r  DtypeView.create  sj     ##"7":G$!!!!!!$$J #CCa88r   c                P    U R                  U R                  U R                  /5      $ r   )r  rM  r  r  s    r   r   DtypeView.__str__  s     		4+<+<=>>r   c                    U R                   $ r   )r  r  s    r   r  DtypeView.dtype  s       r   c                6    U R                   R                  5       $ r   rM  r	  r  s    r   r	  DtypeView.get_size  r  r   c                P   ^ ^ T R                   R                  5       mSUU 4S jjnU$ )Nc                |   > [         R                  " T" U 5      TR                  TR                  R                  5      $ r   )rl   rf  r  rM  r  )r   r  r  s    r   ra  %DtypeView.make_loader.<locals>.loader  s*    ''c
D4E4EtyyWWr   r  rM  r  )r  ra  r  s   ` @r   r  DtypeView.make_loader  s(    		%%'	X 	X r   r   N)r   r   r  r  r   ro  r  r  r  r  )r   r   r   r   r  r   rF  r  r   rE  r  r  r	  r  r   r   r   r   r  r    sE    29 9? H! !$r   r  c                  r    \ rS rSr\          SS j5       r\  S             SS jj5       rSrg)		SliceViewi  c                x  ^ ^^^^	^
 [         R                  R                  m
UR                  5       U   m[	        S X4T4 5       5      (       a!  [
        R                  m	[
        R                  mOT
R                  m	T
R                  mSUU	U
4S jjm          SUU U4S jjnU" USTS5      nU" XCTT5      nX44$ )zb
Normalize start and end such that both are in the range
[0, x.get_size()[dim]] and start <= end.
c              3  8   #    U  H  n[        U5      v   M     g 7fr   )r2   r   r   s     r   r   0SliceView.normalize_start_end.<locals>.<genexpr>  s     H1GA$Q''1Gr  c                   > TR                  X5      (       a  U OT" X5      nTR                  X25      (       a  UnU$ T" X25      nU$ r   )statically_known_geqr<  )r   lowerupperclamped_lowerclamped_fullmax_funcmin_funcr  s        r   clamp,SliceView.normalize_start_end.<locals>.clamp  s^    221<<(1BT 
 00FF  
   m3 
  r   c                D   > U c  U$ TR                  U T5      n T" XU5      $ r   )r  )r0  r  r  rw  r  r  dim_sizes       r   
clamp_wrap1SliceView.normalize_start_end.<locals>.clamp_wrap  s.     {++C:CU++r   r   )r   r    r  r   r  r   r   r    )
r0  zUnion[int, None]r  r   r  r   rw  Union[Expr, int]r   r  )
rn   r  r  r	  r{  r   MinMaxevaluate_minevaluate_max)r  r   rB  startendr  r  r  r  r  r  s   `     @@@@@r   normalize_start_endSliceView.normalize_start_end  s     77##::<$H%h1GHHHyyHyyH,,H,,H		  		 	,!	,*-	,69	,DT	,	, 	, 5!Xq1Xx8zr   c           	       ^^^^ [         R                  " T5      m[        T[        5      (       d  TS:  d   T5       e TS:X  a  US:  a  TS:X  a  U$ [        UR                  5       5      mU(       a  U R                  UTTU5      u  mn[        UT-
  TS-
  -   T5      TT'   [        U5      (       a  [        U5      u  px[        UR                  5      n	U	T   T-  U	T'   [        UR                  UR                  TU	UR                  UR                  T   T-  -   UR                   5      n
[#        XzS9$     SUUUU4S jjn[%        UTUS9$ ! [         a     GNf = f)Nr   l    rA   rL  c                   > [        U 5      [        T5      :X  d   SU  ST 35       e[        U 5      n U T   T-  T-   U T'   U $ )Nzwrong ndim r  )r   r   )r   rB  r  r  steps    r   r   !SliceView.create.<locals>.reindex@  sR     u:X.P+eWAhZ0PP.KEsd*U2E#JLr   r  r  )r   r#  r   r    	TypeErrorr   r	  r  r=   r
  rO  r  rP  r  r  rQ  rR  rS  r  )r  r   rB  r  r  r  r  rV  rW  rX  rY  r   r  s     `` `      @r   r  SliceView.create  sm    ||D!$%%7471	zcY.419 

%
 00CDJE3 uq!94@ ##"7":Gj//0J(o4JsO$!!  !!J$5$5c$:U$BB$$J #CC	!		 	 ah@@K  		s   E 
EEr   N)
r   r   rB  r   r  r   r  r   r   ztuple[int, int])rA   T)r   r   rB  r   r  r   r  r   r  r   r  r   r   r   )r   r   r   r   rF  r  r  r   r   r   r   r  r    s    '' '),'36'	' 'R  3A3A 3A 	3A
 3A 3A 3A 
3A 3Ar   r  c                  R    \ rS rSr% S\S'   S\S'   SS jrSS jrSS jrSS	 jrS
r	g)BaseConstantiL  r  r  r  r  c                    gNr   r   r  s    r   r	  BaseConstant.get_sizeQ  s    r   c                    U R                   $ r   r  r  s    r   r  BaseConstant.get_deviceT  r  r   c                    g r   r   r  s    r   r  BaseConstant.get_origin_nodeW  r  r   c                    [        5       $ r   r9   r  s    r   r  BaseConstant.get_readsZ  r  r   r   Nr  r  r  r  )
r   r   r   r   r   r	  r  r  r  r   r   r   r   r  r  L  s"    r   r  c                  R    \ rS rSr% S\S'   S\S'   S\S'   SS jrSS	 jrSS
 jrSrg)Constanti^  r   r  r  r  r  r  c                   ^  SU 4S jjnU$ )Nc                Z   > [         R                  " TR                  TR                  5      $ r   )rl   rK  r  r  r   r  s    r   ra  $Constant.make_loader.<locals>.loadere  s    <<

DJJ77r   ri  r   rj  s   ` r   r  Constant.make_loaderd  s    	8 r   c                    g r   r   r  s    r   r  Constant.realizej  r5  r   c                @    [        U R                  U R                  US9$ )N)r  r  r  )r  r  r  r  s     r   rp  Constant.constant_to_devicem  s    djj

6JJr   r   Nr  r  r  )	r   r   r   r   r   r  r  rp  r   r   r   r   r  r  ^  s#    JKr   r  c                  H    \ rS rSr% S\S'   S\S'   S\S'   SS jrSS	 jrS
rg)IndexingConstantiq  r   r   r  r  r  r  c                   ^  SU 4S jjnU$ )Nc                Z   > [         R                  " TR                  TR                  5      $ r   )rl   r$  r   r  r  s    r   ra  ,IndexingConstant.make_loader.<locals>.loaderx  s    >>$**djj99r   ri  r   rj  s   ` r   r  IndexingConstant.make_loaderw  s    	: r   c                @    [        U R                  U R                  US9$ )N)r   r  r  )r  r   r  r  s     r   rp  #IndexingConstant.constant_to_device}  s    djj

6RRr   r   Nr  r  )r   r   r   r   r   r  rp  r   r   r   r   r  r  q  s    JSr   r  c                `   SnSn[        [        [        X5      5      5       H  u  pEUS:X  a  M  [        R                  R
                  R                  XR5      (       d0  [        R                  R
                  R                  XS5      (       d    gU[        R                  " SU5      -  nX$-  nM     gNrA   FT)	reversedr   r   rn   r  r  r=  r   r  )r  rA  expected_strideexpected_stride_maxr   ys         r   is_contiguous_strides_for_shaper    s     Os51236ww77
 
''""::1RRuyyA. 4 r   c                <    [         R                  U R                  -  $ r   )rB   padding_alignment_bytesitemsizer  s    r   get_align_for_dtyper    s    ))U^^;;r   c                  B    \ rS rSrSrSS jrS	S jr S
   SS jjrSrg)r  i  ztAbstract base for Layout, MultiOutputLayout, NoneLayout.
Represents the memory layout of the output of an Operation.c                >    [        [        U 5      R                  5      er   r  r  s    r   r  OutputSpec.get_device  r  r   c                >    [        [        U 5      R                  5      er   r  r  s    r   storage_sizeOutputSpec.storage_size  r  r   c                >    [        [        U 5      R                  5      er   r  r`  s     r   ra  OutputSpec.get_free_symbol_uses  rC  r   r   Nr  r  r  r  )	r   r   r   r   r  r  r  ra  r   r   r   r   r  r    s1    C77 %*7!7	!7 7r   r  c                     \ rS rSrSrS\" S5      S4             SS jjr\SS j5       r\R                  S S j5       r\SS	 j5       r
\
R                  S S
 j5       r
\S!S j5       r\R                  S"S j5       rS#S jr\rS$S jrS%S jrS&S jr\      S'S j5       rS&S jrS(S jrS&S jr\        S)S j5       rS*S jrS&S jrS+S jrS,S jrS-S jrS!S jr\" S 5       S.   S/S jj5       rSrg)0r  i  z_
Layout base class

Carries tensor meta-information including offset and
whether it is pinned.
Nr   Fc                ^   Uc  [         R                  U5      nXl        X l        [	        U5      [	        U5      :X  d   SU SU 35       e[        S U 5       5      (       d   eX0l        X@l        XPl        X`l	        U R                  (       a"  U R                  R                  S:X  d   S5       eg g )Nr  	, stride=c              3  N   #    U  H  n[        U[        [        45      v   M     g 7fr   )r   r    r   r   s     r   r   "Layout.__init__.<locals>.<genexpr>  s     <t!:a$--t   #%r   zOnly CPU tensors can be pinned)r  r  r  r  r   r   _size_stride_offsetrR  r   )r  r  r  r  r  rQ  rR  s          r   r  Layout.__init__  s     >#66t<F
4yCK'H5ix)HH'<t<<<<<
"NN(8(8E(A 	
,	
B(ANr   c                    U R                   $ r   r  r  s    r   r  Layout.size      zzr   c                    Xl         g r   r  r  r  s     r   r  r    s    
r   c                    U R                   $ r   r  r  s    r   r  Layout.stride      ||r   c                    Xl         g r   r  r  s     r   r  r        r   c                    U R                   $ r   r  r  s    r   rQ  Layout.offset  r  r   c                    Xl         g r   r  r  s     r   rQ  r    r  r   c                   SnU R                   S:w  a  SU R                    3nU R                  R                  c  SOSU R                  R                   3nSnU R                  (       a  SU R                   3n[	        U 5      R
                   SU R                  R                   U SU R                   SU R                   S	U R                   U U S
3$ )Nr  r   z	, offset=:z, is_pinned=z('z', z, size=r  r  )	rQ  r  r   rR  r   r   r  r  r  )r  rQ  device_index_stris_pinned_strs       r   r   Layout.__str__  s    ;;! .F!%!2!2!:2!DKKDUDUCV@W>>*4>>*:;MDz""#2dkk&6&6%78H7ITZZL YII;i}VH]O1N	
r   c                    U R                   $ r   r  r  s    r   r  Layout.get_device  r  r   c           	        [         R                     [        R                  " [	        U R
                  5      [	        U R                  5      U R                  U R                  U R                  S9sS S S 5        $ ! , (       d  f       g = f)N)r  r  
pin_memory)
rn   	fake_moder  r  r\   r  r  r  r  rR  r  s    r   get_exampleLayout.get_example  sN    [[&&'		2'4jj{{>> [[s   AA77
Bc                B    [        U R                  U R                  5      $ r   )r  r  r  r  s    r   rk  Layout.is_contiguous   s    .t{{DIIFFr   c                    [        U 5      nUS;  d	  U S   S:X  a  g[        U[        U 5      U 5       H  u  p4nUS:w  d  M  X4:w  d  M    g   g)N)r      rA   FT)r   r   r-   )rA  rU  ndimleftrightr  s         r   is_channels_last_contiguous"Layout.is_channels_last_contiguous  sY     5zvqQ!$*51"
D qyT]"
 r   c                    [        U R                  [        [        R	                  [        [        U R                  5      5      5      5      U R                  5       H  u  pnUS:w  d  M  X:w  d  M    g   gr  )r   r  r  r  r  r   r  )r  r  r  r  s       r   is_transposedLayout.is_transposed  sZ    !$KK^66tHTYY<O7PQRII"
D
 qyT]"
 r   c           	        [        U R                  5      [        U5      :X  d   e[        U R                  5       VVs/ s H5  u  p#[        R
                  R                  R                  USS9S:w  d  M3  UPM7     nnnU Vs/ s H  o R                  U   PM     nnU Vs/ s H  o!U   PM	     nnS	S jnU" U5      nS/[        U5      -  n[        [        U5      5       H  nXR   XqU   '   M     [        [        U5      S-
  5       H_  nXr   XrS-      :  n[        U[        5      (       d2  [        R
                  R                  R                  Xr   XrS-      :  SS9nU(       d  M_    g   gs  snnf s  snf s  snf )
Nr   r  rA   c                d    [        U 5      nU  Vs/ s H  o!R                  U5      PM     sn$ s  snf r   )r  r   )arr
sorted_arrelements      r   sorted_indices0Layout.is_stride_ordered.<locals>.sorted_indices,  s,    J=@AS'$$W-SAAAs   -r0  Tr"  F)r  r  r   r  )r   r  r   r  rn   r  r  r  r   r   r   
_shape_envr  )	r  r   r   rB  non_1_indicesr  r  stride_orderedr)  s	            r   ru  Layout.is_stride_ordered  sl   4;;3u:---
 $DII.
.ww11#1BaG . 	 
 +88-Q++a.-82?@-Qa-@	B
 u% E
*s5z"A'-yN8$ # s5zA~&A!$~!e'<<DdD))ww))77"%1u(==d 8  t ' ;
 9@s   2E)3E) E/E4c                    S/[        [        [        S[        U R                  5      S-
  5      5      5      -   n[        U5      /U-   nU R                  U5      $ Nr   rA   )r   r  r   r   r  ru  rE  s     r   is_channels_last_stride_ordered&Layout.is_channels_last_stride_orderedB  sN    d8E!S-=-A$BCDDUu$%%e,,r   c                  ^^ [        U5      n[        U 5      S:X  a  U $ [        R                  (       d  [        R                  X5      (       a  U $ [        R                  " 5       n[        US5      (       a#  UR                  R                  SS5      (       a  U $ [        S [        R                  " X5       5       5      (       + n[        R                  (       d	  U(       a  U $ [        [        R                  S5      (       a  [        R                  R                   OSmSU4S jjmT(       a  [#        U4S	 jU  5       5      (       a  U $ [%        U T5      n['        U5      n[)        [        U 5      5       Vs/ s H  nSPM     n	nS
XS   '   Sn
[+        US
S S
S9 H  u  pX{S
-
     nX   X   -  n[-        U[.        [0        R2                  45      =(       a!    U[        R4                  :  =(       a    X-  S:g  =(       d1    [-        U[0        R6                  5      =(       a    [        R                  nXU'   U(       d  M  [9        X5      U-  X'   Sn
M     U
(       d  U $ [:        =R<                  S
-  sl        U	$ s  snf )zv
The padding does not change stride order but makes sure all strides larger
than the threshold are multiple of align.
r   rb  dislike_paddingFc              3  b   #    U  H%  n[        U[        [        R                  45      v   M'     g 7fr   r   r   s     r   r   &Layout._pad_strides.<locals>.<genexpr>a  s*      
6 q3.//6r   r  Nc                   > Tc  g[        U [        R                  5      (       d  g[        U4S jU R                   5       5      $ )NFc              3  F   >#    U  H  nTR                  U5      v   M     g 7fr   )is_unbacked_symint)r   r   r   s     r   r   ILayout._pad_strides.<locals>.contains_unbacked_symints.<locals>.<genexpr>o  s!     R@Q1y33A66@Qs   !)r   r   r    r{  r1   )r)  r   s    r   contains_unbacked_symints6Layout._pad_strides.<locals>.contains_unbacked_symintsj  s7     dEJJ//R@Q@QRRRr   c              3  4   >#    U  H  nT" U5      v   M     g 7fr   r   )r   r   r-  s     r   r   r(  r  s     N:a6q99:r  rA   )r  T)r)  zsympy.Expr | intr   r   )r  r   rB   pad_channels_lastr  r  rn   get_current_noder  rb  r  r   r  chainpad_dynamic_shapesr  r  r{  r   r   r   r   r   r   r   r!   padding_stride_thresholdr    rZ   r&   num_comprehensive_padding)
in_stridesr  r  aligncurrent_fx_noderd   rq  r   r   new_stridespaddedrankr   prev_idxr  require_paddingr-  r   s                   @@r   _pad_stridesLayout._pad_stridesH  s-    $E*z?a''F,N,N-
 -
 ,,.?F++0D0D0H0Hu1
 1
   
__Z6
 
 

 ((Z*1!''<*H*HAGG&&d		S N:NNN'
I>,\:
"'J"89"8Qq"89 &'qM"":ab>;ID!(+H *T^;F 6C#78 (V<<<(Na'P VUZZ0NV5N5N	 
  &#*6#9E#A  <   	))Q.); :s   5I5c                    [        U [        5      (       d   [        U 5      5       eU R                  c   eU R	                  U R                  U R
                  U R                  5      U l        g r   )r   r  r   r  r>  r  r  r  s    r   rj  Layout.pad_strides  sP    $//;d;/{{&&&''TYY

Kr   c                P    [         R                  =(       a    [        U [        5      $ r   )rB   comprehensive_paddingr   r  r  s    r   ri  Layout.should_pad_strides  s    ++P
40PPr   c                   [        U [        5      (       a  U $ U R                  5       (       a  U R                  5         [        U R                  U R
                  U R                  U R                  U R                  U R                  5      $ r   )
r   rP  ri  rj  r  r  r  r  rQ  rR  r  s    r   as_fixedLayout.as_fixed  sf    dK((K""$$KKJJIIKKKKNN
 	
r   c                    [         R                  (       d   S[        U 5      R                   S35       eU R	                  5       R                  5       $ )Nzconvert z to FixedLayout first)r  r  r   r   rF  r  r  s    r   r  Layout.make_indexer  sG    ,, 	
tDz**++@A	
, }}++--r   c                   [        U[        5      =(       a    U R                  UR                  :H  =(       a    U R                  UR                  :H  =(       ay    U R                  UR                  :H  =(       aY    U R
                  UR
                  :H  =(       a9    U R                  UR                  :H  =(       a    U R                  UR                  :H  $ r   )r   r  r  r  r  r  rQ  rR  )r  others     r   __eq__Layout.__eq__  s    uf% 2u||+2

ekk)2 		UZZ'2 u||+	2
 u||+2 %//1	
r   c                X    [        U R                  U R                  U R                  5      $ r   )r*   r  r  rQ  r  s    r   r  Layout.storage_size  s    .tyy$++t{{SSr   c                    [        U R                  U5      [        U R                  U5      -  [        U R                  U5      -  $ r   )r(   r  r  rQ  r`  s     r   ra  Layout.get_free_symbol_uses  s=    
 TYY6t{{M:;t{{M:;	
r   )r  r  r  r  r  rR  r  )r  r  r  r  r  r  r  zOptional[Sequence[Expr]]rQ  r    rR  r   r   r   r  r  r  r   r   r  r  r    r   r   r  r  )r   torch.Tensorr  )rA  r  rU  r  r   r   )r   r  r   r   )r6  r  r  r  r  r  r   r  r~  r   rP  r  )rK  r   r   r   r  r  ) r   r   r   r   r  r!   r  r  r  setterr  rQ  r   rE  r  r  rk  r  r  r  ru  r#  r>  rj  ri  rF  r  rL  r  rY   ra  r   r   r   r   r  r    s    ,0qz

 
 	

 )
 
 
 

2   
[[    ]]    ]] 
 HG !,>	 "!F- L!L)7L@KL	L L\L
Q
.	
T H%$)
!
	!
 &
r   r  c                  "    \ rS rSrSrSS jrSrg)rP  i  z A Tensor layout we cannot changec                X    [        U R                  U R                  U R                  5      $ )r  )r+  r  r  rQ  r  s    r   r  FixedLayout.make_indexer  s    diidkkBBr   r   Nr  )r   r   r   r   r  r  r   r   r   r   rP  rP    s    *Cr   rP  c                    ^  \ rS rSrSrSrSS jr\SS j5       r\SS j5       r	\SS j5       r
\      SS j5       r\      SS	 j5       r\SS
 j5       r\R                  SS j5       r\SS j5       r\R                  SS j5       r\S S j5       r\R                  S!S j5       r S"     S#S jjr S"     S$S jjrS%S jrS&S jrS'S jrS(S jr  S)           S*U 4S jjjrSrU =r$ )+r  i  zp
A Tensor layout that we are allowed to change

Assumption: layout change should NOT add or remove free symbols
Fc                J    [         R                  " U 5      R                  5       $ )z
Compute what the strides would be if this layout were frozen,
without actually modifying the layout. This is used for speculative
stride computation during Triton template code generation.
)r  deepcopyrF  r  s    r   !get_fixed_layout_without_freezing0FlexibleLayout.get_fixed_layout_without_freezing  s     }}T"++--r   c                    [        U 5      S:X  a  / $ [        R                  R                  /n[	        U SS  5       H  nUR                  X!S   -  5        M     [        [	        U5      5      $ )Nr   rA   r0  )r   r   r  r1  r  r  r   )sizesreversed_stridesr  s      r   r  !FlexibleLayout.contiguous_strides  s^    u:?I!GGKK=U12Y'D##DB+?$?@ (H-.//r   c                    [        [        [        U 5      5      5      [        U5      :X  d   X45       e[        R                  R
                  nS/[        U5      -  nU H  nX#U'   X U   -  nM     U$ )z
Create a stride based on the order the dimensions should be filled in.

In this format, channels last would be:
    [1, 3, 2, 0]
N)r:   r   r   r   r  r1  )r`  r   next_striderU  r   s        r   fill_orderedFlexibleLayout.fill_ordered  sm     %E
+,
50AAQE>QAggkk&3u:%A$AJ%a0K  r   c                    [        [        [        U 5      5      5      [        U5      :X  d   e[        U5      n[        R                  X5      $ )zz
Create a stride based on the sorted order of a permuted range.

In this format, channels last would be:
    [3, 0, 2, 1]
)r:   r   r   r   r  re  )r`  r   r   s      r   r  FlexibleLayout.stride_ordered  s@     %E
+,
50AAAA,U3
**5==r   c                D   U[         R                  :X  a  [        R                  U [        5      $ U[         R
                  :X  a  [        R                  U [        5      $ U[         R                  :X  a  [        R                  U 5      $ [        R                  SU5        [        e)a9  
Create a stride based on a memory format.

Memory format is translasted into a stride order,
so channels_last is the same as:
    FlexibleLayout.stride_ordered(sizes, [3, 0, 2, 1])

This interface does not support memory_format `torch.preserve_format`
which should be used to deduce a format from another source
z>stride_ordered_for_memory_format, unsuppored memory_format: %s)r  channels_lastr  r  NHWC_STRIDE_ORDERchannels_last_3dNHWDC_STRIDE_ORDERcontiguous_formatr  r  r  r  )r`  memory_formats     r    stride_ordered_for_memory_format/FlexibleLayout.stride_ordered_for_memory_format  s     E///!008IJJe444!008JKKe555!44U;;IIP &%r   c                (   [        U 5      [        U5      :X  d   eU Vs/ s H,  n[        R                  R                  R	                  U5      PM.     nn[        [        [        U5      5      UR                  S9n[        R                  X5      $ s  snf )z
Create a stride that has the same stride order as given stride

For example, if given stride is [1000, 1, 100, 10],
the fill order should be [1, 3, 2, 0]
r  )
r   rn   r  r  rK  r  r   __getitem__r  re  )r`  r  r   r   s       r   same_orderedFlexibleLayout.same_ordered)  su     5zS[(((BHI&Q!''""55a8&IE#f+.F4F4FG
**5== Js   3Bc                    U R                   $ r   r  r  s    r   r  FlexibleLayout.size8  r  r   c                4    U R                  SU5        Xl        g )Nr  )!assert_free_symbol_uses_unchangedr  r  s     r   r  rw  <  s    ..vu=
r   c                    U R                   $ r   r  r  s    r   r  FlexibleLayout.strideA  r  r   c                4    U R                  SU5        Xl        g )Nr  )ry  r  r  s     r   r  r{  E      ..x?r   c                    U R                   $ r   r  r  s    r   rQ  FlexibleLayout.offsetJ  r  r   c                4    U R                  SU5        Xl        g )NrQ  )ry  r  r  s     r   rQ  r  N  r}  r   c                B   U R                  U R                  U5      nU R                  5       (       a-  U(       a&  U R                  X0R                  U R                  5      n[        U R                  U R                  U R                  UU R                  U R                  5      $ r   )	r  r  ri  r>  r  rP  r  rQ  rR  )r  r   r@  rX  s       r   as_stride_orderFlexibleLayout.as_stride_orderS  sx     ((E:
""$$**:yy$**MJKKJJIIKKNN
 	
r   c                   UnU R                  5       (       a-  U(       a&  U R                  X0R                  U R                  5      n[	        U R
                  U R                  U R                  UU R                  U R                  5      $ r   )ri  r>  r  r  rP  r  rQ  rR  )r  rN  r@  rX  s       r   as_exact_stridesFlexibleLayout.as_exact_stridesc  sg     #
""$$**:yy$**MJKKJJIIKKNN
 	
r   c                4   U R                  U R                  U5      nU R                  5       (       a&  U R                  X R                  U R                  5      n[        U R                  U R                  U R                  UU R                  U R                  5      $ r   )	re  r  ri  r>  r  rP  r  rQ  rR  )r  r   rX  s      r   as_fill_orderFlexibleLayout.as_fill_orders  st    $($5$5dii$G
""$$**:yy$**MJKKJJIIKKNN
 	
r   c                4   U R                  U R                  U5      nU R                  5       (       a&  U R                  X R                  U R                  5      n[        U R                  U R                  U R                  UU R                  U R                  5      $ r   )	rt  r  ri  r>  r  rP  r  rQ  rR  )r  r  rX  s      r   as_same_orderFlexibleLayout.as_same_order  st    &&tyy&9
""$$**:yy$**MJKKJJIIKKNN
 	
r   c           
     t    0 nS H/  nS H&  nX#4n[        [        [        X5      U5      5      X'   M(     M1     U$ )N)r  r  rQ  TF)r:   r(   r   )r  initial_free_symbolsr   ri  r  s        r   get_initial_free_symbol_uses+FlexibleLayout.get_initial_free_symbol_uses  sI    !0D!.+,6$WT%8-H-$) "/ 1 $#r   c                    S H9  nU R                   X4   n[        [        X#5      5      nXT:X  a  M.   SU SU 35       e   g )Nr  z)Expected free symbols unchanged, but got z vs )r  r:   r(   )r  r   r  ri  old_free_symbolsnew_free_symbolss         r   ry  0FlexibleLayout.assert_free_symbol_uses_unchanged  sX    *M#88$9NO)*:5*PQ#7 ;<L;MTRbQcd7 +r   c                   > U(       a  [         R                  X45      nO[         R                  U5      n[        TU ]  XX6US9  U R                  5       U l        g )NrR  )r  re  r  r  r  r  r  )r  r  r  r  rq  rR  rU  r  s          r   r  FlexibleLayout.__init__  sO     $11$EG$77=GK %)$E$E$G!r   )r  r  r  r  rU  )r`  r  r   r  )r`  r  r   r  r   r  )r`  r  r   r  r   r  )r`  r  ro  ztorch.memory_formatr   r  )r`  r  r  r  r   r  r  rR  r  rS  r  )r   r  r@  r   r   rP  )rN  r  r@  r   r   rP  )r   r  r   rP  )r  r  r   rP  )r   z$dict[tuple[str, bool], sympy.Symbol])r   r   r  r3   r   r   r  )r  r  r  r  r  r  rq  'Optional[Sequence[Union[int, Integer]]]rR  r   r   r   )r   r   r   r   r  r  r]  r  r  re  r  rp  rt  r  r  rV  r  rQ  r  r  r  r  r  ry  r  r   r  r  s   @r   r  r    s    N. 0 0    	> 	> &&-@&	& &4 >>&8>	> >   
[[    ]]    ]] 
 ;@
"
37
	
" HM
/
@D
	
 

	$ AEHH H 	H
 >H H 
H Hr   r  c                  p   ^  \ rS rSrSrSU 4S jjrS	S jrS
S jr\" S 5       S   SS jj5       r	Sr
U =r$ )NonOwningLayouti  z,Is a view into the storage of another tensorc                   > UR                  5       n[        TU ]	  UR                  UR                  UR
                  UR                  5        Xl        g r   )r  r  r  r  r  r  r  view)r  r  rN  r  s      r   r  NonOwningLayout.__init__  s?    "MMLLKKMM		
 	r   c                >    U R                  5       R                  5       $ r   )rF  r  r  s    r   r  NonOwningLayout.make_indexer  s    }}++--r   c                    U R                   R                  5       R                  nUS:X  a  gSSKJn  [
        R                  R                  R                  X5      $ )Nr   TrA   )	ALIGNMENT)	r  r  rQ  utilsr  rn   r  r  ry  )r  rQ  r  s      r   maybe_guard_aligned#NonOwningLayout.maybe_guard_aligned  sB    %%'..Q;$ww<<VOOr   c                R   [        U R                  [        5      (       d   eU R                  R                  n[        U[        5      (       d   [        U5      5       eUR                  n[        U[        5      (       d   [        U5      5       eUR                  R                  U5      $ r   )	r   r  rS  rM  rq  r   rr  rN  ra  )r  ri  boxinput_buffers       r   ra  $NonOwningLayout.get_free_symbol_uses  s     $))_5555iinn#z**5DI5*xx,//:c:/""77FFr   )r  )r  zUnion[BaseView, TensorBox]r   r   r  r  r  r  )r   r   r   r   r  r  r  r  rY   ra  r   r  r  s   @r   r  r    sG    6.P -.$)G!G	!G /Gr   r  c                      \ rS rSrSrSrg)CommBufferTypei  symm_memr   N)r   r   r   r   SYMM_MEMr   r   r   r   r  r    s    Hr   r  c                  R   ^  \ rS rSr% SrS\S'   S\S'         S	U 4S jjrSrU =r$ )
CommBufferLayouti  a\  
A layout that signifies the buffer is a comm buffer.
In terms of striding, the layout is identical to `FixedLayout`.

Buffers with this layout do not participate in in-place reuse - it can be
neither the source nor the target for in-place reuse.

For detailed motivation and usage of this layout, see
NOTE [lowering-time collective optimization].
r  comm_buffer_typer   
group_namec           	       > [        U[        5      (       a  UR                  5       OUn[        TU ]  UR
                  UR                  UR                  UR                  UR                  UR                  S9  X l        X0l        g )Nr  r  r  r  rQ  rR  )r   r  rF  r  r  r  r  r  r  rQ  rR  r  r  )r  rN  r  r  fixedr  s        r   r  CommBufferLayout.__init__  sh     &0%G%G!V<<++<<<<oo 	 	
 !1$r   )r  r  )rN  z"Union[FlexibleLayout, FixedLayout]r  r  r  r   )	r   r   r   r   r  r   r  r   r  r  s   @r   r  r    s;    	 %$O%2% )% 	% %r   r  c                      \ rS rSr% S\S'   \R                  " S S9rS\S'   \R                  " S S9rS\S	'   SS
 jr	SS jr
SS jrSrg)
NoneLayouti  r  r  c                     S/$ r  r   r   r   r   r  NoneLayout.<lambda>  s    r   default_factoryr  r  c                     S/$ r  r   r   r   r   r  r  	  s    1#r   r  c                    gr  r   r  s    r   r  NoneLayout.storage_size  r}  r   c                    U $ r   r   r  s    r   rF  NoneLayout.as_fixed      r   c                    U R                   $ r   r  r  s    r   r  NoneLayout.get_device  r  r   r   Nr  r  r  )r   r   r   r   r   r  r  r  r  r  rF  r  r   r   r   r   r  r    sC     #"!''DD)D#))+FFIFr   r  c                     ^  \ rS rSrSU 4S jjr\SS j5       r\R                  SS j5       rSS jrSS jr	SS jr
\ S       SS jj5       rSS	 jrSS
 jrSrU =r$ )MutationLayoutSHOULDREMOVEi  c                  > [         TU ]  UR                  5       UR                  5       UR	                  5       S 5        Xl        U R                  5       R                  5       n[        R                  R                  U5        g r   )r  r  r  r  r	  r  
get_bufferrs  rn   r  mark_buffer_mutated)r  r  r   r  s      r   r  #MutationLayoutSHOULDREMOVE.__init__  sc    &&(OO		
  ))+	##D)r   c                6    U R                  5       R                  $ r   )real_layoutr  r  s    r   r  !MutationLayoutSHOULDREMOVE.stride!  s    !(((r   c                    g r   r   r  s     r   r  r  %  s    r   c                >    U R                  5       R                  5       $ r   )r  r  r  s    r   r  'MutationLayoutSHOULDREMOVE.storage_size)  s    !..00r   c                   ^ SU4S jjmT" U R                   5      n[        U[        5      (       d   [        U5      5       eU$ )Nc                   > [        U [        5      (       a  T" U R                  5      $ [        U [        5      (       a  T" U R	                  5       5      $ [        U [
        5      (       a  T" U R                  5      $ U $ r   )r   r  r  ro  rp  
MutableBoxrM  )r  unwrap_viewss    r   r  ;MutationLayoutSHOULDREMOVE.get_buffer.<locals>.unwrap_views-  sb    &"<==#FMM22&(++#F$6$6$899&*--#FKK00Mr   )r  r   r   r   )r  r   rr  r   )r  r  r  s     @r   r  %MutationLayoutSHOULDREMOVE.get_buffer,  s9    	 dkk*&&))74<7)r   c                h    U R                  5       R                  n[        U[        5      (       d   eU$ r   )r  rN  r   r  )r  rN  s     r   r  &MutationLayoutSHOULDREMOVE.real_layout:  s,    "))&&))))r   c                   UR                  5         [        R                  R                  UR	                  5       5        [        U[        5      (       a  UR                  nUR                  5         U(       d  [        R                  UR                  5       UR                  5       UR                  5       [        UR                  5       UR                  5       5       VVs/ s H.  u  pE[        R                  R                   R#                  XE5      PM0     snnS9n[        U[$        [&        45      (       d   eUR                  nUR                  5         [)        US5      (       d   U5       e[        UR                  R*                  [,        5      (       d$   [/        UR                  R*                  5      5       e[1        U5      UR                  l        UR                  $ s  snnf )Nrh  rM  )r  rn   r  r  rs  r   r   rM  r7  rO  r  r  r  r  r   r	  r  check_equals_and_simplifyro  r  r  rN  r  r   r  )r  srcdstunsafe_aliasr  r  r   s          r   realize_into'MutationLayoutSHOULDREMOVE.realize_into?  sV    	 	
##CLLN3c9%%((C 	##~~'mmo* !$CLLNCLLN C C GG$$>>qD C	 $ D dXz$:;;;;))CsF##(S(##((//>::QD<QQ:4S9xxs   5Gc                    U $ r   r   r  s    r   rF  #MutationLayoutSHOULDREMOVE.as_fixedg  r  r   c                6    U R                   R                  5       $ r   )r  r  r  s    r   r  'MutationLayoutSHOULDREMOVE.make_indexerj  rn  r   )r  )r  r   r   r   r  )r  r   r   r   r  )r   rr  r  r  )r  r   r  r   r  r   r   r   )r   r   r  )r   r   r   r   r  r  r  rV  r  r  r  rF  r  rF  r  r   r  r  s   @r   r  r    s    	* ) ) ]] 1
 <A%%%%59%	% %N* *r   r  c                    ^  \ rS rSr% S\S'   S\S'   S%U 4S jjrS&S jrS'S jrS(S	 jrS)S
 jr	S*S jr
\S+S j5       rS,S jrS-S jrS.S jrS/S jrS0S jrS1S jrS2S jrS%S jr S3     S4S jjrS5S jrS6S jr S3     S7S jjrS2S jrS8S jrS9S:S jjrS%S jrS;S jrS;S jrS<S jr\ " S 5       S3   S=S  jj5       r!S>S! jr"S?S" jr#S2S# jr$S$r%U =r&$ )@rr  in  r  r   r  rN  c                F   > [         TU ]  5         U R                  SS 5        g r  )r  r  r  r   s    r   r  Buffer.__post_init__x  s    t4r   c                >    U R                  5       R                  5       $ r   )r  r  r  s    r   r  Buffer.make_indexer|  s     --//r   c                J    U R                   (       d   U 5       eU R                   $ r   r  r  s    r   rs  Buffer.get_name  s    yy$yyyr   c                    [        U R                  [        5      (       a  U R                  R                  5       $ [	        [        U R                  5      R                  5      er   )r   rN  r  r  r  r   r   r  s    r   r  Buffer.get_example  s@    dkk6**;;**,,!$t{{"3"<"<==r   c                >    U R                  5       R                  5       $ r   )r  r  r  s    r   r  Buffer.get_device  s    ##%0022r   c                    g r   r   r  s    r   r  Buffer.get_defining_op  r  r   c                6    U R                  5       R                  $ r   )r  r  r  s    r   r  Buffer.dtype  s     &&&r   c                :    / U R                  5       R                  Q$ r   )r  r  r  s    r   r	  Buffer.get_size  s    ("''((r   c                :    / U R                  5       R                  Q$ r   )r  r  r  s    r   r2  Buffer.get_stride  s    *"))**r   c                6    U R                  5       R                  $ r   )r  rQ  r  s    r   
get_offsetBuffer.get_offset  s     '''r   c                    [        U R                  [        5      (       a  U R                  $ [        [	        U R                  5      R
                  5      er   )r   rN  r  r  r   r   r  s    r   r  Buffer.get_layout  s7    dkk6**;;!$t{{"3"<"<==r   c                    U R                   $ r   rp  r  s    r   r  Buffer.get_output_spec  r  r   c                "    U R                  5       $ r   )r  r  s    r   r]  Buffer.get_storage_numel  s    ~~r   c                6    U R                  5       R                  $ r   )r  rR  r  s    r   get_is_pinnedBuffer.get_is_pinned  s     ***r   c                    [        U R                  [        5      (       a@  [        U R                  [        5      (       d   U R                  R	                  5       U l        g g g r   )r   rN  r  r  rF  r  s    r   r<  Buffer.freeze_layout  sF    dkk6**:KK4
 4
 ++..0DK4
*r   c                    [        U R                  [        5      (       d   [        U R                  5      5       eU R                  R	                  XS9U l        g Nrr  )r   rN  r  r   r  r?  s      r   rA  &Buffer.freeze_layout_with_stride_order  sB     $++~66IT[[8II6kk11%1Ur   c                    [        U R                  [        5      (       d   [        U R                  5      5       eU R                  R	                  U5      U l        g r   )r   rN  r  r   r  rE  s     r   rF  $Buffer.freeze_layout_with_fill_order  s=    $++~66IT[[8II6kk//6r   c                    [        U R                  [        5      (       d   [        U R                  5      5       eU R                  R	                  U5      U l        g r   )r   rN  r  r   r  rI  s     r   rJ  $Buffer.freeze_layout_with_same_order  s=    $++~66IT[[8II6kk//7r   c                    [        U R                  [        5      (       d   [        U R                  5      5       eU R                  R	                  XS9U l        g r  )r   rN  r  r   r  rM  s      r   rO  'Buffer.freeze_layout_with_exact_strides  sG     $++~66IT[[8II6kk22 3 
r   c                    [         R                  R                  R                  [        R
                  " U R                  5       S5      5      $ r  r  r  s    r   r   Buffer.is_zero_elements  r  r   c                z   ^  T R                  5       (       a  [        [        T R                  5       S9$ SU 4S jjnU$ )Nr  c                   > TR                  5       n[        R                  " TR                  =(       d    SU" U 5      5      $ rZ  )r  rl   re  r   r   r_  r  s     r   ra  "Buffer.make_loader.<locals>.loader  s/    '')G88DII2GENCCr   ri  )r   r   rM  r  rj  s   ` r   r  Buffer.make_loader  s3      ""=0@AA	D r   c                "    U R                  5       $ r   rs  r  s     r   r	  Buffer.codegen_reference  r  r   c                    g r   r   r  s    r   r  Buffer.decide_layout  r5  r   c                    [        U R                  [        5      (       a%  U R                  R                  R	                  5       /$ gr  )r   rN  r  r  rs  r  s    r   ry  #Buffer.get_inputs_that_alias_output  s2    dkk?33KK$$--/00r   c                    [        U R                  [        5      (       a%  U R                  R                  R	                  5       /$ gr  )r   rN  r  r  rs  r  s    r   rs  Buffer.get_mutation_names  s3    dkk#=>>KK&&//122r   c                6    [        U R                  5       /5      $ r   )r:   rs  r  s    r   r  Buffer.get_read_names  s    4==?+,,r   c                    [        5       $ r   r9   r`  s     r   ra  Buffer.get_free_symbol_uses       |r   c                    [        5       $ r   r9   r  s    r   r  Buffer.get_unbacked_symbol_defs  r  r   c                    g r   r   r  s    r   r  Buffer.realize  r5  r   c                    gr  r   r  s    r   should_allocateBuffer.should_allocate  s    r   rp  r~  r  r  )r   z!Union[torch.Tensor, torch.SymInt]r  r  r  r  )r   r  r  r  r  r  r  r  r  r  )r  r  r   r   )rN  r  r@  r   r   r   r  r   r  r  r  r  r  r  )'r   r   r   r   r   r  r  rs  r  r  r  r  r  r	  r2  r  r  r  r]  r   r<  rA  rF  rJ  rO  r   r  r	  r  ry  rs  r  rY   ra  r  r  r'  r   r  r  s   @r   rr  rr  n  s$    
50>
3 ' ')+(>
 +1 ;@V"V37V	V78
 CH
*
;?
	
U	

- H%$)!	! &
 r   rr  c                  J    \ rS rSrSS jrSS jr\R                  rS	S jrSr	g)
OperationBufferi  c                    U /$ r   r   r  s    r   r  OperationBuffer.get_outputs  s	    vr   c                    U $ r   r   r  s    r   r  OperationBuffer.get_defining_op  r  r   c                X    [         R                  U 5        [        R                  U 5        g r   )rr  r  r  r  s    r   r  OperationBuffer.__post_init__  s    T"%r   r   Nr  r   r  r~  )
r   r   r   r   r  r  r  rv  r  r   r   r   r   r*  r*    s     #55&r   r*  c                      \ rS rSrSS jrSrg)r  i  c                    gr  r   r  s    r   rZ  InputBuffer.num_reads	  r}  r   r   Nr  )r   r   r   r   rZ  r   r   r   r   r  r    s    r   r  c                      \ rS rSrSrSrg)DonatedBufferi  aA  
Represents a donated buffer which is a saved tensor that is not alias to any
fwd inputs, fwd user outputs, and bwd outputs. We generally cannot inplace
reuse the input tensor memory during backward since it might be used in another
function. However, donated buffer can be inplace reused during backward
to save memory.
r   N)r   r   r   r   r  r   r   r   r   r6  r6    s    r   r6  c                  8    \ rS rSr% SrS\S'   SS jrS	S jrSrg)
ri  i  Nr  rg  c                   ^  SU 4S jjnU$ )Nc                   > TR                  5       R                  5       n[        R                  " [        R
                  R                  TR                  5       TR                  5      U" U 5      5      $ r   )	r  r  rl   re  rn   r  constant_namers  rg  r  s     r   ra  *ConstantBuffer.make_loader.<locals>.loader  sP    oo'446G88%%dmmot7K7KL r   ri  r   rj  s   ` r   r  ConstantBuffer.make_loader  s    	 r   c                    [        [        R                  R                  U R	                  5       U5      U R
                  S9$ N)r   rN  )ri  rn   r  r:  rs  rN  r  s     r   rp  !ConstantBuffer.constant_to_device$  s/    &&t}}?
 	
r   r   r  r  )	r   r   r   r   rg  r   r  rp  r   r   r   r   ri  ri    s    .2O+2
r   ri  c                  l    \ rS rSrS	S jr\" S 5       S
   SS jj5       rSSS jjrSS jrSS jr	Sr
g)NoneAsConstantBufferi*  c                    [        5       $ r   r9   r  s    r   r  NoneAsConstantBuffer.get_reads,  r  r   c                    [        5       $ r   r9   r`  s     r   ra  )NoneAsConstantBuffer.get_free_symbol_uses/  r!  r   Nc                J    [         R                  R                  R                  $ r   )rn   r  rw  none_strr  s     r   r	  &NoneAsConstantBuffer.codegen_reference5  s    ww##,,,r   c                    [        S S9$ Nr  )r  r  s    r   r  $NoneAsConstantBuffer.get_output_spec8  s    &&r   c                    gr  r   r  s    r   r  &NoneAsConstantBuffer.has_tensor_output;  r  r   r   r  r  r  r   r  r  r  )r   r   r   r   r  rY   ra  r	  r  r  r   r   r   r   rA  rA  *  sC     23$)!	! 4
-'r   rA  c                  d    \ rS rSr% S\S'   \" S 5       S	   S
S jj5       rSSS jjrSS jrSr	g)r   i?  r    r)  c                .    [        U R                  U5      $ r   )r(   r)  r`  s     r   ra  *ShapeAsConstantBuffer.get_free_symbol_usesC  s      		=99r   Nc                h    [         R                  R                  R                  U R                  5      $ r   )rn   r  rw  codegen_sizevarr)  r  s     r   r	  'ShapeAsConstantBuffer.codegen_referenceI  s!    ww##33DII>>r   c                    gr  r   r  s    r   r  'ShapeAsConstantBuffer.has_tensor_outputL  r  r   r   r  r  r   r  r  )
r   r   r   r   r   rY   ra  r	  r  r   r   r   r   r   r   ?  s<    
J34$):!:	!: 5:
?r   r   c                    ^  \ rS rSr% SrS\S'   SrS\S'   SrS	\S
'   SrS\S'   Sr	S\S'   Sr
S\S'   \R                  S'S j5       r\\R                  S'S j5       5       rS(S jrS)S jrS*S jrS+S jrS,S jr\" S 5       S-   S.S jj5       rS/U 4S jjrS0S jrS1S jrS2S jrS3S jr\  S4S j5       r  S5     S6S jjr\ S7           S8S jj5       rS9S  jr S9S! jr!S(S" jr"S0S# jr#S0S$ jr$S:S% jr%S&r&U =r'$ );r  iP  zZ
Represents a buffer that is computed during kernel execution rather than being an input.
r  rM  FzClassVar[bool]_force_realizeNr  rP  Optional[Callable[..., Any]]rQ  r  rR  rS  c              #    #    U R                   c   eU R                  c   eU R                  c   eU R                  c   e[	        U R
                  [        5      (       d   [        U R
                  5       5       eU R
                  nU R                  n [        UR                  UR                  U R                  U R                  U R                  UR                  UR                  UR                  S9nX0l        [        UR                  UR                  U R                  5      U l        U R                  R!                  U 5        S v   Xl        X l        g ! Xl        X l        f = f7f)Nr  )rP  rQ  rR  rS  r   rM  r  r   rN  r  r  r  r  r  rP  get_default_sizes_bodyclear_cache)r  old_datarW  new_datas       r   with_original_inner_fn%ComputedBuffer.with_original_inner_fn_  s5    +++&&222$$000..:::$))Y//EDO3DE/99[[
	% nn00,,!%!@!@'66",,'66	H !I &%%DK
 ''33D9 I$K !I$Ks   BE%B5E E%E""E%c               #     #    [         R                  n  S[         l        S v   U [         l        g ! U [         l        f = f7fNT)r  rW  )	old_values    r   force_realizeComputedBuffer.force_realize  s2      #11		6,0N),5N)IN)s   ?/ ?<?c                    U R                   b  U R                   $ [        U R                  S5      (       a  U R                  R                   $ g)z}
Returns self.name if it exists, otherwise returns the name of the data node if that exists.
If neither exist, returns None.
Nr   )r   r  rM  r  s    r   get_computed_buffer_name'ComputedBuffer.get_computed_buffer_name  s:    
 99 99499f%%99>>!r   c                6    U R                   R                  5       $ r   rM  rZ  r  s    r   rZ  ComputedBuffer.num_reads  r  r   c                6    U R                   R                  5       $ r   rM  r  r  s    r   r  ComputedBuffer.get_reads  r  r   c                6    U R                   R                  5       $ r   r  r  s    r   r  ComputedBuffer.get_read_names  r  r   c                t   [        U R                  [        [        [        [
        45      (       d.  [        R                  " [        5       [        5       [        5       S9$ [        R                  " [        SS5         U R                  R                  5       (       aT  [        U R                  5       U R                  R                  5       U R                  R!                  5       5      sS S S 5        $ [        U R                  5       U R                  R#                  5       5      sS S S 5        $ ! , (       d  f       g = f)NrV  writesindex_exprsr  T)r   rM  r  r  r<  rO  rC   
ReadWritesr:   r   r   r  rd  rL   get_store_functionr  rg  r	  r  s    r   rR  ComputedBuffer.get_read_writes  s    $))itY%GHH** l!|&L  \\.*:DAyy++--*++-II002II002 BA +++-II&&( BAAs   :A*D).1D))
D7c                    U R                   R                  U5      U R                  R                  U5      -  nU R                  5       (       a!  X R	                  5       R                  U5      -  nU$ r   )rN  ra  rM  has_store_functionrR  )r  ri  r  s      r   ra  #ComputedBuffer.get_free_symbol_uses  sd    $ 11
II**=9: ""$$**,AA-PPFr   c                  > U R                  5       (       dg  U R                  [        R                  R                  ;  a?  U R                  5       S:X  a+  U R                  (       d  U R                  R                  5       $ [        TU ]!  5       $ r  )
rd  r   rn   r  mutated_buffersrZ  rW  rM  r  r  r   s    r   r  ComputedBuffer.make_loader  sc    ''))		!8!88 A%'' 99((**w"$$r   c                V    [        U R                  [        [        [        [
        45      $ r   )r   rM  r  r  r<  rO  r  s    r   rx  !ComputedBuffer.has_store_function  s    $))itY%GHHr   c                   U R                  5       R                  5       R                  5       n[        U R                  [
        [        [        45      (       a+  [        U R                  R                  U R                  U5      $ [        U R                  [        5      (       d   [        U R                  5      5       e[        U R                  R                  U R                  U5      $ r   )r  rF  r  r   rM  r  r  r<  r   r  r   rO  r   rb  )r  r_  s     r   ru  !ComputedBuffer.get_store_function  s    //#,,.;;=dii)T4!89949944diiIIdii33DT$))_D349911499gFFr   c                   [        U R                  [        5      (       Ga  [        R                  " U R
                  R                  5       U R
                  R                  5       5      u  u  pnU R                  5       R                  n[        S U 5       5      (       d   eU VVs/ s Hk  n[        U[        R                  5      (       d  M$  [        UR                  U Vs0 s H%  ofS:w  d  M
  U[        R                  R                   _M'     sn5      PMm     nnnU(       a  [        U R
                  ["        [$        45      (       a  U R
                  R'                  X5      nOUnU Vs/ s H,  n[(        R*                  R,                  R/                  X5      PM.     n	nSSKJn
  U
" XR5                  5       5      $ gs  snf s  snnf s  snf )aD  
If our layout is still flexible, try to determine the stride order based on stride orders of reads.

TODO(jansel): A better algorithm here would look at downstream consumers of this
              value and try to do global graph-level layout optimization.
              This is also something just begging to be autotuned.
c              3  v   #    U  H/  n[        U[        R                  [        R                  45      v   M1     g 7fr   )r   rC   StarDep	MemoryDepr  s     r   r   0ComputedBuffer.get_fill_order.<locals>.<genexpr>  s2      A 1|33\5K5KLMMs   79r   rA   pick_loop_orderN)r   rN  r  rC   r   rM  r  rg  rR  rV  r   r  rj   r   r   r  r  r  r<  r   rn   r  r  r  	schedulerr  r	  )r  
index_varsr  r   rV  r  vr  r)  stride_lengthsr  s              r   r   ComputedBuffer.get_fill_order  sx    dkk>22.:.M.M		,,.		0L0L0N/+(Z! ((*00E       Aa!7!78 Y
177n$WnUVPV_Q_n$WX   dii$66"ii//
KG(GMR"MRTAGG$$11$@U  " 7&~}}GG# %X"s*   %#F?F? 	F:-F:F?(3G:F?c                    [        U R                  [        5      (       a:  U R                  5       nU(       a  U R	                  U5        g U R                  5         g g r   )r   rN  r  r   rF  r<  rE  s     r   r  ComputedBuffer.decide_layout  sC    dkk>22'')E2259""$ 3r   c                p   [         R                  " U R                  5       U R                  5       SS9u  p[        R
                  " [        SU R                  5       5         [        U R                  5       U R                  5       (       a  UOUS S U/UQ76 nS S S 5        / n/ n/ n/ nUR                  5        Hf  u  pXS   ;   a-  U(       a   eUR                  U5        UR                  U	5        M:  XS   ;   d   eUR                  U5        UR                  U	5        Mh     Xg4WXE44$ ! , (       d  f       N= f)Nqr}   rg  rA   r   )rC   r   r  rg  r   r   ri  r  rN   ru  rd  itemsr  )
r  r   
var_rangesrq  r  reduce_vars
index_sizereduce_sizer  r   s
             r   rZ  %ComputedBuffer.get_default_sizes_body  s-    (::##%t'>'>'@
 \\.*;T__=NO'')0022Ra 	D P 
!#
$$&DAG|&&!!!$!!!$G|#|""1%""1% ' ($0III) POs   7D''
D5c                X  ^ ^^ T R                  5       u  u  p4nu  pgU(       a  U" X44XVU45      u  u  p4nu  pg/ UR                  R                  5       QmUb  [        U[        5      (       a  [        U5      S:X  d   eUu  p[        U[        5      (       d   [        U5      5       e[        U	[        5      (       d   [        U	5      5       e[        S U	 5       5      (       d   eUR                  n
X:X  d	   U
U45       eU	 Vs/ s H  oT;  d  M
  UPM     n	nTU	-  m/ UR                  5       Qm[        R                  R                  T [        R                   5      (       d  TR#                  UR%                  5       5                  SUUU 4S jjnXg-   n['        [)        T 5      5      (       + =(       d    [*        R,                  (       + nU" UUUU5      u  nnnU" X}XN5      u  nnn[.        R0                  " UUSS9u  u  nnn[3        UU" U5      U" U5      /UUU5      nUU4U4$ s  snf )a  
This is a main place where we do loop transformations in a
backend-agnostic way.

Here we:
    1) Remove any 1 dimensions
    2) Fuse contiguous dimensions together
    3) Reorder dimensions based on stride orders

Optional argument extra_indexing_constraints can be used to append additional
indexing expressions to existing ones derived from buffer's body. This can be useful
to fuse scheduler nodes with compatible ranges, e.g. (s0*s1*...,) and (s0, s1, s2, ...)
on CPU by preventing indexing simplifications and obtaining index/reduce ranges for
the scheduler node compatible with other nodes.
Optional argument recompute_sizes_body_func can be used to recompute sizes and body
on the default body. This can be useful to append additional loop transformations.
r   c              3  B   #    U  H  n[        U[        5      v   M     g 7fr   )r   r    )r   fs     r   r   6ComputedBuffer.simplify_and_reorder.<locals>.<genexpr>c  s     H4Gqz!T**4Gr  c           	       > TR                  XUT5      u  pEnTR                  5       S:X  ae  [        U5      S:X  aV  [        [	        [        U5      5      5      nU" U5      S   S:w  a*  U Vs/ s H  oU   PM	     nn[        U5      n[        U5      nU" U 5      n U(       aD  [        R                  R                  R                  U U[        TX5      5      u  pIn
[        Xi5      nOUnXKU4$ s  snf )Nr  r   r   )_apply_loop_reorderingrd  r   r   r   r   r   rn   r  r  _simplify_loopsrG   r   )x_varssupport_varsr`  simplify_loopsnewsizesreindex0r   r   r   r   _pruner   index_formulasmemory_addrsr  s               r   simplify_and_reorderAComputedBuffer.simplify_and_reorder.<locals>.simplify_and_reordert  s     ,0+F+Fe\,(H* &&(E1c%jAoU3u:./ E?1%*278%Qa%H8+E2H.u5H f%F-.WW-=-=-M-M,^VN.*F
 *(="h..!  9s   +C*pr}   )
r  Sequence[sympy.Symbol]r  r  r`  r  r  r   r   dtuple[list[int], Callable[[Sequence[int]], Sequence[int]], Callable[[Sequence[int]], Sequence[int]]])rZ  indexing_exprsr   r   r   r   r   r   r   r   r  get_write_exprsrn   r  r  rD   PREFER_STORE_LOOP_ORDERextendget_read_exprsre   r  rB   loop_ordering_after_fusionrC   index_vars_no_squeezerN   )r  extra_indexing_constraintsrecompute_sizes_body_funcr  r  rq  r  r  extra_indexing_rangesextra_indexing_exprexpected_var_rangesr  r  r  should_merge_loopsiter_rangesiter_reindexr   reduce_rangesreduce_reindex	iter_varsr  r  r  s   `                     @@r   r  #ComputedBuffer.simplify_and_reorder5  s   4 '')		
%Z%Z %
 *)4k1J	))
 94..5578%15u==23q89 :T6!3T::WDAV<WW:1488S$?R:SS8H4GHHHHH"&//&? #%B ? /#.a>2I.   # 11N0--/0ww""4)O)OPP 3 3 564	/*4	/04	/ !4	/ !	4	/

4	/ 4	/l "/t,--VV5V5V1V 	 (<	(
$\1 ,@{,
(~q
 0</Q/Q0
, K*
 )$n[&AB
 ]+T11#s   	H'H'c           
     n   SSK Jn  Uc  / n U Vs/ s H-  n[        R                  R                  R                  X`U5      PM/     nn[        U5      [        U5      :X  a  [        US   5      [        U 5      :X  d   e[        [        U" XrU5      5      5      nU V	s/ s H  oU	   PM	     nn	U[#        U5      [%        U5      4$ s  snf ! [         a^    [        R                  (       a)  [        R                  S[        [        X5      5      U5        [        [!        [        U5      5      5      n Nf = fs  sn	f )zE
Shuffle the order of loops around to hopefully improve performance.
rA   r  r   z%Did not simplify complex index:
%s
%s)r  r  rn   r  r  r  r   r   r  	ExceptionrB   r  r  warningr   r   r   r   r   )
r  r  r`  r  priority_idxr  r)  rU  r   r   s
             r   r  %ComputedBuffer._apply_loop_reordering  s#    	/L	, )(D   --dM(   w<3|#44WQZCM :   /',"OPQE $))5aq5)l5)?5+AAA#  	,||=Z/0 
 s5z*+E	, *s*   C 4CAC D2C A%D/.D/c                6    U R                   R                  5       $ r   )rM  r  r  s    r   r  !ComputedBuffer.get_pointwise_size      yy++--r   c                6    U R                   R                  5       $ r   rM  rg  r  s    r   rg  !ComputedBuffer.get_reduction_size  r  r   c                6    U R                   R                  5       $ r   rM  rd  r  s    r   rd  !ComputedBuffer.get_reduction_type  r  r   c                6    U R                   R                  5       $ r   )rM  r   r  s    r   rm  ComputedBuffer.is_no_op  s    yy))++r   c                    gra  r   r  s    r   r'  ComputedBuffer.should_allocate   r  r   c                8    U R                   R                  U5      $ )rf  rM  rp  r  s     r   rp  !ComputedBuffer.constant_to_device  s    yy++F33r   rL  )r   Iterator[None]r  r  r  r  r  r  r  r  r  )r   zCallable[..., None])r   Optional[list[int]]r~  )r   zMtuple[tuple[list[Expr], list[Expr]], LoopBody, tuple[list[Expr], list[Expr]]]NN)r  *Optional[tuple[dict[Any, Any], list[Any]]]r  rX  r   z8tuple[tuple[list[Expr], list[Expr]], Optional[LoopBody]]r   )r  r  r  r  r`  r  r  zlist[sympy.Expr]r  r  r   r  r  r  )(r   r   r   r   r  r   rW  rP  rQ  rR  rS  r  r  r^  r  rc  rf  rZ  r  r  rR  rY   ra  r  rx  ru  r   r  rX   rZ  r  r  r  rg  rd  rm  r'  rp  r   r  r  s   @r   r  r  P  s    K%*NN* "&K%7;4;5929?C <C %  %D 6  6	%%** ,-$)!	! .2	%IG%N% J
J JD RVBFU2$NU2 $@U2 
B	U2n  -1%B*%B,%B %B '	%B
 *%B
%B %BN...,4 4r   r  c                     ^  \ rS rSrSr        SU 4S jjrSS jrSSS jjrSS jrSS jr	SS jr
  S     SS	 jjrS
rU =r$ )r  i  zh
Represents a Triton (in the future other type) of template operator
that we can fuse an epilogue onto.
c                   > [         TU ]  S US9  [        R                  U5      U l        X0l        [        R                  R                  U 5      U l	        [        R                  R                  U 5        0 U l        g r>  )r  r  r  unwrap_storagerj  make_kernel_renderrn   r  register_bufferr   register_operationr   )r  rN  rj  r  r  s       r   r  TemplateBuffer.__init__  s_     	d62"11&9"4GG++D1		""4(+-r   c                     U R                  SS9$ )NT	normalize)rL   r  s    r   rR  TemplateBuffer.get_read_writes  s    ''$'77r   c           
     z  ^^^ U R                  5       mU R                  5       R                  5       mSUU4S jjn[        R                  " X R                  5       SUS9nU R                   H  m[        T[        [        45      (       d   [        T5      5       e[        TR                  [        5      (       d   [        TR                  5      5       eTR                  R                  5       mSUU4S jjnU=R                  [        R                  " UTR                  5       SUS9R                  -  sl        M     U$ )Nc                b   > [        U5      S:X  d   e[        R                  " TT" U 5      S5      $ )Nr   fake)r   rl   r\  )r   r  r_  r   s     r   dummy1TemplateBuffer.extract_read_writes.<locals>.dummy#  s,    v;!###99T75>6::r   r   r  c                |   > [        U5      S:X  d   e[        R                  " TR                  5       T" U 5      5      $ r  )r   rl   re  rs  )r   r  r_  rl  s     r   r  r  1  s0    6{a'''xx??r   )r   Sequence[Any]r  r  r   r   )rs  r  r  rC   rL   r	  rj  r   rS  rr  r   rN  r  rV  )r  r  r  depsr_  rl  r   s       @@@r   rL   "TemplateBuffer.extract_read_writes  s    }}//#002	; 	; //==?B)
 ;;CcOV#<==HtCyH=cjj&11C4

3CC1jj--/G@ @
 JJ,::s||~rYeJ  r   c                6    [         R                  R                  $ r   )r   r  r1  r  s    r   rg  !TemplateBuffer.get_reduction_size<  s    ww{{r   c                    g r   r   r  s    r   rd  !TemplateBuffer.get_reduction_type?  r  r   c                    gra  r   r  s    r   r'  TemplateBuffer.should_allocateB  r  r   c                *    U R                  5       / 4S 4$ r   r  )r  r  r  s      r   r  #TemplateBuffer.simplify_and_reorderE  s$      
 	
r   )r   rj  r  r   )rN  r  rj  Sequence[IRNode]r  rX  r   r   r  r  )r  r   r   r  r  r  r  r  )r  r  r  rX  r   z<tuple[tuple[Sequence[Expr], list[Expr]], Optional[LoopBody]])r   r   r   r   r  r  rR  rL   rg  rd  r'  r  r   r  r  s   @r   r  r    s~    
.. !. 9	.
 
.8:
 RVBF
$N
 $@
 
F	
 
r   r  c                     ^  \ rS rSr  S           S	U 4S jjjr\" S 5       S
   SU 4S jjj5       rSS jrSS jrSS jr	Sr
U =r$ )TritonTemplateBufferiS  c                  > [         TU ]  XU5        X@l        U /U l        Ub  [	        U R
                  S   [        5      (       d   [        U R
                  S   5      5       eU R
                  S   R                  5       nU =R                  U Vs/ s H  n[        [        US9Xp5      PM     sn-  sl        U(       a  UO	[        5       U l        SU l        SU l        gs  snf )a  
NOTE:[TritonTemplates with multiple outputs]
We want the ability for TritonTemplates to output multiple tensors. Triton
kernels have no notion of outputs and this is done by creating tensors that
are then mutated by the kernel. Currently our STORE_OUTPUT codegen doesn't
support creating multinode outputs for triton templates.
We work around this by creating an extra input buffer during the lowering
and we mark them as mutated inputs.
Nr   r  )r  r  mutated_inputsoutputsr   rj  r   r   r  MutationOutputr  r:   allowed_prologue_inpssubgraph_inpssubgraph_outs)	r  rN  rj  r  r  r  r  r  r  s	           r   r  TritonTemplateBuffer.__init__T  s    " 	);<,&*V%dkk!nf55KtDKKN7KK5[[^..0FLL))C z8#D) L &;!
 	" SW?Cs   Cc                ,  > [         TU ]  U5      nU R                  (       a  U R                  O/ nU R                  (       a  U R                  O/ nU Hz  n[	        U[
        R                  5      (       a  UR                  [        XQ5      5        M>  [	        U[        5      (       a"  UR                  UR                  U5      5        Mu  Uc  Mz   e   U H?  n[	        U[        5      (       a"  UR                  UR                  U5      5        M:  Uc  M?   e   U$ r   )
r  ra  r  r  r   r   r    updater(   r   )r  ri  resr  r  rl  r   r  s          r   ra  )TritonTemplateBuffer.get_free_symbol_usesw  s     g*=9.2.@.@**b.2.@.@**b C#uzz**

+C?@C((

333MBC{"{ ! !C#v&&

333MBC{"{	 ! 
r   c                    U R                   $ r   r  r  s    r   r   TritonTemplateBuffer.get_outputs      ||r   c                    U R                   $ r   )r  r  s    r   get_allowed_prologue_inps.TritonTemplateBuffer.get_allowed_prologue_inps  s    )))r   c                &    SU R                    S3nU$ )NzTritonTemplateBuffer(layout=r  rp  )r  r   s     r   r   TritonTemplateBuffer.__str__  s    ,T[[M;
r   )r  r  r  r  r  r  )rN  r  rj  r  r  zOptional[Callable[_P, _T]]r  Optional[Iterable[IRNode]]r  zOptional[OrderedSet[str]]r   r   r  r  r  r  r  )r   r   r   r   r  rY   ra  r  r	  r   r   r  r  s   @r   r  r  S  s     6:;?!D!D !!D 7	!D
 3!D  9!D 
!D !DF 23$)!	! 4.* r   r  c                     ^  \ rS rSrSr          SU 4S jjrSS jrSS jrSS jrSS jr	SS jr
SS	 jrSS
 jrSS jrSS jrSrU =r$ )ChoiceCalleri  a  
Represents a possible choice used in autotune_process.py.
During autotuning, self.benchmark() is first called to get benchmark result,
and if this choice is selected, self.output_node() is called to get the output_node.

Children classes: TritonTemplateCaller, CUTLASSTemplateCaller.
c                n   > [         TU ]  5         Xl        X0l        X l        X@l        SU l        0 U l        g r  )r  r  r   rN  r   descriptionfailedr   )r  r   r   rN  r	  r  s        r   r  ChoiceCaller.__init__  s9     		& '! ,.r   c                  ^^ U R                  5       m[        R                  (       a  [        UU4S j5      $ [        R
                  " TTSU0S S9$ )Nc                    > T " T6 $ r   r   )algor   s   r   r  (ChoiceCaller.benchmark.<locals>.<lambda>  s	    D$Kr   r   r  )to_callablerB   /profile_bandwidth_with_do_bench_using_profilingr^   rS   	benchmark)r  r   r   r	  s     `@r   r	  ChoiceCaller.benchmark  sA    !AA+,?@@$$T4%dKKr   c                    [         er   r  r  s    r   	call_nameChoiceCaller.call_name  r  r   c                    [         er   r  r  s    r   r	  ChoiceCaller.to_callable  r  r   c                "    U R                  5       $ )z
Hash key for the underlying kernel. By default, we assume there are no
runtime params, so kernel hash key defaults to choice caller's hash key.
)hash_keyr  s    r   kernel_hash_keyChoiceCaller.kernel_hash_key  s    
 }}r   c                    [         er   r  r  s    r   r 	  ChoiceCaller.hash_key  r  r   c                    [         er   r  r  s    r   re  ChoiceCaller.output_node  r  r   c                    0 $ )zRInformation returned here is logged to the autotune log file when that is enabled.r   r  s    r   	info_dictChoiceCaller.info_dict  s    	r   c                    g)Nunsupported_choicer   r  s    r   autoheuristic_idChoiceCaller.autoheuristic_id  s    #r   c                    SU l         g)zp
Mark the choice as failed so that it can be
removed later. Useful for when we decouple
compilation and tuning.
TN)r	  r  s    r   mark_failedChoiceCaller.mark_failed  s     r   )r   r	  r	  r   rN  r   )
r   r   r   r  rN  r  r	  r   r   r   )r   r   r   rT  r   rL  r  )r   r  )r   r   )r   z<dict[str, Union[PrimitiveInfoType, list[PrimitiveInfoType]]]r~  )r   r   r   r   r  r  r	  r	  r	  r!	  r 	  re  r(	  r,	  r/	  r   r  r  s   @r   r	  r	    sp    .. ". 	.
 . 
.(L""""$ r   r	  c                      \ rS rSrSS jrSrg)TritonTemplateCallerBasei  c                    [         er   r  r  s    r   get_make_kernel_render/TritonTemplateCallerBase.get_make_kernel_render  r  r   r   N)r   r   )r   r   r   r   r4	  r   r   r   r   r2	  r2	    s    "r   r2	  c                     ^  \ rS rSrSr            SU 4S jjr\SS j5       r\SS j5       r S   SS jjr	\
R                  SS j5       rSS jr S   SS	 jjr    SS
 jrSrU =r$ )MultiTemplateBufferi  a3  
Represents a Buffer with multiple backing implementation choices.

Choices can be TritonTemplates or ExternKernels. During scheduling if there is a potential
epilogue we will benchmark each of the choices with the epilogue to determine an implementation.
Otherwise, the fastest base choice will be chosen.
c                   > [         TU ]  UUS US9  X0l        0 U l        X@l        X l        [        S U 5       5      U l        0 U l        g )N)rN  rj  r  r  c              3     #    U  H]  n[        U[        5      =(       dA    [        U[        R                  R                  R
                  5      =(       a    UR                  v   M_     g 7fr   )r   r2	  r  r  select_algorithmExternKernelCallerhas_out_variant)r   choices     r   r   /MultiTemplateBuffer.__init__.<locals>.<genexpr>  sU      %
 - v78 65??#C#C#V#VW +**
 -s   A%A')	r  r  _choice_timings_fn_choice_timings_choicesoriginal_inputsr   _output_plannable_make_kernel_renders)r  rN  rj  choice_timings_fnunfiltered_choicesr  r  s         r   r  MultiTemplateBuffer.__init__  se     	#"7	 	 	
 #4OQ,>%!$ %
 -%
 "
 ?A!r   c                    U R                   $ )zN
Are all possible choices TritonTemplates or Extern Kernels with out variants
)rC	  r  s    r   output_plannable$MultiTemplateBuffer.output_plannable  s    
 %%%r   c                    U R                   $ r   )rA	  r  s    r   r  MultiTemplateBuffer.choices  s    }}r   c                z    XR                   ;  a  U R                  U5      U R                   U'   U R                   U   $ r   )r@	  r?	  )r  hint_overrides     r   choice_timings"MultiTemplateBuffer.choice_timings  s<      4 44262I2I-2XD  /##M22r   c              #  8  #    [        U[        R                  R                  R                  5      (       d   [        U5      5       eU R                  UR                  :X  d   eU R                  nUR                  5       U l         S v   X l        g ! X l        f = f7fr   )	r   r  r  r:	  TritonTemplateCallerr   rN  r  r4	  )r  callerrenders      r   swap_as_triton_caller)MultiTemplateBuffer.swap_as_triton_caller  s     EOO44II
 
 	<	 
 {{fmm+++(("("?"?"A	-&,#f#s   BBB BBBc                \   [        U[        R                  R                  R                  5      (       d   [        U5      5       eU R                  5       UR                  R                  :X  d   eU R                  5       UR                  R                  :X  d   eUR                  5       U l        g r   )r   r  r  r:	  rR	  r   r	  rN  r  r2  r  r4	  r  )r  rS	  s     r   finalize_as_triton_caller-MultiTemplateBuffer.finalize_as_triton_caller-  s    EOO44II
 
 	<	 
 }}&--"4"4444 FMM$8$8888"("?"?"Ar   c                R    U R                  US9n[        X"R                  S9nX2U   4$ )N)rN	  r  )rO	  r|  r  )r  rN	  timings
min_choices       r   get_min_choice"MultiTemplateBuffer.get_min_choice5  s3     %%M%Bkk2
J/00r   c                    UR                  5        H"  u  p#UR                  5       U R                  U'   M$     U R                  S   U l        g)z;Finalize with multiple callers for different hint overridesN)r  r4	  rD	  r  )r  callersrN	  rS	  s       r   finalize_as_triton_callers.MultiTemplateBuffer.finalize_as_triton_callers<  sE     &-]]_!M7=7T7T7VD%%m4 &5 #'";";D"Ar   )r@	  r?	  rA	  rD	  rC	  r  rB	  )rN  r  rj  r  rE	  z4Callable[[Optional[int]], dict[ChoiceCaller, float]]rF	  list[ChoiceCaller]r  r  r   r   r  )r   rc	  r   )rN	  r  r   zdict[ChoiceCaller, float])rS	  r2	  r   r  )rS	  r2	  r   r   )rN	  r  r   ztuple[ChoiceCaller, float])r`	  z-dict[Optional[int], TritonTemplateCallerBase]r   r   )r   r   r   r   r  r  r  rI	  r  rO	  r  r  rU	  rX	  r]	  ra	  r   r  r  s   @r   r7	  r7	    s    AA !A P	A
 /A  /A 
A8 & &   .23*3	"3 - -B .21*1	#1BDB	B Br   r7	  c                  \   ^  \ rS rSr              SU 4S jjrSS jrSS jrSrU =r$ )	CUTLASSTemplateBufferiG  c                J   > [         TU ]  XU5        X@l        XPl        X`l        g r   )r  r  workspace_sizetemplatesupports_epilogue_fusion)r  rN  rj  r  rg	  rh	  ri	  r  s          r   r  CUTLASSTemplateBuffer.__init__H  s&     	);<, (@%r   c                8    U R                   b  U R                   $ S$ r  rg	  r  s    r   r  (CUTLASSTemplateBuffer.get_workspace_sizeW  s    &*&9&9&Et""L1Lr   c                ~    U R                  5        H)  n[        R                  " UR                  5       S S 5        M+     g r   )r  rl   r\  rs  )r  r\  s     r   emulate_store_fn&CUTLASSTemplateBuffer.emulate_store_fnZ  s,    &&(FIIfoo't4 )r   )ri	  rh	  rg	  )rN  r  rj  r  r  Callable[_P, _T]rg	  r   rh	  rr   ri	  r   r   r   r  r~  )	r   r   r   r   r  r  ro	  r   r  r  s   @r   re	  re	  G  sd    AA !A -	A
 A "A #'A 
AM5 5r   re	  c                  T   ^  \ rS rSr            SU 4S jjrSU 4S jjrSrU =r$ )CppTemplateBufferi_  c                L   > [         TU ]  XU5        X@l        XPl        S U l        g r   )r  r  rh	  r=	  r  )r  rN  rj  r  rh	  r=	  r  s         r   r  CppTemplateBuffer.__init__`  s&     	);< /3r   c                  > [        U R                  [        5      (       a  [        U R                  [        5      (       d   [        U R                  5      5       eU R                  S   n[        U[        5      (       d   [        U5      5       eUR                  n[        U[        5      (       d   [        U5      5       eU$ [        TU ]%  5       $ r  )
r   rN  MultiOutputLayoutr  r   r   rr  r  r  r  )r  first_outputrN  r  s      r   r  CppTemplateBuffer.get_layoutm  s    dkk#455dllH55ItDLL7II5<<?LlF33GT,5GG3!((Fff--;tF|;-M7%''r   )r=	  r  rh	  )rN  r  rj  r  r  rq	  rh	  rr   r=	  r   r   r   r  )r   r   r   r   r  r  r   r  r  s   @r   rs	  rs	  _  sL    44 !4 -	4
 "4 4 
4
( 
(r   rs	  c                  V   ^  \ rS rSrSr S           SU 4S jjjrSS jrSrU =r$ )	CuteDSLTemplateBufferiz  z
Buffer for CuteDSL (CUTLASS Python DSL) template kernels.
Similar to other template buffers but specialized for CuteDSL operations.
c                  > [         TU ]  XU5        X@l        XPl        U /U l        Ub  [        U R                  S   [        5      (       d   [        U R                  S   5      5       eU R                  S   R                  5       nU =R                  U Vs/ s H  n[        [        US9Xp5      PM     sn-  sl        g g s  snf )Nr   r  )r  r  rh	  r  r  r   rj  r   r   r  r  r  )	r  rN  rj  r  rh	  r  r  r  r  s	           r   r  CuteDSLTemplateBuffer.__init__  s     	);< ,&*V%dkk!nf55KtDKKN7KK5[[^..0FLL))C z8#D) L &s   B>c                    U R                   $ r   r	  r  s    r   r  !CuteDSLTemplateBuffer.get_outputs  r	  r   )r  r  rh	  r   )rN  r  rj  r  r  rq	  rh	  r   r  r	  r   r   r  )	r   r   r   r   r  r  r  r   r  r  s   @r   r{	  r{	  z  s[     6: ! -	
  3 
 * r   r{	  c                     ^  \ rS rSrSr     S                     S	U 4S jjjrS
S jrSS jr S     SS jjrSr	U =r
$ )NVUniversalGemmBufferi  z
Buffer for NVIDIA Universal GEMM kernels.

Unlike CuteDSL templates which use Jinja templates, this generates
simpler Python code that directly calls the cutlass_api library.
c                  > [         TU ]  XS S9  X0l        X@l        U /U l        X`l        XPl        Xpl        Xl        Xl	        Xl
        UR                  R                  UR                  R                  S.U l        U R                  U l        g )N)r  )kernel_namemin_cc)r  r  kernelaccumulator_typer  rg	  variantscale_type_ascale_type_bswizzle_type_aswizzle_type_bmetadatar	  r	  kernel_metadata_make_kernel_renderr  )r  rN  rj  r	  r	  r	  rg	  r	  r	  r	  r	  r  s              r   r  NVUniversalGemmBuffer.__init__  s     	DA 0&*V,((,, "??66oo,, 
 #'":":r   c                    U R                   $ )z#Return the workspace size in bytes.rl	  r  s    r   r  (NVUniversalGemmBuffer.get_workspace_size  s    """r   c                    U R                   $ r   r	  r  s    r   r  !NVUniversalGemmBuffer.get_outputs  r	  r   c                  ^	 SSK Jn  SSKJn  / nU R                   HV  n[        U[        5      (       a  UR                  n[        U[        5      (       a  UR                  nUR                  U5        MX     [        UR                  5      nU" UUUU R                  U R                  U R                  U R                  U R                   U R"                  U R$                  U R&                  S9m	U	4S jnT	U4$ )z
Create a kernel renderer for code generation.

Returns (kernel, render) tuple where:
- kernel: NVUniversalGemmKernel object with call_kernel() method
- render: function that returns source code string
r   )NVUniversalGemmKernel)Placeholder)r	  r   re  r	  r	  rg	  r	  r	  r	  r	  r	  c                 $   > T R                  5       $ r   )rT	  )render_kernels   r   rT	  9NVUniversalGemmBuffer._make_kernel_render.<locals>.render  s     ''))r   )Btorch._inductor.codegen.nv_universal_gemm.nv_universal_gemm_kernelr	  torch._inductor.utilsr	  rj  r   r   rM  rq  r  r   KERNEL_NAMEr	  r	  rg	  r	  r	  r	  r	  r	  )
r  out_noderN	  r	  r	  r   rl  r	  rT	  r	  s
            @r   r	  )NVUniversalGemmBuffer._make_kernel_render  s    	
 	6!#;;C#y))hh#z**hhs#  +112-##  00!22..LL****....
	* f$$r   )r	  r	  r	  r  r  r	  r	  r	  r	  r	  rg	  )r   NNNN)rN  r  rj  r  r	  r   r	  r   r	  r   rg	  r   r	  Optional[Any]r	  r	  r	  r	  r	  r	  r   r   r  r  r   )r	  r   rN	  r  r   ztuple[Any, Any])r   r   r   r   r  r  r  r  r	  r   r  r  s   @r   r	  r	    s      &*&*(,(,;; !; 	;
 ; ; ; $; $; &; &; 
; ;B# =A*%*%,9*%	*% *%r   r	  c                &    [        S U  5       5      $ )Nc              3  B   #    U  H  n[        U[        5      v   M     g 7fr   r   r   r   r  s     r   r   #is_node_sequence.<locals>.<genexpr>  s     4ez!V$$er  )r   )r   s    r   is_node_sequencer	    s     4e444r   c                      \ rS rSr% S\S'   SS jrSS jrSS jr\SS j5       r	\
    SS j5       rSS	 jrSS
 jr\" S 5       S   SS jj5       rSrg)r  i  )Sequence[Union[IRNode, Sequence[IRNode]]]rj  c                n    U R                   U   n[        U[        5      (       d   eUR                  5       $ r   rj  r   r   rs  )r  r   inputs      r   
input_nameInputsKernel.input_name   s/    A%((((~~r   c                  ^ [         [        R                     " 5       n[        R                  mU R                   Hq  n[        U[        5      (       a  UR                  U4S jU 5       5        M5  [        U[        5      (       a  ML  UR                  T" UR                  5       5      5        Ms     [         [        R                     " U4S jU R                  5        5       5      n[        R                  " UU[        5       S9$ )Nc              3  P   >#    U  H  nT" UR                  5       5      v   M     g 7fr   r  )r   r   r  s     r   r   /InputsKernel.get_read_writes.<locals>.<genexpr>
  s     BEqWQZZ\22E   #&c              3  P   >#    U  H  nT" UR                  5       5      v   M     g 7fr   r  )r   r  r  s     r   r   r	    s#      .
/AGCLLN##/Ar	  rq  )r:   rC   rI   r  rj  r   r   r  r   r  rs  r  rt  )r  rV  r	  rr  r  s       @r   rR  InputsKernel.get_read_writes  s    <++,.&&[[E%**BEBBE#899		'%.."234 ! L,,- .
/3/?/?/A.
 
 &&"
 	
r   c                6    U R                  5       R                  $ r   rU  r  s    r   r  InputsKernel.get_reads  rX  r   c                   [        U[        5      (       a  UR                  n[        U[        5      (       a  UR                  n[        U[        5      (       a*  [        U[
        5      (       d  [        R                  U5      n[        U[        5      (       a  U R                  U5      $ [        U[        5      (       a  U$ [        U[        [
        45      (       d   [        U5      5       eU$ r   )r   r   rM  rq  ro  rS  r  realize_inputunwrap_storage_for_inputTorchBindObjectrr  r   r  r   s     r   r	  %InputsKernel.unwrap_storage_for_input  s    a##Aa$$Aa"":a+I+I**1-Aa##
 //22a))H!fo677@a@7r   c                    / nU  Hd  n[        U[        5      (       a&  U Vs/ s H  n[        R                  U5      PM     nnO[        R                  U5      nUR	                  U5        Mf     U$ s  snf r   )r   r   r  r	  r  )rj  
inputs_newr   r   s       r   r  InputsKernel.unwrap_storage1  sm     =?
A!X&&GHIq!\::1=qI 99!<a   	 Js   A/c                    gra  r   r  s    r   rj  InputsKernel.is_extern>  r  r   c                    gr  r   r  s    r   rZ  InputsKernel.num_readsA  r}  r   c                    [         [        R                     " 5       nU R                   HI  n[	        U[
        5      (       a  X#R                  U5      -  nM-  U H  nX$R                  U5      -  nM     MK     U$ r   )r:   r   r"   rj  r   r   ra  )r  ri  r  rl  	inner_inps        r   ra  !InputsKernel.get_free_symbol_usesD  sg     u||$&;;C#v&&--m<<!$I77FFA "%	  r   r   N)r   r   r   r   r  r  r$  )rj  r	  r   z%list[Union[IRNode, Sequence[IRNode]]]r  r  r  r  )r   r   r   r   r   r	  rR  r  rF  r	  r  r  rj  rZ  rY   ra  r   r   r   r   r  r    s    55 

,,  $ 
9
	.
 
 N+$)
!
	!
 ,
r   r  c                  (    \ rS rSrSS jrSS jrSrg)	NopKerneliR  c                    gra  r   r  s    r   rm  NopKernel.is_no_opS  r  r   c                    [        5       $ r   r9   r  s    r   r  NopKernel.get_readsV  r  r   r   Nr  r  )r   r   r   r   rm  r  r   r   r   r   r	  r	  R  s    r   r	  c                      \ rS rSrSr\S
S j5       r\ S     SS jj5       r\" S 5       S   SS jj5       r	\SS j5       r
SS jrS	rg)ConcatKerneliZ  zb
There isn't actually a real kernel for concat, we just change the
storage for the upstream data.
c                   US   R                  5       nUS   R                  5       n[        US   R                  5       5      nS/nXR   /nSUs=::  a  [	        U5      :  d   e   e[        S[	        U5      5       H  nX   R                  5       n	UR                  XR   5        [	        U	5      [	        U5      :X  d   eX   R                  5       U:X  d   eX   R                  5       U:X  d   e[        [	        U5      5       HE  n
X:X  a  XZ   X   -   XZ'   M  [        R                  R                  R                  XZ   X   5      XZ'   MG     UR                  XR   5        M     [        R                  U5      n[        R                  (       a#  [        R!                  XUS   R"                  5      n[        [	        U5      5       H|  nX   n[%        U5      (       d  M  UR'                  5       n[)        U[*        5      (       d  M@  [        R-                  UR.                  UR0                  5      (       d  Mq  [3        U5      n  O   [5        S U 5       5      n[        R                  R6                  R8                  S   n[)        U[        5      (       d   [;        U5      5       eUSL a"  [5        S U 5       5      (       a  [3        U5      n[=        S U 5       5      nUc   e[?        S[+        UUUUUS9/ S	9n[A        U5      n/ n[C        U5       GH  u  nn[)        U[D        [F        45      (       d   [;        U5      5       eU RI                  U[J        RM                  UX&U   Xx   SS
95      n[)        U[N        5      (       d   [;        U5      5       e[)        URP                  [        5      (       d   [;        URP                  5      5       eURP                  R                  U5        [)        URR                  [D        5      (       a  URR                  RU                  5       nOURR                  n[)        U[@        5      (       d  GM4  URW                  5       (       d  GML  UR                  5       =nc  GMb  [Y        UR:                  5      (       d  GM  [[        U5      (       a  GM  UR                  UR]                  5       5        GM     [	        U5      S:  aR  [        R                  R_                  U[`        Rb                  5      (       a  [        R                  Re                  U5        [        R                  Rg                  U5      Ul4        U Rk                  URP                  5      Ul(        [        R                  Rm                  U5        U$ )z&
Create the concat kernel from inputs
r   rA   c              3  8   #    U  H  n[        U5      v   M     g 7fr   )r
  r  s     r   r   &ConcatKernel.create.<locals>.<genexpr>  s     -WPV1.CA.F.FPVr  Fc              3    #    U  Hv  nS UR                   ;   =(       a[    UR                   S    R                  [        R                  S9=(       d*    UR                   S    R                  [        R                  S9v   Mx     g7f)r0  ro  N)rb  rk  r  rj  rl  r   args     r   r   r	    st      
<
 $ SXX  --E<O<O-P W88E?00u?U?U0V $s   A>B c              3  z   #    U  H1  n[        U5      =(       a    UR                  5       R                  v   M3     g 7fr   )r
  r  rR  r  s     r   r   r	    s-      
KQa!!$A)A)AA6s   9;N)r  r  r  r  rR  r   rN  rj  r  )7r  r  r   r	  r   r   r  rn   r  r  r  r  r  rB   rC  r  r>  r  r
  r  r   rP  r  r  r  r-   r{  current_noder   r   r   r	  rq  r   ro  r  r  r  r  rr  rj  rM  rp  r)  re   rd   rv  r  rD   FOREACHregister_operation_listr  r   r  r  )r  rj  rB  r  r  r  offsets_startoffsets_endr   
input_sizer  output_strider   rN  any_input_is_storage_and_layoutfx_node_argsrR  concat_kernelr	  op_namesrl  r  input_unwrappeddevs                           r   r  ConcatKernel.create`  s   
 %%'q	##%q	**,-}oC'#h-'''''q#f+&A++-J  /z?c(m3339&&(E1119'')V3333x=)8"*+
"=HK"#''"2"2"L"L Z]#HK	 * x}- ' (6'H'H'R''"//M
 s6{#A	A$Q''K 88fmmTT$B8$LM $ +.-WPV-W*W'ww++003,--AtL/AA-*e3 
<
 $
<
 
9
 
9
 ;8DM 
KQ
 
	 !!!$$# 

 M*'FAscHj#9::EDIE:++  Cq!1;> ! L lF33GT,5GG3m22D99U4@T@T;UU9  ''5#((H--"%(("6"6"8"%(( ?J77#3355NN,,S9388$$"<00 ? ? AB1 (4 x=1!4!4V^=S=S!T!TGG++H5WW44]C"11-2F2FG	""=1r   Nc                2   [        U[        5      (       a  U R                  UR                  U5      $ [        U[        [
        45      (       d   [        U5      5       e[        UR                  [        5      (       a  [        UR                  R                  [        5      (       a  UR                  R                  (       d  gUc  g[        UR                  5       5      [        UR                  5       5      :w  a  g[        S [        UR                  5       UR                  5       5       5       5      $ [        UR                  S5      =(       aJ    [        UR                  R                  [         5      =(       a    [        UR                  ["        5      (       + $ )NFTc              3  x   #    U  H0  u  p[         R                  R                  R                  X5      v   M2     g 7fr   rH  rI  s      r   r   =ConcatKernel.can_realize_into_without_copy.<locals>.<genexpr>  s1      EFB   88@@ErK  rN  )r   r   can_realize_into_without_copyrM  ro  rq  r   r7	  rN  rP  rI	  r   r2  r   r   r  r  ExternKernelAlloc)r  r  r  s      r   r	  *ConcatKernel.can_realize_into_without_copy  s!    c9%%44SXXsCC#*566AS	A6chh 344sxx<<xx00 { 3>>#$CNN,<(== !#.."2CNN4DE   CHHh' <388??N;<sxx):;;	
r   c                ,    [         R                  X5      $ r   )r	  ra  r`  s     r   ra  !ConcatKernel.get_free_symbol_uses  s     --dBBr   c                   [        U[        5      (       d&  [        U5      (       a  [        U5      u  p4[        X4S9n[        U[        5      (       d   [	        U5      5       e[        U[
        5      (       a  U R                  UR                  U5      $ [        U[        5      (       ai  UR                  5         [        UR                  S5      (       d   eU R                  X5      (       a&  [        U5      UR                  l        UR                  $ [        R                  UR!                  5       UR#                  5       UR%                  5       ['        UR)                  5       UR)                  5       5       VVs/ s H.  u  pV[*        R,                  R.                  R1                  XV5      PM0     snnS9nU R                  Xr5      $ s  snnf )NrL  rN  rh  )r   rS  r
  rO  r   r   r  rM  rq  r  r  r	  r  rN  rO  r  r  r  r  r   r	  rn   r  r  r  )r  r  r  rV  rN  r  r  pws           r   r  ConcatKernel.realize_into  sZ   
 #//$S))"7"<%7B#//:c:/c9%%##CHHc22c:&&KKM388X....00::"1#"6xx>>#--/__&  ??DA   ::1@?	  
 ((s   75Gc                    gra  r   r  s    r   r'  ConcatKernel.should_allocate  r  r   r   )rj  r  rB  r   r   rq  r   )r  r   r  r  r   r   r  r  )r  r   r  r   r   r   r  )r   r   r   r   r  rF  r  r	  rY   ra  r  r'  r   r   r   r   r	  r	  Z  s    
 o ob 26!
!
/!
	!
 !
F N+$)C!C	!C ,C
 ) )Br   r	  c                  $  ^  \ rS rSr% SrSrS\S'   \R                  " \	S9r
S\S'   S	rS
\S'   S	rS\S'   S	rS\S'   \R                  " \S9rS\S'   S	rS\S'   S	rS\S'   \R                  " \	S9rS\S'   S	rS\S'   \R                  " \	S9rS\S'   \R                  " \S9rS\S'          SC                     SDU 4S jjjrSES jrSFS jrSGS  jrSGS! jr SH     SIS" jjrSJS# jrSHSKS$ jjrSLS% jrSMS& jrSNS' jr \!SOS( j5       r"\#        SPS) j5       r$\#SQS* j5       r%\#SRS+ j5       r&\#SRS, j5       r'\#   SS         STS- jj5       r(\# SU       SVS. jj5       r)\# SU       SWS/ jj5       r*\#SRS0 j5       r+\#SRS1 j5       r,\#SRS2 j5       r-\#SRS3 j5       r.SGS4 jr/      SXS5 jr0SHSYS6 jjr1SZS7 jr2S[S8 jr3SUS\S9 jjr4SNS: jr5SJS; jr6SJS< jr7SJS= jr8S]S> jr9S^S? jr:\;" S 5       SU   S_S@ jj5       r<SNSA jr=\=r>SBr?U =r@$ )`r  i"  z
A class that represents Kernels which are not directly lowered to Inductor
Loop Level IR, such as custom operators, or aten operators which we fallback to.
r   r  constant_argsr  r~  r   NOptional[ReinterpretView]output_viewr  python_kernel_namecpp_kernel_nameIterable[str]ordered_kwargs_for_cpp_kernelOptional[_OpOverloads]op_overloadzOptional[list[dict[str, Any]]]arg_propertieszdict[str, dict[str, Any]]allarg_propertiesz#Optional[dict[str, dict[str, Any]]]kwarg_propertiesz"dict[sympy.Symbol, pytree.KeyPath]unbacked_bindingszlist[MutationOutput]mutation_outputsc                @  > [         TU ]  UUUS9  X@l        U(       a  UO0 U l        X`l        Xl        U R                  U5        U R                  U5        Xl        U R                  5         0 U l
        / U l        [        R                  R                  U l        0 U l        g Nr	  )r  r  r	  r   r	  r	  set_cpp_kernel_nameset_python_kernel_namer	  collect_arg_kwarg_propertiesr	  r 
  rn   r  r	  fx_noder   )r  r   rN  rj  r	  r   r	  r	  r	  r	  r	  r  s              r   r  ExternKernel.__init__>  s     	 	 	

 + &fB&&  1##$67-J*))+!# "ww+++-r   c                     U /U R                   Q$ r   r 
  r  s    r   r  ExternKernel.get_outputs^  s    -t,,--r   c                    [        5       $ r   r9   r  s    r   r  %ExternKernel.get_unbacked_symbol_defsa  r  r   c                   [        U R                  [        R                  R                  5      (       af  U R                  R
                  R                   Vs/ s H:  nUR                  (       a  M  UR                  UR                  UR                  S.PM<     snO.[        [        U R                  5      5       Vs/ s H  n0 PM     snU l        [        U R                  [        R                  R                  5      (       aS  U R                  R
                  R                   Vs0 s H'  nUR                  UR                  UR                  S._M)     snO0 U l        [        U R                  [        R                  R                  5      (       a  U R                   (       dR  U R                  R
                  R                   Vs/ s H!  oR                  (       d  M  UR                  PM#     snU l        U R                  R
                  R                   Vs/ s H  oR                  (       d  M  UPM     snU l        g / U l        g s  snf s  snf s  snf s  snf s  snf )N)r   r   rb  )r   rb  )r   r	  r  _ops
OpOverload_schema	arguments
kwarg_onlyr   	real_typerb  r   r   rj  r	  r	  r	  schema_kwargs)r  r   r   s      r   r
  )ExternKernel.collect_arg_kwarg_propertiesd  s    $**EJJ,A,ABB ))11;; <A||FFKK%&__
 < $C$4565"56 	$ $**EJJ,A,ABB ))11;;;A qOO;
  	 d&&

(=(=>>55$($4$4$<$<$F$F6$Fq,,FAFF$F62  ++33=="=a="D "$D? 76"s0   I/(I:I#.IIII+Ic                    [        U R                  [        5      (       a!  U R                  5         U R	                  5         g g r   )r   rN  r  apply_constraintr<  r  s    r   r  ExternKernel.decide_layout  s0    dkk>22!!#  3r   c                    [        X5      u  p4U(       a  UR                  U5        U(       d  U R                  5       nU(       a  SSKJn  U" XSS9nUR                  X&5        g g )NrA   )'set_kernel_post_grad_provenance_tracingT)rj  )ra   make_commenttry_get_kernel_namer  r
  write_provenance_debug_handle)r  wrapperr	  
origin_str_detailed_origin_strr
  debug_handles          r   codegen_commentExternKernel.codegen_comment  s]     ,?t+M(
  ,224KFBTL 11+L r   c                    [         er   r  r  r
  s     r   codegenExternKernel.codegen  r  r   c                   Xl         [        R                  R                  (       a3  [	        U R
                  [        R                  R                  5      (       d  g U R
                  nU R                   c  UR                  S:X  aV  UR                  S:X  a  UR                  R                  S5      S   OUR                  R                  SS5      nSU S3U l         g UR                  R                  U l         g g )Natenrw  .r   r   z
at::_ops::z::call)r	  rn   r  cpp_wrapperr   r	  r  r
  r
  	namespace_overloadnamer   r  replacer
  r   )r  r	  r	  opnames       r   r
   ExternKernel.set_cpp_kernel_name  s    .ww""*ejj33+
 +
 !!'6) ++y8 OO))#.q100c: 
 *4F86'B$'-~~':':$ (r   c                   Xl         Ub  g U R                  nUc  g [        U[        R                  R
                  5      (       a  SUR                   3U l         g UR                  R                  SS5       SUR                   3U l         g )Nztorch.ops.higher_order.._ops..ops.r*
  )	r	  r	  r   r  r
  HigherOrderOperatorr   r   r.
  )r  r	  r	  s      r   r
  #ExternKernel.set_python_kernel_name  s    "4)!!>

 > >??(??P&QD# $$,,Xw?@&//ARS #r   c                Z   SSK Jn  U R                  5       =n(       a  UR                  O[        R
                  R                  n[        R
                  R                  (       a  U R                  $ [        R
                  R                  (       a  [        [        R
                  R                  U5      (       d(   [        [        R
                  R                  5      5       eU R                  c  g [        R
                  R                  R                  U R                  U5      $ U R                  $ )NrA   )CppWrapperCpu)codegen.cpp_wrapper_cpur7
  r  r   rn   r  device_type
fx_wrapperr	  r+
  r   rw  r	  get_c_shim_func_name)r  r7
  dr  s       r   r
   ExternKernel.try_get_kernel_name  s    :!%!22A29L9L77***WW  agg22MBB D$$E B ##+77''<<$$f  ***r   c                0    U R                  5       nUc   eU$ r   )r
  r  s     r   get_kernel_nameExternKernel.get_kernel_name  s!    '')r   c           	         [         R                  U R                  5       U R                  5       U R	                  5       U R                  5       U R                  5       U R                  5       S9nUR                  5         U$ )N)r  r  r  r  r}  r{  )	rO  r  r  r  r  r	  r  r  r  )r   r	  s     r   
copy_inputExternKernel.copy_input  sa    <<>++-]]_::<))+oo'  
 	

	r   c                "
  ^^ X#S.n[         R                  " U5      u  nm/ m/ n/ nU H  nTR                  [        U[        5      =(       a    [        U[
        5      (       + 5        TS   (       a  UR                  U5        M[  [        U[        5      (       a2  [        R                  R                  R                  R                  US S9nUR                  U5        M           SUU4S jjn	U V
s/ s H  oR                  U
5      PM     nn
U H  n
[        U
5      (       d  M  [        U
SS9  M!     / nU GH  n
[        U
[        5      (       dh  U
R!                  5       [        R                  R"                  ;   a<  UR                  [        R                  R"                  U
R!                  5          5        M  [        U
[        5      (       dh  U
R!                  5       [        R                  R$                  ;   a<  UR                  [        R                  R$                  U
R!                  5          5        M  [        U
[&        5      (       a"  UR                  U
R)                  5       5        GM5  [        U
[*        R,                  R.                  R
                  5      (       ar  U
R0                  R2                  nU
R0                  R4                  S:X  a  Uc   eUR                  [*        R6                  R8                  U   R;                  5       5        GM  UR                  [=        U
SS95        GM     U	" X5      u  pU" U0 UD6nS n[        R>                  R                  =n(       a  [        R@                  RB                  RE                  S	5      n[G        5       n[        R@                  RH                  [*        RJ                  RL                  RN                  L a  US
   n[Q        [        R@                  5      nU   [S        U[        R@                  U5        S S S 5        [U        UUU5      n[        U[V        [X        45      (       d  U/OUnU H  n[        U[*        RZ                  5      (       d  M$  UR\                  (       d  M7  [^        R`                  (       a  MN  Sn[        R                  R@                  RB                  RE                  SS 5      =n(       a  U SU 3nU[        R                  l1        M     UUUU	U4$ s  sn
f ! , (       d  f       N= f)N)r   r   r0  )rT  c                6  > / n[        U 5      n[        U5      nT H@  nU(       a  UR                  [        U5      5        M&  UR                  [        U5      5        MB     [        R                  " UT5      nUR                  S/ 5      UR                  S0 5      4$ )Nr   r   )rD  r  rC  pytreetree_unflattenr  )	new_tensor_argsnew_non_tensor_argsr  
it_tensorsit_non_tensors	is_tensorr  	args_specis_arg_tensors	          r   unflatten_args3ExternKernel.process_kernel.<locals>.unflatten_args
  s     Fo.J!"56N*	MM$z"23MM$~"67	 +
 %%fi8A55$aeeHb&999r   Tre  r!  )r   r0  rA   zEsparsity not handled. Please file issue for sparse inference weights.r  z Found from : 
 )rH
  r   rI
  r   r   ztuple[list[_T], dict[str, _T]])2rF
  tree_flattenr  r   r   GeneratorStater    rn   r  r  r   create_symintnoder	  r
  rO  ro  rs  	constantstorchbind_constantsr	  	get_valuer  r  irr  r   r   r!  default_generatorsclone_stater   r  r	  rb  r  r
   r  _higher_order_opseffectswith_effectsr/   r4   r0   r   r   Tensor	is_sparserB   graph_partitiondisable_cudagraphs_reason)r  r	  r   r   binded_args	args_flattensor_argsnon_tensor_argsr	  rO
  r   example_argsdevice_indexnew_args
new_kwargsexample_outputr	  r   node_meta_valctxexample_out_lir  msgr  rM
  rN
  s                           @@r   process_kernelExternKernel.process_kernel  s     $6%22;?	9%'C  3'O
30O,O R ""3'c4((''**44FFsQUFVC&&s+ 	:)	:@L	:+	: 	: 6AA[((+[A A$Q''%a5  	 	 A a**qzz|qww?P?P/P##AGG$5$5ajjl$CDq(++JJLAGG$?$??##AGG$?$?

$MNA//##AKKM2Au11@@AA xx~~xx}}.<3KKK##JJ11,?KKM ##$5aT$JK' *  .lL8Z8JN---9-NN//33E:M0;C~~$$(?(?(G(G(T(TT -a 0<Q^^L	1>>>J  9>=! ntUm<<  	  A1ell++KKK...]"#''"6"6";";"?"?t"TT;T E!2;-@C471   
 	
Y Bj s   2S;6T  
Tc                \   [        U[        5      (       d   [        U5      5       e[        U[        5      (       a  U$ UR	                  5       n[
        R                  R                  UR                  5       5      nUc   eUR                  5       nUb  SUR                  ;   a  [        U[        [        [        45      (       a  [        UR                  [        5      (       a  UR                  S   R                  [         R"                  S9(       d/  UR                  S   R                  [         R$                  S9(       a)  UR'                  [)        UR+                  5       5      5        OUR-                  5         [.        R0                  " UR+                  5       SS9u  pVUS   nUR3                  5       " U5      n[
        R                  R4                  R7                  X5      n[
        R                  R4                  R9                  X5      n	[
        R                  R4                  R;                  X5      n
[=        Xy5      U
-   nX:w  a  [>        RA                  SU	U
U5        [B        e[        URD                  [G        URI                  5       URK                  5       UR+                  5       U	U
SS9S	9$ )
z
In order to pass this to an extern kernel we need a
ReinterpretView not a View.  This allows us to avoid some
unneeded copies.
r0  r	  r  r}   r   z@convert_to_reinterpret_view failed: stride=%s offset=%s index=%sFr  rL  )&r   ro  r   rS  rp  rn   r  r  rs  r  rb  rr  r  rN  r  rk  r  rj  rl  rJ  r-   r	  r<  rC   r   r  r  r  stride_vars
offset_varrf   r  r  r  rM  rP  r  r  )r  r   x_unwrap_viewr  x_unwrap_view_fx_node
index_argsr  r  r   rU  rQ  expecteds               r   convert_to_reinterpret_view(ExternKernel.convert_to_reinterpret_viewl  s4    !X&&/Q/&a))H gg  !7!7!9: # 3 3 5 "-.333=?FJ*OPP=//@@%**51??"'"5"5 @  )--e4BB"'"8"8 C 
 77.}/E/E/GH '')!-!@!@JJL"

  ]
 ,  55eH''""..uA!!,,U?Z1F:IIR	 &%,,.kkmZZ\

 
	
r   c           	     `   Uc
  [        5       $ [        U[        [        R                  R
                  R                  [        45      (       a	  [        US9$ [        U[        5      (       am  [        5          [        R                  R                  [        R                  " UR                   UR#                  5       UR%                  5       S95      sS S S 5        $ [        U[&        5      (       a  U$ [        U[(        5      (       a  U R+                  UR,                  5      $ [        U[.        5      (       a1  [/        U R+                  UR,                  5      UR1                  5       S9$ [        U[2        5      (       a@  UR5                  5         [7        UR9                  5       5      (       a   U R;                  U5      $ [        U[>        5      (       a  UR5                  5         U$ [        U[@        [        45      (       a  U$ U RC                  U5      $ ! , (       d  f       GNF= f! [<         a     Nsf = f)N)r)  )r  r  rL  )"rA  r   r    r   r   r   r   r   r   r  r;   rn   r  add_tensor_constantr  rT  r  r  r  ri  r   r	  rM  rS  r  ro  r  r
  rp  rw
  r  rq  NonTensorObjrB
  r	  s     r   r	  ExternKernel.realize_input  s   9'))a$ 3 3 ; ;SABB(a00a"" ()ww22LLallnU *) a((Ha##$$QVV,,a))"&&qvv.q||~  a""IIK$Q]]_55::1== a$$IIKHa,(=>??H~~a  3 *)" + s   4AH(H  
H 
H-,H-c                    [        U5      (       a@  [        UR                  5       5      S:X  a  U$ UR                  5        H  nUS:X  d  M  Us  $    U R                  U5      $ r"  )r
  r   r2  rB
  )r  r   r  s      r   require_stride1ExternKernel.require_stride1  sR     ##1<<>"a',,.Q;H ) ~~a  r   c                
   Uc  Uc   eUR                  5       S;   a	  U(       d  U$ [        U5      (       Ga/  [        UR                  5       [        5      (       a  U(       a  [        X5      =(       a(    [        UR                  5       R                  5      (       + n[        USSU(       aJ  [        [        R                  R                  R                  UR                  5       R                  5      5      OUUS9  U$ [        USSS UUS9  U$ [        UR                  5       [        [        45      (       ay  U(       a$  UR                  5       R!                  U5      (       d>  U(       aG  [#        X1R                  5       R                  UR%                  5       5      (       a  Ub  ['        X5      $ U$ [        UR                  5       =n[(        5      (       a  [        UR+                  5       =n[        5      (       a  [-        S5      e[        U[        5      (       aO  U(       a  UR!                  U5      (       d0  U(       a+  [#        X7R                  UR%                  5       5      (       a  U$ [        U[.        5      (       ak  U(       a$  UR                  5       R!                  U5      (       d>  U(       a9  [#        X1R                  5       R                  UR%                  5       5      (       a  U$ [        U[0        5      (       a  [        UR2                  [4        5      (       a  [        UR2                  [6        5      (       d  [        UR9                  5       =n5      (       a  [;        US5      (       ao  [        UR2                  [<        5      (       dP   U R?                  UR2                  5      Ul        U(       a  U RA                  XUS9$ U(       a  U RC                  XUS9$  S n	UR%                  5       n
Ub  [        R                  R                  n[G        [I        UR%                  5       5      5       Vs/ s HJ  nURK                  X<   S	5      (       d  M  URM                  UR%                  5       U   S
5      (       d  MH  UPML     n	nU	 H.  n[N        RP                  RR                  RU                  XS	S5      nM0     U RW                  U5      n[        USSUUUS9  U(       a  [        X5      (       d   e U$ U	(       a<  U
b  Uc   e[N        RP                  RR                  RY                  X5      n['        X5      $ U$ ! [D         a     GNgf = fs  snf )N)r   rA   TF)rf  rp  rq  r@  ro  zHthe MutationLayoutSHOULDREMOVE's real layout shouldn't be FlexibleLayoutrM  rr  r   r   rA   )-r  r
  r   r  r  rv  r2   r  rO  r   rn   r  r  size_hints_or_throwrP  r  ru  rE  r	  rZ  r  r  r  r  r   rM  ro  rS  rp  r  r	  rw
  require_stride_orderrequire_exact_stridesr  r   r   r=  r  r  r  loweringslice_rB
  r#  )r  r   r   rN  r@  use_current_stride_ordermutation_layoutr  rp  expanded_dims	orig_sizer  r   rB  s                 r   require_stridesExternKernel.require_strides  s     M$===;;=F"=H !##!,,..99 0R0 0K3ALLN4I4IJJ - *#(-  8 - ! 0 0 D D$%LLN$9$9!" "'&3 H *#(-%)&3&3 HALLN[/,JKK1<<>;;EBB!1%||~'<'<ajjl  %0 4AE 
 $%LLN25O  $3$?$?$AA[N  )b   [99{<<UCC%5)+=+=qzz| 
 H a%%q||~77>>-!<<>#8#8!**, 
 Hq)$$1668,,qvv77%Q]]_&DkEEV,,{//1BCC88@33 4   #44 5   # .2JJL	$ww''H s1::<011A33M4DaH  11!**,q/1E 1   %OO,,33AAqA %
 NN1!''	
 5a????  (]-FFF((//=A21DDW ' s*   
6T3 T3 ,U&U7U3
U Uc           
         U R                  UU Vs/ s H:  n[        U[        R                  5      (       a  UR                  R
                  OUPM<     snUS9$ s  snf )N)rN  r@  )r
  r   r  SymIntr   r)  )r  r   rN  r@  r   s        r   r
  "ExternKernel.require_exact_strides  s]     ""KXKXaz!U\\::A= ( # 
 	
s   AA
c                "    U R                  XUS9$ )N)r   r@  )r
  )r  r   r   r@  s       r   r
  !ExternKernel.require_stride_order  s     ""1"OOr   c                .    U R                  U[        5      $ r   )r
  rk  r	  s     r   require_channels_last"ExternKernel.require_channels_last  s    ''+<==r   c                .    U R                  U[        5      $ r   )r
  rm  r	  s     r   require_channels_last_3d%ExternKernel.require_channels_last_3d  s    ''+=>>r   c                    SS jnU" U5      (       a  U$ U R                  U[        R                  UR                  5       5      5      $ )Nc                     U R                  5       nU[        R                  R
                  ;   =(       a'    [        R                  R
                  U   R                  $ ! [        [        4 a     gf = fr  )rs  AttributeErrorr  rn   r  rT
  	is_mkldnn)r   r   s     r   is_mkldnn_tensor9ExternKernel.require_contiguous.<locals>.is_mkldnn_tensor  s]    zz| 177,,,R1B1B41H1R1RR #$78 s   A A0/A0r   r   r   r   r
  r  r  r	  )r  r   r
  s      r   r  ExternKernel.require_contiguous  sC    	S AH,,>44QZZ\B r   c                h    U R                  U[        R                  UR                  5       5      5      $ r   r
  r	  s     r   require_contiguous_strides'ExternKernel.require_contiguous_strides  s-     ((~00>
 	
r   c                    g r   r   r  s    r   r
  ExternKernel.apply_constraint  r5  r   c                   [        U[        5      (       d   [        U5      5       e[        U[        5      (       d  [        U5      nU R                  (       d   S5       e[        U5      n[        U R                  5      nX4:  aq  [        R                  SU R                  XC-
  5        [        X45       H?  nU R                  U   S   nUR                  Xb;   a  X&   OU R                  U   S   5        MA     U$ )Nz/ExternKernel.arg_properties should not be emptyzv%s has %d unprovided positional arguments. Will check if they are in the keyword arguments or will use default values.r   rb  )r   r   r   r   r	  r   r  r  r	  r   r  )r  r   r   n_args
n_pos_argsr   arg_names          r   fill_non_provided_args#ExternKernel.fill_non_provided_args  s     $))54:5)$%%:D""U$UU"T,,-
 II^  #	 6...q1&9) $,,Q/@ / r   c                   [         R                  R                  (       Ga`  / nS nU(       ae  U R                  (       aT  [	        U R
                  5      [	        U5      :X  d   S5       eU R                   Vs0 s H  oDR                  S5      U_M     nn[        U R
                  5       H  u  pVUb3  Uc   eUR                  X   5      nU(       a  UR                  S5      OS nOb[	        U R                  5      U-   n	U R                  (       a7  U	[	        U R                  5      :  a  U R                  U	   R                  S5      OS nUR                  [         R                  R                  R                  Xh5      5        M     U$ U R
                   V
s/ s H,  n
[         R                  R                  R                  U
5      PM.     sn
$ s  snf s  sn
f )NzDnames passed to codegen_const_args does not match self.constant_argsr   r   )rn   r  r+
  r	  r   r	  r  r   rj  r  rw  val_to_arg_str)r  r  r  name_to_arg_propertiesr	  r   r   proptype_r   r  s              r   codegen_const_argsExternKernel.codegen_const_args  s   77F
 &*",,4--.#e*< Z< 594G4G*4GSGGFOS(4G ' * "$"4"45)5 ,,,155eh?D04DHHV,$Edkk*Q.C  ..3T=P=P9Q3Q ++C044V<! 
 agg22AA!KL 6 MDHDVDVWDVqAGG((77:DVWW'*& Xs   4G3G
c                    [         R                  R                  (       aD  U R                  b7  U R	                  / U R
                  QU R                  QU R                  5      nSnOU R
                  nSn/ n[        U5       H  u  pE[         R                  R                  (       a  U R                  (       a  U[        U R                  5      :  d   S5       eU R                  U   R                  S5      nUR                  [         R                  R                  R                  XV5      5        M  UR                  [         R                  R                  R                  U5      5        M     U(       a  UR                  U R!                  5       5        U$ )NFTz-Invalid access to ExternKernel.arg_propertiesr   )rn   r  r+
  r	  r
  rj  r	  r   r   r	  r   r  r  rw  r
  r  r
  )r  rj  need_codegen_constant_argsr   r   r   r
  s          r   codegen_argsExternKernel.codegen_args   s*   774#3#3#?003$++3 2 23T[[F */&[[F)-&f%DAww""**q3t7J7J3K/K CK ++A.226:AGG00??IJAGG00??BC & &KK//12r   c                    X;   a  UR                  U5      $ XR                  ;   a  U R                  R                  U5      $ U R                  R                  U5      =nb  UR                  S5      $ [        U S35      e)zGiven an argument name, queries for values in (in order):
1. any provided kwargs for this function.
2. the class self.kwargs member.
3. any available default arguments in self.allarg_properties.rb  z not in self.allarg_properties)r  r   r	  r  )r  r
  r   r	  s       r   get_kwargs_valueExternKernel.get_kwargs_value  st    
 ::h''{{";;??8,,))--h77CD77?++z)GHIIr   c           	        [         R                  R                  (       a  U R                  b  [	        U R
                  5      S:X  a  / $ / nU R                   H  nU(       a  US:X  a  M  U R                  U5      n[        U[        5      (       a  UR                  U5        MK  U R                  c   eU R                  R                  U0 5      R                  S5      nUR                  [         R                  R                  R                  XE5      5        M     U$ U R                  R!                  5        VVs/ s H3  u  pdU S[         R                  R                  R                  U5       3PM5     nnnU$ s  snnf )Nr   r   r   r  )rn   r  r+
  r	  r   r
  r	  r
  r   r    r  r	  r  rw  r
  r   r  )r  skip_outr   r
  r  r
  ks          r   codegen_kwargsExternKernel.codegen_kwargs'  s8   77+D4F4F0G10L	F >>E 1))(3a&&MM!$11=== 2266xDHHPEMM!''"6"6"E"Ea"OP ?"  !KK--//DA #Qqww++::1=>?/   	s   6:E5c                    U R                   bS  U R                   R                  n[        USS5      nUR                  SS5      nUR	                  SS5      S   nU SU 3nU$ SnU$ )	Nr   unknown_namespacer2
  r3
  r*
  rA   r   
unknown_op)r
  r  r   r.
  rsplit)r  r  op_namespaceop_names       r   get_op_nameExternKernel.get_op_nameA  sv    <<#\\((F"6<9LML'//'BL'..sA6q9L%ax0G  #Gr   c                   [         R                  (       a  [        R                  R                  (       d  [        U R                  5       5      S:X  a  g [        R                  R                  R                  U R                  5       5      n[        R                  R                  R                  U R                  5       5      nU R                  5       nUR                  SU R                  5        SU SU SU< S3	5        g g g )Nr   zassert_size_stride(r  r  )rB   size_assertsrn   r  r+
  ri   r	  rw  codegen_shape_tupler2  r
  ry  rs  )r  r
  r  r  r
  s        r   codegen_size_asserts!ExternKernel.codegen_size_assertsL  s    qww':':T]]_-277'';;DMMOLDWW))==doo>OPF&&(G%dmmo%6bb7+UVW (;r   c           	     j   [         R                  (       a  [        R                  R                  (       d~  U R                  5       nU[        R                  R                  ;  nU R                  5       nU(       a!  UR                  SU S[         SU< S35        g UR                  SU SU S35        g g g )Nzassert_alignment(r  r  z	# buffer z (op: z) is assumed to be not aligned)
rB   alignment_assertsrn   r  r+
  rs  rz  r
  ry  rb   )r  r
  r   alignedr
  s        r   codegen_alignment_asserts&ExternKernel.codegen_alignment_assertsX  s    ##AGG,?,?==?D!''";";;G&&(G!!'vR/@7+QO !!vVG94RS -@#r   c                    [         R                  R                  (       a  [        R                  R
                  (       a  gUR                  5         U R                  5       nUR                  SU SU S35        g)zS
Track outputs of fallback operators if config.test_configs.track_memory_lifecycle
Nztrack_tensor(z, 'z'))	rB   test_configstrack_memory_lifecyclern   r  r+
  "write_memory_track_allocation_oncers  ry  )r  r
  r   s      r   codegen_memory_tracking$ExternKernel.codegen_memory_trackingf  sV     ""99QWW=P=P224}}M$s4&;<r   c                N    U R                  5       nU R                  5       nU/ /U4$ )z4
get output sizes and strides, for template_codegen
)r	  r2  )r  r  r  s      r   get_group_strideExternKernel.get_group_strideq  s*     //#r{G##r   c                   [         R                  R                  nU R                  5       nU R	                  5       nU Vs/ s H  oAR                  U5      PM     nn[        [        U5      5       Vs/ s H  n[        SU 35      PM     nn[        [        [        U5      5      UR                  SS9n[        U5       VV	s0 s H  u  pX_M	     n
nn	[        [        U
5      5       Vs/ s H  oZU   PM	     nnU Vs/ s H  oVU   PM	     nnU R                  5       nU" U5      n[         R                  R                  R                  XbU/5      u  pn[        S5      u  nn[        [!        Xo" U Vs/ s H  nU" U5      PM     sn5      5      5      n[#        [$        R&                  " U5      U5      nU[)        U5      4$ s  snf s  snf s  sn	nf s  snf s  snf s  snf )z3
Manually get canonicalization of the output index
r<
  T)r  rB  c)rn   r  r  r	  r2  r  r   r   rg   r  rs  r   r  r  rM   r   r   rj   r   r#  r   )r  r  r`  rU  r   r   r  index_orderr   r   r   r   r_  r   	new_sizesr   r  r   add_varreplacements                       r   canonicalizeExternKernel.canonicalizez  s   
 77##//#29:'Q%%a(':;@U;LM;La(1QC1;L
MU3w<0g6I6ISWX+4[+AB+Axs#(+AB$)#f+$67$6q$67-23UmU
3##%
#%&WW%5%5%E%Ew&
"	F !%
73z7	3R	1GAJ	3R+STU5<<.<eI&&&+ ;M C73 4Ss#   F=1G G'G;G/Gc                    U(       a  [         O[        n[        R                  X5      nU R                   H  nX2" U5      -  nM     U R
                  R                  5        H  nX2" U5      -  nM     U$ r   )maybe_free_unbacked_symbolsmaybe_free_symbolsr  ra  r	  r   r   )r  ri  maybe_get_symbolsr  r	  s        r   ra  !ExternKernel.get_free_symbol_uses  sp     ,9'>P 	 --dB%%C"3''A &;;%%'C"3''A (r   c           
     ,   [        U SS 5      nSU< 3/nU[        R                  " U 5       Vs/ s H'  nUR                   S[        XR                  5       3PM)     sn-  nUR	                  SU R
                  < 35        U R                  U5      $ s  snf )Nr	  zpython_kernel_name=r  r  )r   r  fieldsr   r  r}  r  )r  r	  r  r  s       r   r   ExternKernel.__str__  s    d$8$?!+1
 	$++D1
1 zzl!GD**5671
 	
 	|D$4$4#789u%%
s   .B)r	  r   r	  r	  r	  r
  r   r 
  r	  r	  r	  r	  r
  r	  r   NNNNr   N)r   r  rN  r  rj  r	  r	  r  r   dict[str, Any] | Noner	  r	  r	  r  r	  r  r	  r	  r	  r	  r   r   r  r  r~  r   )r
  rs   r	  r  r   r   r
  rs   r   r   r	  r  r   r   )r	  r  r   r   r  r  )r   r   r   r   )r	  r|   r   r   r   r   r   zituple[Any, list[Any], list[Any], Callable[[Any, Any], Any], Optional[dict[sympy.Symbol, pytree.KeyPath]]])r   r   r   rS  r$  )NNF)
r   r   r   Optional[Sequence[int]]rN  r  r@  r   r   r   r  )r   r   rN  r  r@  r   r   r   )r   r   r   r  r@  r   r   r   )r   r  r   r~  r   r  )r  rz  r   r   r   r   )r
  r   r   r   r   r   )r
  r   r   r   )r   z'tuple[list[Sequence[Expr]], list[Expr]])r   ztuple[Expr, Sequence[Expr]]r  )Ar   r   r   r   r  r	  r   r  r  r   r   r	  r	  r	  r   r	  r	  r	  r	  r	  r	  r 
  r  r  r  r
  r  r"
  r&
  r
  r
  r
  r?
  r  rB
  rF  rn
  rw
  r	  r~
  r
  r
  r
  r
  r
  r  r
  r
  r
  r
  r
  r
  r
  r
  r
  r
  r
  r
  r
  rY   ra  r   rE  r   r  r  s   @r   r  r  "  s   
 $&M=%(..tDFND-1K*1(,,%)O]) 4?3D3D4!=  +/K'.59N293>3D3D40  =A9@<G<M<M=9  .9->->t-T*T (*(,15,0)-79.2.. . :	.
 %. &. /. *. '. (5. ,. 
. .@.#$J! KOM+M:GM	M"";0+$
 
 
 |
!|
*-|
9<|

|
 |
| C
 C
J !! !!F ! !  *.6:#aa 'a 4	a
 a 
a aF QV	
	
'9	
JN	
		
 	
 DIPP,P=AP	P P
 > > ? ?  " 
 
"!"+9"	"HXB4J4	
	=$'> N+$)!	! ,
& Hr   r  c                  x   ^  \ rS rSrSS jr       S                   SU 4S jjjrS	S jrSrU =r$ )
ExternKernelOuti  c                &    UR                  U 5        g r   )generate_extern_kernel_outr%
  s     r   r&
  ExternKernelOut.codegen      **40r   c
                :  > U R                  U5      n
[        U
[        5      (       d   [        U
5      5       e[        TU ]  S UU
UU=(       d    0 S UUUU	5
        [        R                  R                  U 5      U l	        [        R                  R                  U 5        g r   )r  r   r   r   r  r  rn   r  r  r   r  )r  rN  rj  r	  r   r	  r	  r	  r	  r	  unwrapped_inputsr  s              r   r  ExternKernelOut.__init__  s      ..v6*H55Mt<L7MM5Lb)	
 GG++D1		""4(r   c                    gra  r   r  s    r   r'  ExternKernelOut.should_allocate  r  r   r  r
  r
  )rN  r  rj  r  r	  r  r   Optional[dict[str, Any]]r	  r	  r	  r  r	  r  r	  r  r	  r	  r   r   r  )	r   r   r   r   r&
  r  r'  r   r  r  s   @r   r
  r
    s    1 (*+/15,0)-79.2)) !) %	)
 )) /) *) ') (5) ,) 
) ): r   r
  c                  ,   ^  \ rS rSrSU 4S jjrSrU =r$ )RandomSeedsi  c           	       > [         R                  " [         R                  5      n[        TU ]  [        U[         R                  U/S9/ UR                  UR                  U//SS[        R                  R                  S9  g )Nr  zaten.randint.low_outzat::_ops::randint_low_out::call)rN  rj  r	  r	  r	  r	  )r  r[  r%  r  r  rP  r|  r-  r)
  randintlow_out)r  countr  limitsr  s       r   r  RandomSeeds.__init__  sl    U[[)kkW
 !::vzzE7;5 >,, 	 	
r   r   )r   r   r  r  r   r   r   r   r   r   r  r   r  r  s   @r   r
  r
    s    
 
r   r
  c                  |   ^  \ rS rSrSS jr      S                 S	U 4S jjjrS
S jrSS jrSrU =r	$ )r	  i  c                &    UR                  U 5        g r   )generate_extern_kernel_allocr%
  s     r   r&
  ExternKernelAlloc.codegen  s    ,,T2r   c	                Z  > U R                  U5      n	[        S U	 5       5      (       d   e[        T
U ]  S U[	        [
        [           U	5      UU=(       d    0 S UUUU5
        / U l        [        R                  R                  U 5      U l        [        R                  R                  U 5        g )Nc              3  B   #    U  H  n[        U[        5      v   M     g 7fr   r	  )r   r   s     r   r   -ExternKernelAlloc.__init__.<locals>.<genexpr>  s     C2BQ:a((2Br  )r  r   r  r  r   r   r   r  rn   r  r  r   r  )r  rN  rj  r	  r   r	  r	  r	  r	  r
  r  s             r   r  ExternKernelAlloc.__init__  s      ..v6C2BCCCCC&!#34Lb)	
 ')GG++D1		""4(r   c                    gr  r   r  s    r   r'  !ExternKernelAlloc.should_allocate  r  r   c                    [         er   r  r  s    r   r
  "ExternKernelAlloc.apply_constraint  r  r   )r   r  r
  )r   NNNr   N)rN  r  rj  r  r	  r  r   r
  r	  r  r	  r  r	  r  r	  r	  r   r   r  r~  )
r   r   r   r   r&
  r  r'  r
  r   r  r  s   @r   r	  r	    s    3 (*+/,0)-79.2)) !) %	)
 )) *) ') (5) ,) 
) )@" "r   r	  c                  h   ^  \ rS rSrSr        S	U 4S jjrS
S jrSS jrSS jrSS jr	Sr
U =r$ )r  i  zH
An output buffer that represents the mutation of a pre-existing buffer
c                   > [         TU ]  S US9  UR                  5       n[        R                  R                  U5        U/U l        X0l        [        R                  R                  U 5      U l	        g r>  )
r  r  rs  rn   r  r  mutation_namesmutating_noder  r   )r  rN  mutated_noder  mutated_node_namer  s        r   r  MutationOutput.__init__"  s`     	d62(113	##$5601(5GG++D1	r   c                    U R                   $ r   )r  r  s    r   r  MutationOutput.get_defining_op,  s    !!!r   c                    U R                   $ r   )r  r  s    r   rs  !MutationOutput.get_mutation_names/  r  r   c                    gr  r   r  s    r   r'  MutationOutput.should_allocate2  r  r   c                j    U R                  5       nS U 5        Vs/ s H
  nUc  M  UPM     sn$ s  snf )Nc              3  `   #    U  H$  n[         R                  R                  U5      v   M&     g 7fr   )rn   r  try_get_buffer)r   r   s     r   r   6MutationOutput.get_mutation_buffers.<locals>.<genexpr>9  s"     P..t44s   ,.)rs  )r  r  r  s      r   get_mutation_buffers#MutationOutput.get_mutation_buffers5  s@    002 QP
P P
 	
 
s   00)r  r  r   )rN  r  r  r   r  r  r   r   r1  r  r  r   r  )r   r   r   r   r  r  r  rs  r'  r!  r   r  r  s   @r   r  r    sF    2 2062GP2	2"#
 
r   r  c                     ^  \ rS rSr% Sr0 rS\S'   \      SS j5       r\      SS j5       r	        SU 4S jjr
SS jrSS	 jrS
rU =r$ )TMADescriptori>  aL  
An IR node representing a generic host-side TMA descriptor in the Triton API
Mostly useful for user-defined Triton kernels relying on host-side TMA;
but can, in principle, be used for Inductor's Triton templates, too.

See TMADescriptorExperimental and TMADescriptorStable for the two implementations
(the old API and the new API)
zdict[Any, TMADescriptor]_CACHEc                    [        U5      S:X  d   eUS   S:X  a  [        U/US   Q76 $ US   S:X  d   e[        U/US   Q76 $ )Nr   r   experimentalrA   r>  )r   TMADescriptorExperimentalTMADescriptorStable)r  rT  tma_metas      r   _create_implTMADescriptor._create_implL  s\     8}!!!A;.(,VBhqkBBA;(***&v<<<r   c                    [        U5      U4nX0R                  ;  a  U R                  X5      U R                  U'   U R                  U   $ r   )idr&  r,  )r  rT  r+  r  s       r   r  TMADescriptor.createW  sB     &z8$jj !..v@CJJsOzz#r   c           
     8  > [         TU ]  S [        [        UUR	                  5       S95      [        [        [           U5      [        U5      S 5        Xl	        [        R                  R                  U 5      U l        [        R                  R                  U 5        g )NrL  )r  r  r  rS  r  r   r   rr  r   rT  rn   r  r  r   r  )r  rT  rj  r	  r  s       r   r  TMADescriptor.__init__`  s     	 !,,. &!6*- 	
  GG++D1		""4(r   c                &    UR                  U 5        g r   )generate_tma_descriptorr%
  s     r   r&
  TMADescriptor.codegenw      ''-r   c                    U R                   $ r   )rT  r  s    r   
get_tensorTMADescriptor.get_tensorz  r  r   )r   rT  )rT  r   r+  ztuple[str, tuple[Any, ...]]r   r%  )rT  r   rj  r  r	  r  r   r   r
  r  )r   r   r   r   r  r&  r   rF  r,  r  r  r&
  r8  r   r  r  s   @r   r%  r%  >  s     (*F$)=='B=	= = 'B	 ))&3)DQ)	).. r   r%  c                  H   ^  \ rS rSrSr S         SU 4S jjjrSrU =r$ )r)  i~  z
the new host-side TMA Descriptor API:
(the ones obtained via create_{1d,2d}_tma_descriptor calls).

See also TMADescriptorStable for the new API.
c                ^  > [        U5      S;   d   e[        U5      [        U5      :X  d   eUc  UR                  5       R                  nX l        X0l        X@l        [        U R                  5      U l        U/n/ U R                  QU R                  QU R
                  Pn[        TU ]!  UUUS9  g )N)rA   r   rT  rj  r	  )	r   r  r  r  
block_dimselement_sizer;  r  r  )r  rT  r  r=  r>  rj  r	  r  s          r   r  "TMADescriptorExperimental.__init__  s     4yF"""4yC
O+++!++-66L	$(		N	
YY
__
 
 	' 	 	
r   )r=  r  r>  r;  r   )
rT  r   r  list[Union[int, torch.SymInt]]r=  r@  r>  r  r   r   r   r   r   r   r  r  r   r  r  s   @r   r)  r)  ~  sG     '+

 -
 3	

 $
 

 
r   r)  c                  0   ^  \ rS rSrSrSU 4S jjrSrU =r$ )r*  i  z
the new host-side TMA descriptor API
(the ones obtained via TensorDescriptor.from_tensor).

See also TMADescriptorExperimental for the old API.
c                2   > X l         [        TU ]	  UU/US9  g )Nr<  )block_shaper  r  )r  rT  rD  r  s      r   r  TMADescriptorStable.__init__  s&    &8% 	 	
r   )rD  )rT  r   rD  r@  rA  r  s   @r   r*  r*    s    
 
r   r*  c                  J   ^  \ rS rSr          SU 4S jjrSS jrSrU =r$ )SubgraphBufferi  c                  > [         T
U ]  S X5        X0l        X@l        [        R
                  R                  U 5      U l        [        R
                  R                  U 5        [        R
                  R                  U R                  XE5      U l
        [        U R                  5      (       d   e[        U R                  5      nU HT  nXpR                  R                  UR                  '   U R                  R                  R!                  UR                  5        MV     U Vs/ s H  oR                  PM     snU l        SS KJs  Jn	  [        R*                  " U R                  5         U	R-                  SSSS9   U R                  R.                  " U R                  6   S S S 5        S S S 5        g s  snf ! , (       d  f       N= f! , (       d  f       g = f)Nr   FATEN)max_autotunemax_autotune_gemmmax_autotune_gemm_backends)r  r  rd  example_inputsrn   r  r  r   r  make_subgraphsubgraphr	  rj  rm  r(  graph_input_namesr  
sym_inputstorch._inductor.configr  rB   set_graph_handlerr   run)r  rN  r   rd  rM  subgraph_namerQ  sym_inpsym_varinductor_configr  s             r   r  SubgraphBuffer.__init__  sR    	v3,GG++D1		""4(--dgg~U,,,,(5
!G7>MM&&w||4MM++227<<@ " 8BBzG<<zB88  / &&""'+1 ' 
 !!4#6#67 0/	 C  0/s*   F,%G7$F1G1
F?	;G
Gc                $    " S S5      n[        U R                  5      (       d   eU R                   Vs/ s H  o3R                  5       PM     nnUR                  U" U R                  5      / U R
                  QUQU R                  /5        g s  snf )Nc                      \ rS rSrSS jrSrg),SubgraphBuffer.codegen.<locals>.CodegenGraphi  c                2    Xl         UR                  U l        g r   r  r   )r  r  s     r   r  5SubgraphBuffer.codegen.<locals>.CodegenGraph.__init__  s    "
!JJ	r   r^  N)r  rt   )r   r   r   r   r  r   r   r   r   CodegenGraphr\    s    'r   r`  )r	  rj  r	  'codegen_subgraph_with_flattened_outputsrO  rQ  r   )r  r
  r`  r  outer_inputss        r   r&
  SubgraphBuffer.codegen  s|    	' 	'
  ,,,,7;{{C{!++-{C77'-doo--YYK	
 Ds   B)rM  rd  r   rO  rQ  )
rN  r  r   r  rd  torch.fx.GraphModulerM  	list[Any]rU  r   r
  r   r   r   r   r  r&
  r   r  r  s   @r   rG  rG    sC    "8"8 ""8 !	"8
 ""8 "8H
 
r   rG  c                     ^  \ rS rSrS
S jr\SS j5       r\" S 5       S   SU 4S jjj5       rSS jr	          SU 4S jjr
SS jrSS jrS	rU =r$ )UserDefinedTritonKerneli  c                z  ^ SSK Jn  SSKJn  UR	                  U R
                  5      m/ n/ n/ n[        TU5      (       a  [        TS5      (       a&  UR                  U4S jTR                   5       5        O.[        TS5      (       d   eUR                  TR                  5        [        TS5      (       a<  TR                   H+  nUR                  TR                  R                  U   5        M-     O.[        TS5      (       d   eUR                  TR                  5        TR                   nTR                  mTX4U4$ )	Nr   )	Autotuner)kernel_side_tablerestore_idxc              3  V   >#    U  H  nTR                   R                  U   v   M      g 7fr   )r   	arg_names)r   r   r	  s     r   r   BUserDefinedTritonKernel.get_kernel_and_metadata.<locals>.<genexpr>  s$      *4FqFII''*4Fs   &)restore_value	reset_idxreset_to_zero)triton.runtime.autotunerrj  *torch._higher_order_ops.triton_kernel_wraprk  
get_kernel
kernel_idxr   r  r  rl  rp  rq  r  r   rn  rr  configs)r  rj  rk  rw  restore_value_argsreset_to_zero_argsr   r	  s          @r   get_kernel_and_metadata/UserDefinedTritonKernel.get_kernel_and_metadata  s   6P"--doo>(*(*fi(( v}--")) *4:4F4F*  v7777"))&*>*>?v{++))A&--fii.A.A!.DE * v7777"))&*>*>?nnGYYFw4FFFr   c                J  ^ SSK Jn  U R                  5       u  nnnnUR                  UUU R                  UUU R
                  5      u  nnn	n
U R                   Vs0 s H  oU R                  U5      _M     nnUR                   Vs/ s H  oR                  PM     snmUR                   Vs/ s H!  oR                  (       d  M  UR                  PM#     nn[        U4S jU 5       5      n/ n/ n/ n/ n[        R                  " UR                  5       [!        [        R"                  " S5      U
5      5       GH  u  nnUU;   a  U" 5       (       a  M  UR%                  U5        UR%                  U5        ['        U[(        5      (       a@  UR%                  UR+                  5       5        UR%                  UR-                  5       5        M  ['        U[.        [0        [2        [4        R6                  45      (       a-  UR%                  U5        UR%                  [9        U5      5        M  UU;   a)  UR%                  S5        UR%                  [.        5        GM  UcY   U" 5       (       a)  UR%                  S5        UR%                  [.        5        GMV  UR;                  5         UR;                  5         GMy  [=        S[9        U5       SU 35      e   U R?                  X5        URA                  UUUUUUU	S	U RC                  5       U RD                  R                  S
9
  gs  snf s  snf s  snf )QOverrides the parent member.
See https://github.com/pytorch/pytorch/issues/151692r   )triton_version_uses_attrs_dictc              3  .   >#    U  H
  nTU   v   M     g 7fr   r   )r   r   rn  s     r   r   2UserDefinedTritonKernel.codegen.<locals>.<genexpr>.  s     $F:aYq\:s   r  r0  NzUnsupported arg type: r  T)	arg_typesraw_argsraw_keystriton_metainductor_metar#  r  original_fxnode_name)#r	  r~  rz  !define_user_defined_triton_kernelr   gridr	  r
  paramsr   is_constexprnumr:   r  r2  r  r   repeatr  r   r   r	  r  r   rL  r   r   r    r   r  r  r"
  generate_kernel_callr  r
  )r  r
  r~  r	  rw  rx  ry  new_namer  r  extra_launch_argsr
  
named_argsr  
constexprsconstexpr_namesr   r  raw_keys_filteredraw_args_filteredr   r	  rn  s                         @r   r&
  UserDefinedTritonKernel.codegen  s   
 	I ((*	
 55KKII
	
 261S1S
1SAt$$Q''1S 	 
 &,]]3]VV]3	%+]]E]nneaee]
E$$F:$FF!	')')"I$4$4R$8:K L
ID# &+I+K+K$$T*$$S)#v&&C1134  1C#udEJJ!?@@C   c+( B  % 233KKO$$S)%))+%))+),B49+RPSu*UVVI
L 	W/$$&&#'??$!%!2!2 	% 	
e
 4Es   LL+L L c                P   > [         TU ]  U5      [        U R                  U5      -  $ r   )r  ra  r(   r  r  s     r   ra  ,UserDefinedTritonKernel.get_free_symbol_usesh  s-     w+M:=MII}>
 
 	
r   c                    [        5       $ r   r9   r  s    r   r  0UserDefinedTritonKernel.get_unbacked_symbol_defsr  r  r   c          	     h  > / n0 n/ nUR                  5        H  u  p[        U	[        5      (       aX  [        R	                  U R                  U	5      5      n
X;   a  [        R                  XU   5      n
UR                  U
5        XU'   Mr  UR                  U	5        XU'   M     [        U5      S:w  d   eUS   R                  5       U l        [        U[        5      (       d   [        U5      5       e[        TU ]=  S [!        U R                  S9U[#        U5      U5        Xl        X l        U R)                  5       u  p  n[+        US5      (       d   eUR,                   Vs/ s H  oU;   d  M
  UPM     snU l        SSKJn  [        U5      S:  a  US   R4                  O0 nU" U0 UEUEU5       Vs/ s H  nUU   PM
     snU l        U R6                   Vs/ s H!  n[9        [!        U R                  S9UU 5      PM#     snU l        [<        R>                  RA                  U 5        g s  snf s  snf s  snf )Nr   r  rn  )identify_mutated_tensors)!r  r   r   r  r	  r	  r%  r  r  r   r  r  r   r   r  r  r  r   rv  r  rz  r  rn  r	  rt  r  r   mutable_argsr  r 
  rn   r  r  )r  rv  r  tma_descriptor_metadatakernel_argsrj  r   r	  r
  r  r  r	  rw  r   r	  r  autotuned_kwargsr  r  r  s                      r   r   UserDefinedTritonKernel.__init__u  s$     "$&&(%%'DA!Y'' 99$:L:LQ:OP/%,,Q0JKAa q	$$Q'q	 ( 6{aQi**,&(++9T&\9+dkk*- 	
 %	 $ < < >A v{++++!++.
+Ck/AC+.
* 	X03Gq0@71:,,b 03;3"23'
 
 ((!
( :T[[93E(!
 	
""4().

!
s   	H%,H%0H*(H/c                ,    [        U R                  5      $ r   )r   r 
  r  s    r   r  #UserDefinedTritonKernel.get_outputs  s    D))**r   c                    U R                   $ r   r  r  s    r   r  "UserDefinedTritonKernel.get_device  r  r   )r  r  rv  r  r 
  r	  )r   z(tuple[Kernel, Any, list[str], list[str]]r
  r  r  r  )
rv  r   r  r   r  r~  r  r~  r   r   r  r  )r   r   r   r   rz  r   r&
  rY   ra  r  r  r  r  r   r  r  s   @r   rh  rh    s    G@ X
 X
t 56$)
!
	!
 7
=) =) 	=)
 "0=) $=) 
=)~+ r   rh  c                  h   ^  \ rS rSrSrS	S jrS
S jrSS jrSS jr        SU 4S jjr	Sr
U =r$ )InplaceBernoulliFallbacki  =
This needs to be a custom class to handle mutation properly
c                    [        S U R                   5       5      (       d   eS U R                   5       u  n[        R                  R                  (       a\  UR                  U R                  5        SU SSR                  [        [        U R                  5      5       SUR                   35        g UR                  U R                  5        SU SSR                  [        [        U R                  5      5       SUR                   35        g )Nc              3  B   #    U  H  n[        U[        5      v   M     g 7fr   r	  r   r  s     r   r   3InplaceBernoulliFallback.codegen.<locals>.<genexpr>  s     >+Q:a((+r  c              3  ^   #    U  H#  n[        [        U5      R                  5       v   M%     g 7fr   )r   r   r	  r  s     r   r   r    s"     I[VQ1133[s   +-r  r  z, NULL)r  )r   rj  rn   r  r+
  ry  r?
  r  r  reprr	  ending)r  r
  r   s      r   r&
   InplaceBernoulliFallback.codegen  s    >$++>>>>>IT[[I77 '')*!A3b3tTEWEW;X1Y0ZZabibpbpaqr '')*!A3b3tTEWEW;X1Y0ZZ[\c\j\j[klr   c                    gr  r   r  s    r   r'  (InplaceBernoulliFallback.should_allocate  r  r   c                &    U R                  S5      /$ r  r	  r  s    r   rs  +InplaceBernoulliFallback.get_mutation_names      "##r   c                    [        5       $ r   r9   r  s    r   r  1InplaceBernoulliFallback.get_unbacked_symbol_defs  r  r   c                R  > [         TU ]  S [        UR                  5       S9U R	                  U/5      UUS9  [
        R                  R                  UR                  5       5        [
        R                  R                  U 5      U l
        [
        R                  R                  U 5        g )Nr  r	  )r  r  r  r  r  rn   r  r  rs  r  r   r  )r  r	  r   r	  r  s       r   r  !InplaceBernoulliFallback.__init__  s     	alln-$# 	 	
 	
##AJJL1GG++D1		""4(r   r  r
  r  r  r  )r	  r|   r   r   r	  r   r   r   r   r   r   r   r  r&
  r'  rs  r  r  r   r  r  s   @r   r  r    sF    $)'),2)DG)	) )r   r  c                     ^  \ rS rSrSrS
S jrSS jrSS jrSS jr        SU 4S jjr	\
 S       SS jj5       rS	rU =r$ )InplaceCopyFallbacki  r  c                N    U R                  5       u  p#nUR                  X2U5        g r   )r
  codegen_device_copy)r  r
  r  r  non_blockings        r   r&
  InplaceCopyFallback.codegen  s%    #'#4#4#6 <##Cl;r   c                    gr  r   r  s    r   r'  #InplaceCopyFallback.should_allocate  r  r   c                &    U R                  S5      /$ r  r  r  s    r   rs  &InplaceCopyFallback.get_mutation_names  r  r   c                    [        5       $ r   r9   r  s    r   r  ,InplaceCopyFallback.get_unbacked_symbol_defs  r  r   c           	       > [         TU ]  S UUUSSS9  [        R                  R	                  US   R                  5       5        [        R                  R                  U 5      U l        [        R                  R                  U 5        g )Nz
aten.copy_aoti_torch_copy_)r	  r	  r   )	r  r  rn   r  r  rs  r  r   r  )r  rN  rj  r	  r  s       r   r  InplaceCopyFallback.__init__  sr     	+. 	 	
 	
##F1I$6$6$89GG++D1		""4(r   c                    X4 Vs/ s H  o@R                  U5      PM     nnU4n[        [        UR                  5       S9UU5      nU$ s  snf rJ  )r	  r  r  r  )r  r  r  r  r  rj  r	  r  s           r   r  InplaceCopyFallback.create  sU     25
;
1##A&
;%$cnn./

  <s   A	r  r
  r  r  r  )rN  r  rj  r  r	  r  r   r   r  )r  r   r  r   r  r   r   r  )r   r   r   r   r  r&
  r'  rs  r  r  rF  r  r   r  r  s   @r   r  r    s~    <$)) !) %	)
 
)$ <A

%
59
	
 
r   r  c                  J    \ rS rSrSrS
S jrSS jrSS jrSS jrSS jr	Sr
g	)MutatingFirstArgExternKerneli  r  c                   [        U R                  5      (       d   e/ S U R                   5       Q[        [        U R                  5      QnUR                  U R                  5        SSR                  U5       SUR                   35        g )Nc              3  @   #    U  H  oR                  5       v   M     g 7fr   )r	  r  s     r   r   7MutatingFirstArgExternKernel.codegen.<locals>.<genexpr>"  s     9[!!##[s   r  r  r  )	r	  rj  r  r  r	  ry  r?
  r  r  )r  r
  argrefss      r   r&
  $MutatingFirstArgExternKernel.codegen  s    ,,,,
9T[[9
t))*
 	##%&a		'(:';1W^^<LM	
r   c                    gr  r   r  s    r   r'  ,MutatingFirstArgExternKernel.should_allocate)  r  r   c                &    U R                  S5      /$ r  r  r  s    r   rs  /MutatingFirstArgExternKernel.get_mutation_names,  r  r   c                    [        5       $ r   r9   r  s    r   r  5MutatingFirstArgExternKernel.get_unbacked_symbol_defs/  r  r   c                    gra  r   r  s    r   has_side_effects-MutatingFirstArgExternKernel.has_side_effects2  r  r   r   Nr
  r  r  r  )r   r   r   r   r  r&
  r'  rs  r  r  r   r   r   r   r  r    s     
$r   r  c                  ,   ^  \ rS rSrSU 4S jjrSrU =r$ )ResizeStorageBytesi6  c                  > [        U[        5      (       d   S5       e[        TU ]  S [	        UR                  5       S9U R                  U/5      U4S9  [        R                  R                  UR                  5       5        [        R                  R                  U 5      U l        [        R                  R                  U 5        SU l        SU l        [        U[         ["        [$        45      (       d   ['        U5      5       e[        R                  R(                  R+                  UR,                  R                  5       5        g )NzTODO: dynamic shapesr  )r	  z"inductor_ops.resize_storage_bytes_z&torch::inductor::resize_storage_bytes_)r   r   r  r  r  r  r  rn   r  r  rs  r  r   r  r	  r	  ro  rq  r   r   never_reuse_buffersr  rM  )r  variabler  r  s      r   r  ResizeStorageBytes.__init__7  s    (C((@*@@(h1134
+#+	 	 	
 	
##H$5$5$78GG++D1		""4("FG(Xz9$EFFVXVF	##''(>(>(@Ar   )r	  r   r	  )r  r   r  r   r   r   r  r  s   @r   r  r  6  s    B Br   r  c                  6   ^  \ rS rSrSU 4S jjrSS jrSrU =r$ )SetSourceTensorKerneliH  c                  > UR                  5         [        TU ]	  UR                  5       X/S[        R
                  R                  R                  R                  S9  [        U[        [        [        45      (       d   [        U5      5       e[        R                  R                   R#                  UR$                  R'                  5       5        [        R                  R                   R#                  UR'                  5       5        [        R                  R                   R#                  U R'                  5       5        UR)                  5       n[+        [-        US9X5      [+        [-        US9X 5      /U l        g )Nz!torch.ops.aten.set_.source_Tensor)r	  r	  r  )r<  r  r  r  r  rl   r)
  set_source_Tensorr   ro  rq  r   r   rn   r  r  r  rM  rs  r  r  r  r 
  )r  self_tensorstorage_tensorr  r  s       r   r  SetSourceTensorKernel.__init__I  s   $$&%%')B		++99	 	 	
 +*i'HII 	
4L
 	
I 	
##''(8(8(A(A(CD	##''(?(?(AB	##''8**,:V4kH:V4nK!
r   c                F    U R                  S5      U R                  S5      /$ r"  r  r  s    r   ry  2SetSourceTensorKernel.get_inputs_that_alias_output]  s    "DOOA$677r   r	
  )r  r   r  r   r   r   r  )r   r   r   r   r  ry  r   r  r  s   @r   r  r  H  s    
(8 8r   r  c                     ^  \ rS rSrSrSS jrSS jrSS jrSS jrSSS	.               SU 4S
 jjjr	Sr
U =r$ )ScatterFallbackia  z
This needs to be a custom class to handle mutation properly.
This class handles both aten.scatter_ and aten.scatter_reduce_.
It also handle the case `src` being a scalar properly.
c                &    UR                  U 5        g r   )generate_scatter_fallbackr%
  s     r   r&
  ScatterFallback.codegenh  s    ))$/r   c                    gr  r   r  s    r   r'  ScatterFallback.should_allocatek  r  r   c                p    U R                   S   n[        U[        5      (       d   eUR                  5       /$ r  r	  r  rl  s     r   rs  "ScatterFallback.get_mutation_namesn  s1    kk!n#v&&&&r   c                    [        5       $ r   r9   r  s    r   r  (ScatterFallback.get_unbacked_symbol_defss  r  r   NTr  include_selfc               d  > [        U[        5      U l        U R                  (       a&  X$U4 Vs/ s H  oR                  U5      PM     n	nU4n
O$X$4 Vs/ s H  oR                  U5      PM     n	nX54n
[        TU ]  S [        UR                  5       S9U R                  U	5      U
XgS.[        U5      SS/US9  [        R                  R                  UR                  5       5        [        R                  R                  U 5      U l        [        R                  R!                  U 5        g s  snf s  snf )Nr  r  r  r  )r	  r	  r	  )r   r   src_is_tensorr	  r  r  r  r  r  r   rn   r  r  rs  r  r   r  )r  r	  r   rB  r   r  r  r  r  tensorsr	  r  s              r   r  ScatterFallback.__init__v  s    (Y7 78oFo))!,oGF FM78jAj))!,jGA JMalln-(<";/+3^*D# 	 		
 	
##AJJL1GG++D1		""4(% G Bs   D(D-)r   r  r
  r  r
  r  )r	  r|   r   r   rB  r   r   r   r  r   r  r  r  r   r   r   r  r  s   @r   r  r  a  s|    0 
 !%!!)!!) !) 	!)
 !) !) !) !) 
!) !)r   r  c                  p   ^  \ rS rSrSrS	S jrS
S jrSS jrSS jr            SU 4S jjr	Sr
U =r$ )IndexPutFallbacki  zI
This needs to be a custom class to handle mutation and indices properly
c                &    UR                  U 5        g r   )generate_index_put_fallbackr%
  s     r   r&
  IndexPutFallback.codegen  s    ++D1r   c                    gr  r   r  s    r   r'   IndexPutFallback.should_allocate  r  r   c                &    U R                  S5      /$ r  r  r  s    r   rs  #IndexPutFallback.get_mutation_names  r  r   c                    [        5       $ r   r9   r  s    r   r  )IndexPutFallback.get_unbacked_symbol_defs  r  r   c           
       > X0l         U Vs/ s H	  ofc  M  UPM     nnX$/UQ Vs/ s H  o R                  U5      PM     nnSn	[        T
U ]  S [	        WR                  5       S9U R                  U5      U4SU	US9  [        R                  R                  U R                  S5      5        [        R                  R                  U 5      U l        [        R                  R                  U 5        g s  snf s  snf )Naoti_torch_index_put_outr  zaten.index_put_)r	  r	  r	  r   )r  r	  r  r  r  r  r  rn   r  r  r	  r  r   r  )r  r	  r   r  r   
accumulater   valid_indicesr  r	  r  s             r   r  IndexPutFallback.__init__  s     $+=GqG=342M}2MN2MQ%%a(2MN4alln-(M0+# 	 	
 	
##DOOA$67GG++D1		""4( >Ns   C-C-C2)r  r   r
  r  r  r  )r	  torch._ops.OpOverloadr   r   r  re  r   r  r  r   r   r   r  r  s   @r   r   r     s`    2$)*) ) 	)
 ) ) 
) )r   r   c                  2    \ rS rSr\SS j5       rSS jrSrg)
DeviceCopyi  c           
        UR                  5       nUc   eUR                  5       (       d  [        U5      [        R                  R
                  ;  a  [        S UR                  5        5       5      (       a  [        R                  R                  (       dn  [        R                  R                  (       a>  [        R                  R                  U5        [        R                  R                  U5        UR                  U5      $ [        R                  R                  U5        [        R                  R                  U5        [        S5        U4n[        R!                  U5      nS nUR#                  5       (       a  UR%                  5       n['        UR(                  5      =(       a    UR(                  S:H  =(       a    UnUR(                  S:H  =(       a    ['        UR(                  5      =(       a    UnU(       a%  [+        U5      (       a  SUR-                  5       l        [1        [3        UUR5                  5       UR#                  5       UUS9U R7                  U5      /U5      $ )Nc              3  Z   #    U  H!  o[         R                  R                  ;   v   M#     g 7fr   )rn   r  rT
  r  s     r   r   $DeviceCopy.create.<locals>.<genexpr>  s     G4Fq***4Fs   )+zDeviceCopy in input programr   Tr  )r  rj  rt  rn   r  r{  r   r  rB   aot_inductoruse_runtime_constant_foldingr+
  add_device_inforp  r]   r  r  r	  r2  re   r   r
  r  rR  r  rP  r  r	  )	r  r   r  r  x_devicer	  r  is_destination_pinnedis_source_pinneds	            r   r  DeviceCopy.create  s   <<>###Qqww'>'>>GA4D4D4FGGG''DDww"" ''/''1''//	'	)78%++A.::<<\\^F8==!KfkkU&:K| 	 MMU"Kvfkk':K| 	  5a 8 8'+ALLN$

/ q!"

 
	
r   c                   U R                  5       n[        U5      S:X  d   eU R                  (       a2  UR                  US   U R                  R	                  5       US   5        g UR                  US   U R	                  5       US   5        g )Nr   r   rA   )r
  r   r	  r  r	  )r  r
  r   s      r   r&
  DeviceCopy.codegen  s{      "4yA~~''Q));;=tAw ''Q1G1G1I4PQ7Sr   r   N)r   r   r  r  r  r   r   r   r
  )r   r   r   r   rF  r  r&
  r   r   r   r   r  r    s    -
 -
^Tr   r  c                     ^  \ rS rSrSrS
S jrSS jr              SU 4S jjrSS jr\	" S 5       S   SS jj5       r
SS jrS	rU =r$ )DynamicSelectStorageOffseti  a  
The result of computing a dynamic selection index is determined as follows: when the index in the
select operation is unbacked, the actual index calculation is ambiguous for negative indices
(index + size) versus non-negative indices (just index). To resolve this, we allocate an unbacked
SymInt to represent the storage offset and decompose the select operation into a call to as_strided,
computing the storage offset at runtime with this node.
c                    [        5       $ r   r9   r  s    r   r  $DynamicSelectStorageOffset.get_reads  r  r   c                    gr  r   r  s    r   r'  *DynamicSelectStorageOffset.should_allocate  r  r   c                   > [         TU ]  S [        [        R                  " S5      S9/ 5        Xl        X l        X0l        X@l        XPl	        X`l
        g Nr   r  )r  r  r  r  r  unbacked_offset_symbolr   base_offsetbase_dim_strider  r  )r  r&  r   r'  r(  r  r  r  s          r   r  #DynamicSelectStorageOffset.__init__  sG     	ze1DErJ '=#
&.	
r   c                .    [        U R                  /5      $ r   )r:   r&  r  s    r   r  3DynamicSelectStorageOffset.get_unbacked_symbol_defs$  s    466788r   c                .    [        U R                  U5      $ r   )r(   r   r`  s     r   ra  /DynamicSelectStorageOffset.get_free_symbol_uses'  s      

M::r   c                6    UR                  X R                  S9  g )Nr	  )codegen_dynamic_select_indexr  r%
  s     r   r&
  "DynamicSelectStorageOffset.codegen-  s    ,,T,Dr   )r(  r'  r  r   r  r&  r  r  )r&  sympy.Symbolr   r1  r'  Union[sympy.Symbol, int]r(  r2  r  r2  r  r   r   r   r  r  r  r
  r   r   r   r   r  r  r'  r  r  rY   ra  r&
  r   r  r  s   @r   r  r    s     ,  .	
 2 '  
&9 89$);!;	!; :;
E Er   r  c                     ^  \ rS rSrSrS
S jrSS jr          SU 4S jjrSS jr\	" S 5       S   SS jj5       r
SS jrS	rU =r$ )DynamicSliceSizei1  a7  
Computes the output size of a slice call, handling the correct semantics in codegen.
We do this for flexible handling for unbacked indices (to not data-dependent error).

Slicing has 4 semantics for indices, i.e. x[start:] could be:
1) start < -x.size(0)            -> x[0:]                    # negative out-of-bounds
2) start in [-x.size(0), 0)      -> x[x.size(0) + start:]    # negative slicing
3) start in [0, x.size(0))       -> x[start:]                # standard slicing
4) start >= x.size(0)            -> empty slice              # positive out-of-bounds

If the appropriate semantics are known beforehand, the output size is computed based on
the start & end indices. If not (with unbacked indices), a new unbacked symbol is created
to represent the output size, and codegen handles computing the correct case.
c                    [        5       $ r   r9   r  s    r   r  DynamicSliceSize.get_readsA  r  r   c                    gr  r   r  s    r   r'   DynamicSliceSize.should_allocateD  r  r   c                   > [         TU ]  S [        [        R                  " S5      S9/ 5        Xl        X l        X0l        X@l        XPl	        g r%  )
r  r  r  r  r  unbacked_size_symbolr  r  r  r  )r  r;  r  r  r  r  r  s         r   r  DynamicSliceSize.__init__G  s>     	ze1DErJ$8!
		r   c                .    [        U R                  /5      $ r   )r:   r;  r  s    r   r  )DynamicSliceSize.get_unbacked_symbol_defsW  s    444566r   c                t    [        U R                  U5      R                  [        U R                  U5      5      $ r   )r(   r  r  r  r`  s     r   ra  %DynamicSliceSize.get_free_symbol_usesZ  s0      

M:@@TXX}5
 	
r   c                &    UR                  U 5        g r   )codegen_dynamic_slice_sizer%
  s     r   r&
  DynamicSliceSize.codegenb  r
  r   )r  r  r  r  r;  r  r  )
r;  r1  r  r2  r  r2  r  r2  r  r2  r  r  r  r
  r3  r  s   @r   r5  r5  1  s    * ( &	
 ' ' 7 ./$)
!
	!
 0
1 1r   r5  c                  h   ^  \ rS rSrSrS	S jrS
S jr        SU 4S jjrSS jrSS jr	Sr
U =r$ )r   if  z3
The result of a call to aten._local_scalar_dense.
c                    [        5       $ r   r9   r  s    r   r  DynamicScalar.get_readsk  r  r   c                    gr  r   r  s    r   r'  DynamicScalar.should_allocaten  r  r   c                   > UR                  5         [        TU ]	  S [        [        R
                  " S5      S9U R                  U/5      5        Xl        X l        g r%  )	r  r  r  r  r  r  r  symkeypath)r  rJ  rK  rM  r  s       r   r  DynamicScalar.__init__q  sI     	*ELL$78$:M:Mtf:U	
 r   c                .    [        U R                  /5      $ r   )r:   rJ  r  s    r   r  &DynamicScalar.get_unbacked_symbol_defs{  s    488*%%r   c                &    UR                  U 5        g r   )codegen_dynamic_scalarr%
  s     r   r&
  DynamicScalar.codegen~  s    &&t,r   )rK  rJ  r  r  )rJ  r1  rK  zpytree.KeyPathrM  r   r   r   r  r
  )r   r   r   r   r  r  r'  r  r  r&
  r   r  r  s   @r   r   r   f  sF    *8@F	&- -r   r   c                     ^  \ rS rSrSrS
S jrSS jrSU 4S jjrSS jr\	" S 5       S   SS jj5       r
SS jrS	rU =r$ )r   i  z-
The result of a call to aten._assert_scalar
c                    [        5       $ r   r9   r  s    r   r  AssertScalar.get_reads  r  r   c                    gr  r   r  s    r   r'  AssertScalar.should_allocate  r  r   c                v   > [         TU ]  S [        [        R                  " S5      S9/ 5        Xl        X l        g r%  )r  r  r  r  r  scalarrm
  )r  rX  rm
  r  s      r   r  AssertScalar.__init__  s3    ell512	
 r   c                    gra  r   r  s    r   r  AssertScalar.has_side_effects  r  r   c                .    [        U R                  U5      $ r   )r(   rX  r`  s     r   ra  !AssertScalar.get_free_symbol_uses  s      ];;r   c           	        [         R                  (       d  g [        [        U R	                  SS95      5      n[
        R                  R                  (       a  g [
        R                  R                  (       a^  SU S3n[
        R                  R                  R                  U R                  SS9nUR                  SU SU R                   SU S	35        g [
        R                  R                  R                  U R                  SS9nUR                  S
U S35        UR                  S[        U R                  5       S35        UR                  U R!                  5        S35        g )NFrh  zstd::to_string(r  )rJ  zif (!(z()) { throw std::runtime_error("Expected z but received " + z); }zif not (z):z    raise RuntimeError(z = None)rB   scalar_assertsrC  rD  ra  rn   r  r:
  r+
  rw  codegen_cpp_sizevarrX  ry  rm
  codegen_python_sizevarr  rs  )r  r
  symbol
symbol_strsizevars        r   r&
  AssertScalar.codegen  s4   $$ d44454IJK77WW  *6(!4Jgg**>>e ? G 	!J488*Tfgqfrrwx gg**AAe B G 	45 7TXX7GqIJ  19:r   )rm
  rX  r  r  )rX  rp   rm
  r   r   r   r  r  r
  )r   r   r   r   r  r  r'  r  r  rY   ra  r&
  r   r  r  s   @r   r   r     sR    	 N+$)<!<	!< ,<
; ;r   r   c                  *    \ rS rSr% S\S'   S\S'   Srg)ExternKernelNodei  r   r   zexport_schema.Noder   r   Nr   r   r   r   rg  rg    s    
I
r   rg  c                  
  ^  \ rS rSrSr SSS.               SU 4S jjjjrSU 4S jjrSS jrSS jrSS	 jr	\
      SS
 j5       rSS jrSS jrSS jrS r\SS j5       r\
SS j5       r\SS j5       rSrU =r$ )FallbackKerneli  z
A class that represents a fallback kernel for handling operators that are not
directly support by inductor. It currently supports functional ops, view ops,
inplace aten ops, and mutating ops that are auto-functionalizable.
Nr	  c                 >^  [         TT ]  U[        U5      [        U5      US9  ST l        U=(       d    0 T l        [        U[        R                  R                  [        R                  R                  45      (       d   SU S[        U5       S35       eUT l        UT l        Uc  0 OUT l        T R                  c   e[        R                   R#                  T R                  5        / T l        / T l        [        T R                  [        R                  R                  5      (       a  g ST R                  R)                  5       ;   a  g T R                  R*                  n[        R,                  R.                  R1                  T R                  5      (       a-  T R&                  R3                  US   R5                  5       5        g SS jn	UR6                  (       aI  [9        T R                  5      (       d/  U	" T R                  5      (       d  [;        S	T R                   35      eT R                  T R<                  T R>                  5      u  pSU 4S
 jjn[        R,                  R.                  RA                  XU5       H  u  pU" X5        M     g )Nr  F#Fails to create FallbackKernel for r   not supported_c10d_functionalr   c                8   [         R                  R                  U R                  5       [         R                  R                  R
                  5      =(       dD    [        U S5      =(       a1    [         R                  R                  R
                  U R                  ;   $ )N
py_kernels)r  _C%_dispatch_has_kernel_for_dispatch_keyr   DispatchKeyFunctionalizer  rp  r]  s    r   has_functionalize_impl7FallbackKernel.__init__.<locals>.has_functionalize_impl  sg    88AA	588//==  L) HHH((66"--G	r   z'NYI: Can't generate FallbackKernel for c                >  >^  [        T R                  [        R                  5      (       a+  [        U[        [
        45      (       d   [        U5      5       e[        R                  " T R                  5      (       a  [        U[
        [        45      (       a   eUc  g T R                  c  g SU U4S jjn[        R                  " T R                  5      (       a  Ub  U H  nU" U5        M     g g [        R                  " T R                  5      (       d   eU" U5        g )Nc                $  > TR                   R                  U R                  5       5        TR                  c   eTR                  R                  (       a<  TR
                  R                  [        [        U R                  5       S9U T5      5        g g rJ  )	alias_namesr  rs  
alias_infois_writer 
  r  r  r  )r  infor  s    r   	add_aliasPFallbackKernel.__init__.<locals>.handle_aliasing_and_mutation.<locals>.add_alias1  sj      ''

5222??++))00&z'H!TR ,r   )r  r   r   r   )
r   r   r  ListTyper   r   library_utilsis_tensor_like_typerz  is_tensorlist_like_type)r|  r	  r}  optional_tensor_argr  s   `   r   handle_aliasing_and_mutation=FallbackKernel.__init__.<locals>.handle_aliasing_and_mutation#  s    $))U^^44!#e}55@tCy@500;; &cE4=9999{&  44TYY???/2+!"56 03 # %88CCCC#r   )r^  r  r   r   )r|  ztorch._C.Argumentr	  r   r   r   )!r  r  r   use_runtime_dispatchr	  r   r  r
  r
  r4
  r   r	  rO
  r   r	  rn   r  warn_fallbackry  r  r   r
  _libraryr  mutates_and_returns_first_argr  rs  
is_mutabler%   r  rj  r	  
zip_schema)r  rN  r	  rc
  nontensor_argsrO
  r   r	  schemaru  r   r  r|  r	  r  s   `             r   r  FallbackKernel.__init__  s*    	+.!	 	 	
 %*!!2!8bUZZ**EJJ,J,JK
 
 	X04<.W	X 
 ","Nb&&222	d556 '))+d&&

(F(FGG !1!1!6!6!88
 !!)) >>==d>N>NOO&&{1~'>'>'@A	 *4+;+;<<*4+;+;<<%9$:J:J9KL  **4;;8J8JK	> --88vNID(3 Or   c                @  > [         TU ]  5       nU R                  [        R                  R
                  R                  L a]  U R                   HM  n[        U[        5      (       d  M  UR                  [        R                  " UR                  5       5      5      nMO     U$ r   )r  rR  r	  r  _prims	rng_primsgraphsafe_run_with_rng_stater	  r   rR
  	with_readrC   r  rs  )r  r  r	  r  s      r   rR  FallbackKernel.get_read_writesE  sw    g-/u||55RRR))c>22"-"7"7$,,S\\^<#K * r   c           	     n    UR                  U R                  5       U R                  [        U SS 5      5      $ Nr	  )(codegen_unbacked_symbol_defs_for_outputsrs  r  r   r%
  s     r   codegen_unbacked_symbol_defs+FallbackKernel.codegen_unbacked_symbol_defsQ  s0    ??MMOT\\749Ld+S
 	
r   c                    [        U SS 5      =n(       aL  [        [        R                  R                  R
                  U5      nUc   e[        UR                  5       5      $ [        5       $ r  r   r5   rn   r  r  r   r:   r  r  r	  resolveds      r   r  'FallbackKernel.get_unbacked_symbol_defsV  _     '.A4 HHH0  **,=H '''hmmo..<r   c                ~   [         R                   " S S5      5       n[        U R                  5      (       d   eU R                   Vs/ s H  o!" UR	                  5       5      PM     nnU R                  X0R                  5      u  pE[        R                  R                  (       a  [        U R                  [        R                  R                  5      (       a  U R                  XE5      n[!        U R                  R"                  R$                  U5       VVs/ s H8  u  pb[        R                  R&                  R)                  X&R*                  5      PM:     nnnO9U Vs/ s H,  n[        R                  R&                  R)                  U5      PM.     nnU R,                  R/                  U5        U$ s  snf s  snnf s  snf )Nc                  *    \ rS rSr% S\S'   SS jrSrg))FallbackKernel.codegen_args.<locals>.Shimia  r   refc                    U R                   $ r   )r  r  s    r   rE  2FallbackKernel.codegen_args.<locals>.Shim.__repr__e  s    xxr   r   Nr  )r   r   r   r   r   rE  r   r   r   r   Shimr  a  s    H r   r  )r  	dataclassr	  rj  r	  rO
  r	  rn   r  r+
  r   r	  r  r
  r
  r
  r   r
  r
  rw  r
  r
  r   r  )r  r  r   rc
  r   r   params          r   r
  FallbackKernel.codegen_args`  s\   				  	  
	   ,,,,<@KKHKqtA//12KH**;8J8JK77:d.>.>

@U@U#V#V..t<D !$D$4$4$<$<$F$F M MHE $$33AG M  D
 EIIDqAGG((77:DDI 	6" I
 Js   F/?F43F:c                   U (       a*  U  Vs/ s H  n[        U[        5      (       a  M  UPM     snOS nU(       aD  U (       d   eU  Vs/ s H)  oDR                  5       (       d  M  UR                  5       PM+     nnUS   $ [        U[        R                  5      (       a  UR
                  $ [        U[        [        45      (       a  [        S U 5       5      nU Vs/ s H  ow(       d  M  UPM     nn[        U5      S:X  a  US   $ U HB  n[        U[        R
                  5      (       d   e[        UR                  5      (       d  M@  Us  $    US   $ g s  snf s  snf s  snf )Nr   c              3  N   #    U  H  n[         R                  S U5      v   M     g 7fr   )ri  find_devicer  s     r   r   -FallbackKernel.find_device.<locals>.<genexpr>  s)      $ (A **433'r  rA   )r   r	  r  r  r]
  r  r   r   r:   r   re   r   )rc
  ri
  r  non_torch_bind_tensor_argsr	  devices
device_setr  s           r   r  FallbackKernel.find_devicex  s3     $J1:a+IQJ 	#
 &;3>S;C..BR's~~';GS1:nell33!(((ntUm44# $ ($ J -7AJ&&vJGA7|q qz!!!&%,,7777&++&&!M " 1:3 K T Bs"   EEE$E
E)Ec                2    SSK Jn  U" U R                  5      $ )Nr   )	is_impure)torch._library.utilsr  r	  )r  r  s     r   r  FallbackKernel.has_side_effects  s    2 ))**r   c                .   [        U R                  [        R                  R                  [        R                  R
                  45      (       d+   SU R                   S[        U R                  5       S35       e[        U R                  [        R                  R
                  5      (       d_  SU R                  R                  5       ;  aA  U R                  R                  R                  (       a  [        U R                  5      (       a  / $ U R                  $ )Nrl  r  rm  rn  )r   r	  r  r
  r
  r4
  r   r   r
  r  r%   ry  r  s    r   ry  +FallbackKernel.get_inputs_that_alias_output  s    uzz44ejj6T6TU
 
 	
 2$2B2B1C2D$$%&n6	
 
 4++UZZ-K-KLL"$*:*:*?*?*AA  ((33&t'7'788I###r   c                P    [        U R                  5      S::  d   eU R                  $ r  )r   r  r  s    r   rs  !FallbackKernel.get_mutation_names  s'    4&&'1,,,"""r   c           
         [         R                  SU R                  5       U R                  5        [	        U [
        5      (       d   [        U 5      5       eU R                  U R                  U R                  5      u  pU R                  X5      nU R                   Vs/ s H  nU R                  " U40 UD6PM     nnU R                  n[        R                  R                  (       d  / UQUQ$ [!        S/ 5      nUR#                  XQU5      n      SS jn[	        U[$        R&                  R(                  R*                  5      (       a#  UR-                  US   US   5      R.                  n	OUR0                  R.                  n	[3        U	5      S:X  aB  U R4                  (       a  U R4                  OU R6                  n
U	S   R8                  nU" X5      /nO:[;        XR4                  5       VVs/ s H  u  pU" UR8                  U5      PM     nnnU R                  c   e[=        U R                  5       [>        R@                  " U R                  RC                  5       UU0 S9S9n[        RD                  RG                  U5        / UQUQ$ s  snf s  snnf )	a  
ProxyExecutor Design Note
We export the ExternFallbackNodes (for custom ops) into a serialized file
and run it with a host side proxy executor to address the ABI problem
This is currently only implemented for fbcode. Eventually, we will also make this work for OSS.
Detailed design doc can be found at
https://docs.google.com/document/d/1wC4DOZFaYym2t1Esz0X5yxlLI3RDnSiyRbUus3bkJ64/edit?usp=sharing
z4Extern kernel node added for node %s with target %s.Nc           	     
   [        U [        R                  [        R                  45      (       a  Un[        U[        [
        45      (       a  [        U5      S:X  d   eUS   n[        U [        R                  5      (       aT  [        U[        5      (       d   e[        R                  R                  [        R                  " UR                  5       S9S9$ Ub   e[        R                  R                  SS9$ [        U [        R                  5      (       a  [        U R                  5       [        R                  5      (       as  [        U[        5      (       d   [!        U5      5       e[        R                  R                  U Vs/ s H%  n[        R                  " UR                  5       S9PM'     snS9$ [        U [        R"                  5      (       a  [        U R                  5       [        R                  5      (       a  Uc8  [        R                  R                  [        R$                  R                  SS9S9$ [        U[        5      (       d   e[        R                  R                  [        R$                  R                  [        R                  " UR                  5       S9S9S9$ [        U [        R&                  5      (       a  [        R                  R                  US	9$ [)        S
[!        U 5       35      es  snf )NrA   r   r  )	as_tensorT)as_none)
as_tensors)as_optional_tensor)as_intzUnsupported return type )r   r  
TensorTypeNoneTyper   r   r   r   export_schemarq   r  TensorArgumentrs  r  getElementTyper   r   OptionalTypeOptionalTensorArgumentIntTypeRuntimeError)return_typer\  r   s      r   handle_single_outputFFallbackKernel.export_extern_kernel_node.<locals>.handle_single_output  s    +(8(8%..'IJJftUm44v;!+++ )Ck5+;+;<<%c62222(1188"/">">CLLN"S 9   ;&;(11888FFK88Z**,e.>.>> > "&(33AT&\A3$--44 $* #)C &44#,,.I#)  5   K););<<**,e.>.>B B >(1188+8+O+O+V+V$( ,W , 9   &ff5555(1188+8+O+O+V+V&3&B&B%+__%6' ,W , 9   K77$--44F4CC"%=d;>O=P#QRR7 s   ,L r   rA   )r  rj  r  r	  )r   r   )r  z6Union[torch.TensorType, torch.ListType, torch.JitType]r\  Union[IRNode, Sequence[IRNode]]r   zexport_schema.Argument)$r  r  rs  r	  r   ri  r   rO
  rj  r	  r
  r	  r
  rn   r  aot_moder$   serialize_inputsr  rZ
  	torchbindCallTorchBindr  returnsr
  r   r  r 
  r
  r   rg  r  r8   r   extern_kernel_nodesr  )r  r   r   r  ordered_kwargsr  
serializernamed_argumentsr  r  r  r  output_argumentsreturn_schemar\  r   s                   r   export_extern_kernel_node(FallbackKernel.export_extern_kernel_node  sl    			BMMO	
 $//;d;/**4;;8J8JK**48 99
9 !!#009 	 
 !!ww+T+N++*44
$55fFK3	SO3	S33	S $3	Sj fe55??MMNNmmDGT!W5==Gnn,,Gw<1 '+lldll8M8MG!!*..K 4[ JK .1,,-G 
 .H)M	 %!++ .H    +++##'',,.&(	
 	
$$T*''''K
` s   J*J
c                  ^ ^^ T R                   nUc   eUR                  S:X  a  [        U[        R                  R
                  5      (       d   [        U5      5       e[        R                  R                  (       a2  SSK
Jn  [        U5      U;  a  [        R                  SU5        ST l        OUR                  S:X  a:  [        U[        R                  R
                  5      (       d   [        U5      5       eOA[        R                  R                  (       a"  U[         R"                  R$                  ;  T l        [        R                  R                  (       a  [        U[        R                  R
                  5      (       a  T R                  (       d  SU4S jjmT R'                  T R(                  T R*                  5      u  nm[,        R.                  " UUU 4S	 jT R0                   5       5      n[3        U4S
 j[5        XRR6                  R8                  5       5       5      T l        T R;                  U5        T R                  (       a  T R=                  5       nT R>                  c   eT R                   c   eURA                  T RC                  5       T R>                  U 4S jT R                   UT RD                  (       a  T RD                  OT RF                  5        OcURI                  T 5        [        T RJ                  [L        5      (       a3  T RO                  U5        T RQ                  U5        T RS                  U5        T RU                  U5        g)r}  Nr)
  r   )inductor_fallback_opszG%s is missing a c-shim implementation, using proxy executor as fallbackT
_quantizedc                   > [        U [        R                  5      (       a  T" U R                  5       5      $ [        U [        R                  5      $ r   )r   r  r  r  
NumberType)r  	is_numbers    r   r  )FallbackKernel.codegen.<locals>.is_numberX   s=    a!3!344$Q%5%5%788!!U%5%566r   c              3  J   >#    U  H  nTR                   " U40 TD6v   M     g 7fr   )r
  )r   r
  r   r  s     r   r   )FallbackKernel.codegen.<locals>.<genexpr>e   s(      ? ))!6v6?s    #c              3  z   >#    U  H0  u  p[        U[        5      =(       a    T" UR                  5      v   M2     g 7fr   )r   complexr
  )r   r  r  r  s      r   r   r  j   s2      ,DDA 1g&A9Q[[+AADs   8;c                 H   > / T R                  5       QT R                  5       Q$ r   )r
  r
  r  s   r   r  (FallbackKernel.codegen.<locals>.<lambda>x   s"    F$++-F0C0C0EFr   )r  ztorch.JitTyper   r   )+r	  r,
  r   r  r
  r
  r   rn   r  r+
  torchgen.aoti.fallback_opsr  r   r  r  r  rB   r  custom_ops_to_c_shimsrO
  rj  r	  r  r2  r	  r{  r   r
  r
  r"
  r  r	  ,generate_fallback_kernel_with_runtime_lookuprs  r  r 
  generate_fallback_kernelrN  r  r
  r
  r
  r  )	r  r
  r	  r  r   	args_iterexported_argsr  r   s	   `      @@r   r&
  FallbackKernel.codegen1   sz    !!!!!v%fejj&;&;<<Jd6lJ<ww""Lv;&;; KKa 15D--fejj&;&;<<Jd6lJ<WW   f11GGG % GG65::#8#899--7  ..t{{D<N<NOLD& "!??I ), ,	>>+C+CD, )D%
 	W%$$ ::<M**666##///@@''F   $$2G2G ,,T2$++v..))'2..w7,,W5))'2r   c           	         Sn U R                  5       n[        U R                  U R                  [        U R                  5       5      [        U R                  5       5      US9$ ! [         a     N[f = f)NFr  )rR  r  rP  r  r  r[   r  r  )r\  rR  s     r   tensor_to_layoutFallbackKernel.tensor_to_layout   sj    		((*I MMLL%fkkm4%fmmo6
 	
  		s   A# #
A0/A0c           
       ^ ^^^ [         R                  4nX;  a,  [        [        S   [        R
                  R                  5      nO
[        5       nU   T R                  " U/UQ70 UD6u  nnnn	n
SSS5        [        S W 5       5      mT R                  UW5      nU(       dp  [        U[        R                  R                  R                  5      (       d'  U[        R                   R"                  R$                  L a  [        R&                  " S5      nUc  T " [)        US9UUWW	UW
S9mO!U(       d   S5       eT " [+        US9UUWW	UW
S9mSU UUU4S jjmT" U/ 5      n[        U[,        [.        45      (       a	  UTl        U$ [        U[2        5      (       a  [/        U5      Tl        U$ U/Tl        U$ ! , (       d  f       GNK= f)	z9Create an instance of FallbackKernel from an _OpOverloadsNc              3  8   #    U  H  n[        U5      v   M     g 7fr   )rx  r	  s     r   r   (FallbackKernel.create.<locals>.<genexpr>   s     !K{,s"3"3{r  r   r  r   r	  z"Not sure where to find device infoc                J  >^ ^ [        T [        [        45      (       a/  [        T 5      " UUU 4S j[	        [        T 5      5       5       5      $ [        T [        5      (       a<  T R                  5        VVs0 s H  u  p#UT" UT[        T 5      U4/-   5      _M      snn$ [        T [        R                  5      (       a}  [        TR                  T 5      TT5      n[        R                  (       d  T(       d  [        T 5      (       d3  [        R                   R"                  R%                  UR&                  5        U$ [        T [(        5      (       a  T $ [        T [        R*                  5      (       a  T R,                  R.                  $ T b   S[        T 5       S35       eg s  snnf )Nc              3  Z   >#    U  H   nT" TU   T[        T5      U4/-   5      v   M"     g 7fr   )r   )r   r   generate_outputr  r\  s     r   r   AFallbackKernel.create.<locals>.generate_output.<locals>.<genexpr>   s7      $/ $F1Iw4<:K9L/LMM/s   (+zFallbackKernel output type z is not supported)r   r   r   r   r   r   r   r  r  r]
  MultiOutputr  rB    assume_unaligned_fallback_outputrk   rn   r  rz  r  r   r   r
  r   r)  )	r\  r  r  r0  r  r  r  has_unaligned_inputpackeds	   ``   r   r  .FallbackKernel.create.<locals>.generate_output   s[   &4-00F| $"3v;/$   FD)) %+LLN$2 g$v,9L8M.MNN$2  FELL11!((0 ;;*,V44GG--11#((;
FC((FELL11{{'''~ 1$v,?PQ~ 3s   6%F)r\  r   r  zlist[tuple[Any, int]]r   r   )r)
  *_fused_moving_avg_obs_fq_helper_functionalr   r	   rn   r  r  r
   rn
  r{  r  r   r  rZ
  r  r  rl   higher_orderprintr  r  rw	  r   r   r  r   )r  r	  r   r   fake_incorrect_kernelscontextri
  rc
  rd
  rO
  r	  r  r  r  r  r  s   `            @@@r   r  FallbackKernel.create   s    #'"Q"Q!S/1$79J9JKG!mG ""6;D;F;!  "!K{!KKn= vu66@@NNOO//555\\%(F!&)"3F ???6!0"3F 	  	D "."5ge}--$FN  &&"7^FN  &YFNo Ws   F44
G)ry  r   r  r	  r	  rO
  r  r   rN  r  r	  r|   rc
  r  r  r  rO
  r  r   r
  r	  ,Optional[dict[sympy.Symbol, pytree.KeyPath]]r   r   r  r
  r  r
  )rc
  z Optional[Sequence[torch.Tensor]]ri
  r  r   r   r  r  )r\  rT  r   rP  )r	  r|   r   r   r   r   r   ri  )r   r   r   r   r  r  rR  r  r  r
  r  r  r  ry  rs  r  r   r&
  r  rF  r  r   r  r  s   @r   ri  ri    s    ,0u4 KOu4u4 u4 &	u4
 &u4 +u4 )u4 Hu4 
u4 u4n


 0 5GT	 >+$*#w(r S3 S3j 
 
 _ _r   ri  c                  l   ^  \ rS rSrSrS	S jrS
S jrSSS.               SU 4S jjjrSrU =r	$ )ComplexViewi   z9View a complex number as two dtyped numbers or vice versac                    gr  r   r  s    r   r'  ComplexView.should_allocate   r  r   c                &    U R                  S5      /$ r  r  r  s    r   ry  (ComplexView.get_inputs_that_alias_output!  s    "##r   Nr  c          
     ,   > [         TU ]  UUUUUUUS9  g )Nr  )r  r  	r  rN  r	  rc
  r  rO
  r   r	  r  s	           r   r  ComplexView.__init__!  s,     	/ 	 	
r   r   r  r  )rN  r  r	  r|   rc
  r  r  r  rO
  r  r   r
  r	  r  r   r   )
r   r   r   r   r  r'  ry  r  r   r  r  s   @r   r  r     sq    C$ )-JN

 
 &	

 &
 +
 &
 H
 

 
r   r  c                  "    \ rS rSrSrSS jrSrg)MemoryCheckKerneli!  z
Custom kernel for memory checking that generates direct function calls

TODO - the custom op was erroring with str inputs. should be able to custom op directly.
c                    UR                  5         U R                  u  p#n[        U5      n[        U5      nU(       a  UR                  S5        SU SU SU S3nO	SU SU S3nUR                  U5        g)z.Override codegen to write direct function callzV# note: dont currently distinguish between buffers returned and dealloc'd in last stepzcheck_memory_step(allocated=z, freed=z, is_final_step=r  N)r
  r	  r  ry  )r  r
  
alive_list	dead_listis_final_step
alive_repr	dead_reprcalls           r   r&
  MemoryCheckKernel.codegen"!  s     	224/3/A/A,
}*%
O	h 2*Xi[P`an`oopqD1*Xi[PQRD$r   r   Nr
  )r   r   r   r   r  r&
  r   r   r   r   r  r  !  s     r   r  c                  *    \ rS rSr% S\S'   SS jrSrg)rw	  i4!  r  r  c                    U R                   $ r   r  r  s    r   r  MultiOutputLayout.get_device8!  r  r   r   Nr  )r   r   r   r   r   r  r   r   r   r   rw	  rw	  4!  s    r   rw	  c                     ^  \ rS rSrSS jr S	         S
U 4S jjjr\" S 5       S	   SS jj5       rSS jrSS jr	Sr
U =r$ )r  i<!  c                    UR                  U 5        U R                  (       d#  U R                  U5        U R                  U5        g g r   )codegen_multi_output!skip_size_stride_alignment_checksr
  r
  r%
  s     r   r&
  MultiOutput.codegen=!  s:    $$T*55%%g.**73 6r   c                   > [         TU ]  S X/S5        [        R                  R	                  U 5      U l        [        R                  R                  U 5        X0l        X@l        g r  )	r  r  rn   r  r  r   r  r  r   )r  rN  r	  r  r   r  s        r   r  MultiOutput.__init__C!  sK     	vw3GG++D1		""4(1R.r   c                z    U R                   S   n[        U[        5      (       d   U5       eUR                  U5      $ r  )rj  r   r   ra  )r  ri  r  s      r   ra   MultiOutput.get_free_symbol_usesP!  s:     [[^
*f--9z9-..}==r   c                z    [        U R                  5      S:H  =(       a    [        U R                  S   [        5      $ )NrA   r   )r   rj  r   rs	  r  s    r   r'  MultiOutput.should_allocateX!  s0    4;;1$ 
t{{1~'89	
r   c                    U R                    Vs/ s HI  n[        U[        5      (       d  M  [        UR	                  5       5      S:  d  M9  UR                  5       PMK     sn$ s  snf r  )rj  r   ri  r   ry  rs  r  s     r   ry  (MultiOutput.get_inputs_that_alias_output]!  s\     {{
"#~.  C4467!; CLLN"
 	
 
s   A"A"A")r  r   r   r
  r  )
rN  r  r	  r   r  zlist[tuple[Any, ...]]r   r   r   r   r  r  r  )r   r   r   r   r&
  r  rY   ra  r'  ry  r   r  r  s   @r   r  r  <!  s    4 38SS S '	S
 ,0S 
S S M*$)>!>	!> +>


 
r   r  c                     \ rS rSr% SrS\S'   S.S jrS/S jrS0S jrS1S jr	S2S	 jr
S3S
 jrS4S5S jjrS6S jrS7S jrS8S jrS.S jrS7S jr S9     S:S jjrS;S jrS<S jr S9     S=S jjrS>S jrS?S jrS@S jrSAS jrSBS jrSCS jrS.S jrS.S jrSDS jrSES jrS3S  jr SES! jr!SBS" jr"\#" S 5       S9   SFS# jj5       r$SGS$ jr%SHS% jr&S4SIS& jjr'\(SJS' j5       r)SKS( jr*SJS) jr+SCS* jr,\(SLS+ j5       r-S3S, jr.\.r/S-r0g)Mr  ih!  z;
TensorBox / StorageBox allow in-place mutation of Tensors
r   rM  c                6    U R                   R                  5       $ r   r  r  s    r   r  !MutableBox.has_exceeded_max_readsp!  r  r   c                6    U R                   R                  5       $ r   r  r  s    r   r  MutableBox.get_devices!  r  r   c                6    U R                   R                  5       $ r   r  r  s    r   r  MutableBox.make_loaderv!      yy$$&&r   c                6    U R                   R                  5       $ r   )rM  r  r  s    r   r  MutableBox.make_indexery!      yy%%''r   c                6    U R                   R                  5       $ r   )rM  r2  r  s    r   r2  MutableBox.get_stride|!  r  r   c                6    U R                   R                  5       $ r   r  r  s    r   rs  MutableBox.get_name!  r  r   Nc                8    U R                   R                  U5      $ r   )rM  r.  r,  s     r   r.  MutableBox.has_large_inner_fn!  s    yy++I66r   c                8    U R                   R                  U5      $ r   r  r1  s     r   r3  MutableBox.mark_reuse!  r  r   c                6    U R                   R                  5       $ r   r  r  s    r   r7  MutableBox.realize_hint!  r4  r   c                6    U R                   R                  5       $ r   )rM  rp  r  s    r   rp  MutableBox.unwrap_view!  r1  r   c                6    U R                   R                  5       $ r   )rM  r)  r  s    r   r)  MutableBox.is_input_buffer!      yy((**r   c                6    U R                   R                  5       $ r   )rM  r<  r  s    r   r<  MutableBox.freeze_layout!  s    yy&&((r   c                8    U R                   R                  X5      $ r   )rM  rA  r?  s      r   rA  *MutableBox.freeze_layout_with_stride_order!  s     yy88NNr   c                8    U R                   R                  U5      $ r   )rM  rF  rE  s     r   rF  (MutableBox.freeze_layout_with_fill_order!  s    yy66u==r   c                8    U R                   R                  U5      $ r   )rM  rJ  rI  s     r   rJ  (MutableBox.freeze_layout_with_same_order!  s    yy66v>>r   c                8    U R                   R                  X5      $ r   )rM  rO  rM  s      r   rO  +MutableBox.freeze_layout_with_exact_strides!  s     yy99-WWr   c                6    U R                   R                  5       $ r   )rM  rR  r  s    r   rR  MutableBox.get_read_writes!  rC  r   c                6    U R                   R                  5       $ r   rl  r  s    r   r  MutableBox.get_reads!  r  r   c                6    U R                   R                  5       $ r   ri  r  s    r   rZ  MutableBox.num_reads!  r  r   c                6    U R                   R                  5       $ r   r  r  s    r   r]  MutableBox.get_storage_numel!  r  r   c                6    U R                   R                  5       $ r   r  r  s    r   rd  MutableBox.get_reduction_type!  r  r   c                6    U R                   R                  5       $ r   r  r  s    r   rg  MutableBox.get_reduction_size!  r  r   c                6    U R                   R                  5       $ r   r  r  s    r   rj  MutableBox.is_extern!  r  r   c                6    U R                   R                  5       $ r   )rM  rm  r  s    r   rm  MutableBox.is_no_op!  r  r   c                8    U R                   R                  U5      $ r   r  r  s     r   rp  MutableBox.constant_to_device!  s    yy++F33r   c                6    U R                   R                  5       $ r   )rM  rs  r  s    r   rs  MutableBox.get_mutation_names!  r  r   c                6    U R                   R                  5       $ r   )rM  rv  r  s    r   rv  MutableBox.get_operation_name!  r  r   c                6    U R                   R                  5       $ r   )rM  ry  r  s    r   ry  'MutableBox.get_inputs_that_alias_output!  s    yy5577r   c                6    U R                   R                  5       $ r   r  r  s    r   r  MutableBox.realize!  r  r   c                8    U R                   R                  U5      $ r   r~  r`  s     r   ra  MutableBox.get_free_symbol_uses!  s     yy--m<<r   c                6    U R                   R                  5       $ r   r  r  s    r   r  MutableBox.get_read_names!  r  r   c                6    U R                   R                  5       $ r   )rM  r  r  s    r   r  MutableBox.get_defining_op!  rC  r   c                8    U R                   R                  U5      $ r   )rM  r	  r  s     r   r	  MutableBox.codegen_reference!  s    yy**622r   c                6    U R                   R                  5       $ r   rM  r  r  s    r   rN  MutableBox.layout!  s     yy((**r   c                6    U R                   R                  5       $ r   r  r  s    r   r  MutableBox.get_layout!  r  r   c                6    U R                   R                  5       $ r   rq  r  s    r   r  MutableBox.get_output_spec!  rC  r   c                6    U R                   R                  5       $ r   r  r  s    r   r	  MutableBox.get_size!  r  r   c                .    U R                   R                  $ r   )rM  r  r  s    r   r  MutableBox.dtype!  s    yyr   c                ~   [        U R                  [        5      (       aQ  [        U 5      R                   S[        U R                  5      R                   S3nSnU R                  R                  nO&[        U 5      R                   S3nU R                  nSnU[        [        U5      5      U/nSR                  U5      $ )Nr  z))r  r  )r   rM  r  r   r   r  r   r  )r  line0endlr  r  s        r   r   MutableBox.__str__!  s    dii,,Dz**+1T$))_-E-E,FaHEDIINNEDz**+1-EIIED 3u:

 yyr   r   r  r  r  r  r  r  r   r  r  r~  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  )1r   r   r   r   r  r   r  r  r  r  r2  rs  r.  r3  r7  rp  r)  r<  rA  rF  rJ  rO  rR  r  rZ  r]  rd  rg  rj  rm  rp  rs  rv  ry  r  rY   ra  r  r  r	  r  rN  r  r  r	  r  r   rE  r   r   r   r   r  r  h!  sb    L2&'(&$7+('+) ;@O"O37O	O
>? HMX/X@DX	X
+%%-..%$4..8# L)$)=!=	!= *=
*+3 + +&+$   " Hr   r  c                  d    \ rS rSr\\SS j5       5       r\\SS j5       5       r\S	S j5       rSrg)
r   i "  c                    g r   r   rM  s    r   r  TensorBox.create"  s    FIr   c                    g r   r   r  s    r   r  r  "  s    +.r   c                X    [        U [        5      (       a  U $ [        [        U 5      5      $ r   )r   r   r   rq  r  s    r   r  r  "  s%    d122KD)**r   r   N)rM  r   r   r   )rM  r   r   r   )rM  r   )r   r   r   r   r   r  r  r   r   r   r   r   r    "  s@    I  I.  .+ +r   c                  r    \ rS rSrSrSS jrSS jrSS jrSS jrSS jr	SS jr
SS	 jrSS
 jrSS jrSrg)rq  i"  z/
StorageBox allow in-place mutation of Tensors
c                    [        U R                  [        [        45      (       a5  U R                  R	                  5       [
        R                  R                  ;   $ gr  )r   rM  r  rS  rs  rn   r  r(  r  s    r   r)  StorageBox.is_input_buffer"  s=    dii+!?@@99%%'177+?+???r   c                    [        U R                  [        5      =(       a5    U R                  R                  5       [        R
                  R                  ;   $ r   )r   rM  ri  rs  rn   r  rT
  r  s    r   r  StorageBox.is_module_buffer"  s9    tyy>3 :		""$(9(99	
r   c           
        [         R                  U R                  5      (       a  U R                  R                  5       $ [	        U R                  [
        [        [        [        45      (       d   [        U R                  5      5       eU R                  R                  5       nU R                  R                  5       nU R                  R                  5       nUc   e[        S [        UU R                  R                  5       U R                  R!                  5       SS9U R                  S9U l        ["        R$                  R'                  U R                  5      U R                  l        ["        R$                  R+                  U R                  5        U R,                  U R                  l        XR                  l        X R                  l        U R                  R(                  $ )NF)r  r  r  rR  r  )r   r  rM  rs  r   rO  r  r  r<  r   r  r  r  r  r  r  r	  rn   r  r  r   r  ry  r}  r{  )r  r}  r{  r  s       r   r  StorageBox.realize"  sR   ""499--99%%''$))iD$%GHH 	
$IIK
 	
H ii//1II++-	%%'!!!"!ii))+YY'')	 	
	 00;			""499- LL		 +		'		yy~~r   c                    [        U R                  [        [        45      (       a:  U R                  R	                  5       R
                  S:  a  U R                  5         ggg)z<
Called on buffers we expect to be forced to realize later.
rA   N)r   rM  rO  r  r$  nontrivial_read_countr  r  s    r   r7  StorageBox.realize_hint<"  sI    
 tyy9i"899		**,BBQFLLN G :r   c                <   SSK Jn  U R                  5        Vs/ s H1  nU" U5      (       a  M  [        R                  R                  U5      PM3     nnU(       d  g[        U5      n[        U5      n[        U5      nXQ:  =(       a    XV-  S:  =(       a    Xg:H  $ s  snf )Nr   )is_nonfreeable_buffersFr   )	r	  r  r  rn   r  get_dep_size_hintr~  r-  r|  )r  r-  r  r  size_of_reads
total_sizemax_sizemin_sizes           r   $has_accumulated_enough_reads_by_size/StorageBox.has_accumulated_enough_reads_by_sizeF"  s    @ ~~'
')#. +AGG%%c*' 	 

 '
}%}%# %%*%$	

s
   B#Bc                2   [        U R                  [        5      =(       aw    U R                  5       [        R
                  :  =(       dO    U R                  5       =(       d8    [        R                  S L=(       a    U R                  [        R                  5      $ r   )	r   rM  rO  rZ  rB   realize_acc_reads_thresholdr.   realize_acc_reads_size_thresholdr  r  s    r   r  !StorageBox.has_exceeded_max_readsY"  sq    $))Y/ 	
NNvAAA &&( 77tC ==;;		
r   c                r  ^ US:  a  [        U R                  [        [        45      (       a  [	        U R                  5      (       a9  U R                  R                  5       mSS/n[        U4S jU 5       5      (       a  gU R                  5       [        R                  :  =(       d    U R                  5       $ g)zR
A heuristic to decide if we should realize a tensor
that is used multiple times.
rA   expsigmoidc              3  @   >#    U  H  oTR                   ;   v   M     g 7fr   )used_ops)r   r   opcounts     r   r   5StorageBox.should_realize_on_reuse.<locals>.<genexpr>o"  s     @iG,,,is   TF)r   rM  rO  r  r*  r$  r{  rZ  rB   realize_reads_thresholdr.  )r  r2  	heavy_opsr  s      @r   should_realize_on_reuse"StorageBox.should_realize_on_reusee"  s    
 19DII	9/EFFdii  ))446"I.	@i@@@ 6#A#AA -**, r   c                R    U R                  U5      (       a  U R                  5         g g r   )r  r  r1  s     r   r3  StorageBox.mark_reusew"  s!    ''..LLN /r   c                6    U R                   R                  5       $ r   ri  r  s    r   rZ  StorageBox.num_reads{"  r  r   r  Nr  r  r~  )r-  r   r   r   )r2  r   r   r   r  r  )r   r   r   r   r  r)  r  r  r7  r  r  r  r3  rZ  r   r   r   r   rq  rq  "  s4    

:
&

$%r   rq  c                  8    \ rS rSr% S\S'   S\S'   SrS\S'   S	rg)
Subgraphi"  r   r   rd  graph_moduleNzOptional[GraphLowering]r  r   )r   r   r   r   r   r  r   r   r   r   r  r  "  s    
I&&%)E")r   r  c                    U  Vs/ s H*  n[        U[        5      (       a  UR                  5       OUPM,     n n[        [	        S U  5       5      5      [        U 5      :  $ s  snf )Nc              3  8   #    U  H  n[        U5      v   M     g 7fr   )r/  )r   rs  s     r   r   '_has_aliased_buffers.<locals>.<genexpr>"  s     ;7"V**7r  )r   rS  rp  r   r:   )buffersrs  s     r   _has_aliased_buffersr  "  sd     F !+6? C CO  
 z;7;;<s7|KKs   1Ac                     ^  \ rS rSr% SrSrS\S'   SrS\S'   SrS\S'           SU 4S	 jjr	\
      SS
 j5       rSS jrSrU =r$ )InvokeSubgraphi"  z&
Ir node for the invoke_subgraph HOP.
NOptional[Subgraph]rO  Optional[Sequence[IRNode]]operandsr  c                   > [         TU ]  S UUS9  Xl        [        R                  R                  U 5      U l        [        R                  R                  U 5        g r
  )r  r  rO  rn   r  r  r   r  )r  rO  r  rN  r  s       r   r  InvokeSubgraph.__init__"  sO     	 	 	

 !GG++D1		""4(r   c                .  ^ SSK Jn  [        R                  R                  nSnUR
                  R                  S5      =n(       at  SnUR                  [        R                  R                  R                  L a8  UR                  S   [        R                  R                  R                  L d   eSnUS   US nOSnUR                  [        R                  R                  R                  L a8  UR                  S   [        R                  R                  R                  L d   eSnUR                  US nU V	s/ s H  oR
                  S	   PM     nn	U V	s/ s H  oR                  U	5      PM     nn	/ n
[        U5       HM  u  p[!        U["        [$        45      (       a  U
R'                  U5        M3  U
R'                  U" XU   5      5        MO     U
nUR                  cz  [        R                  R)                  UR*                  UUR,                  S
9Ul        [        R.                  " UR                  5         UR                  R0                  " U6   SSS5        UR                  R2                  nSnU H*  n[!        U["        5      (       a  M  UR5                  5       n  O   Uc   e[7        UU[9        US9S9m      SU4S jjn[        U5       VVs/ s H  u  nnU" UU5      PM     nnnUTl        U$ s  sn	f s  sn	f ! , (       d  f       N= fs  snnf )z|For each operand, get a realized input, force it to have the same
strides as the subgraph inputs, then use an InvokeSubgraphrA   )constrain_to_fake_tensorNeager_input_valsr   r   r   r   r0  rd  rM  rU  r  )rO  r  rN  c                Z  > [        U [        [        45      (       a  U $ U R                  5       nUc   e[	        [        UU R                  5       U R                  5       U R                  5       U R                  5       R                  U R                  5       R                  S9T[        U4/SS9$ )Nr  T)r   )r   r   rA  r  r  rP  r  r	  r2  r  rQ  rR  r   )r\  indr  invoke_subgraphs      r   create_output,InvokeSubgraph.create.<locals>.create_output"  s     &#8:N"OPP**,)))"%$..0#__.%002%00299"("3"3"5"?"? $C[M6: r   )r\  r   r  r   r   z?Union[ShapeAsConstantBuffer, NoneAsConstantBuffer, MultiOutput])r
  r  rn   r  r	  rb  r  r  r  rl   r   r\
  r   r  r	  r   r   r   rR
  r  rN  r  r   rS  rT  graph_outputsr  r  rw	  r  )r  rO  r  r  r	  fake_operandsr  rQ  fx_operandsr   new_operandsr   operandr  r  r  r   r\  outsr  s                      @r   r  InvokeSubgraph.create"  s    	7 ww+++00445GHHHF""eii&<&<&I&II#((+uyy/E/E/U/UUUU,Q/8MF""eii&<&<&I&II#((+uyy/E/E/U/UUUU '++FG4K4?@KqVVE]KM@ AI!I1"3"3A"6!I%'%h/LC'$9>#JKK##G,##,WC6HI	 0  >>!WW22((,&mm 3 HN
 $$X^^4""M2 5 .... Gg'<== ++-   !!!($F3
		!$	L	. ;DG:LM:LYQfa(:LM"&O A "J* 54T Ns   8K6K;7L L 
Lc                &    UR                  U 5        g r   )codegen_invoke_subgraphr%
  s     r   r&
  InvokeSubgraph.codegen#  r6  r   )r   rO  )rO  r  r  r  rN  rw	  r   r   )rO  r  r  r   r   zElist[Union[ShapeAsConstantBuffer, NoneAsConstantBuffer, MultiOutput]]r
  )r   r   r   r   r  rO  r   r  r  r  rF  r  r&
  r   r  r  s   @r   r  r  "  s     $(H '+/H(/*.G'.
) 
),<
)FW
)	
) dd,2d	Nd dL. .r   r  c                     ^  \ rS rSr% SrSrS\S'   SrS\S'   SrS\S	'   Sr	S\S
'   Sr
S\S'                 SU 4S jjr\SS j5       r\          SS j5       rSS jrSS jrSrU =r$ )Conditionali#  a  
IR node representing torch.cond

Attributes:
    predicate: A boolean scalar tensor determining which branch to execute.
    operands: Input tensors passed to both true and false subgraphs.
    true_subgraph: Subgraph executed when predicate is True.
    false_subgraph: Subgraph executed when predicate is False.
    outputs: MultiOutput nodes representing the conditional's outputs.
Nr  	predicater  r  r  true_subgraphfalse_subgraphOptional[Sequence[MultiOutput]]r  c                  > Xl         X l        X0l        X@l        [	        U/UQ5      u  px[
        T	U ]  S UUUS9  Ub  X`l        [        R                  R                  U 5      U l        [        R                  R                  U 5        g N)r   rN  rj  r	  )r  r  r  r  _split_by_sym_typer  r  r	  rn   r  r  r   r  )
r  r  r  r  r  rN  r	  sym_argsrc
  r  s
            r   r  Conditional.__init__##  s     # *, 2I3I3I J"	 	 	
 (%6"GG++D1		""4(r   c                \    [        U [        5      (       a  U $ U R                  R                  $ r   )r   r   r   r)  )r   s    r   _maybe_exprConditional._maybe_expr?#  s"    aHvv{{r   c                
   U R                  U5      nU Vs/ s H  oPR                  U5      PM     nn[        R                  R                  R                  S   n[        U[        5      (       d   [        U5      5       e[        S U 5       5      (       d   eU Vs/ s H   n[        [        U5      R                  S   PM"     nn[        R                  R                  R                  S   n      SS jn	X#4 H  n
U
R                  b  M  [        R                  R                  U
R                  UU
R                  S9U
l        [        R                  " U
R                  5         U
R                  R                   " U6   U	" U
R                  R"                  U5      U
R                  l        SSS5        M     UR                  c   eUR                  c   eUR                  R"                  nUR                  R"                  nSU4SU44 H&  u  p[%        U5      (       d  M  ['        S	U S
U 35      e   [)        U5      [)        U5      :X  d   X45       e[+        [-        X5      5       H  u  nu  nnUR/                  5       UR/                  5       :X  d
   UUU45       eUR1                  5       UR1                  5       :X  d
   UUU45       eUR3                  5       R4                  UR3                  5       R4                  :X  a  M   UUU45       e   [7        S XA/-    5       5      n[9        [        R                  R:                  R<                  [        R                  R                  R                  R?                  SS5      5      nUc   S5       e[A        UUUU[C        US9US9n[+        [-        U[        R                  R                  R                  S   5      5       VVVVs/ s H  u  nu  nn[E        [G        UR/                  5       b  UR/                  5       OUUR1                  5       URI                  5        Vs/ s H  n[@        RK                  U5      PM     snURM                  5        Vs/ s H  n[@        RK                  U5      PM     snUR3                  5       R4                  UR3                  5       RN                  S9U[P        U4/5      PM     nnnnnUUl)        U$ s  snf s  snf ! , (       d  f       GM  = fs  snf s  snf s  snnnnf )zNCreate a Sequence of IRNodes from a conditional statement (see .lowering.cond)r0  c              3  B   #    U  H  n[        U[        5      v   M     g 7fr   )r   r8   r	  s     r   r   %Conditional.create.<locals>.<genexpr>U#  s     <1:a&&r  r0  c           	         / n[        X5       Hh  u  p4[        U[        5      (       a  UR                  U5        M-  UR                  [        R                  [        U5      UR                  5       SS95        Mj     U$ NFrr  )r   r   r   r  r  r
  r   r  )r  fake_tensorsretr\  r  s        r   _require_exact_strides2Conditional.create.<locals>._require_exact_stridesY#  sm     C #M @f&;<<JJv&JJ$::%f-t{{}E ; 	 !A Jr   Nr  true_fnfalse_fnzVOutput aliasing is currently not supported in compiled torch.cond. The outputs of the z% subgraph of torch.cond are aliased: c              3  p   #    U  H,  n[        U[        5      (       a  M  UR                  5       v   M.     g 7fr   )r   r   r  )r   os     r   r   r  #  s,      
+a!67 ALLNN+s   66r	  zcannot determine devicer  )r  r  r  r  rN  r	  r  )r  r  r  zSequence[torch.Tensor]r   r   )*r	  rn   r  r	  r   r   r   r   r   r   r8   rb  rN  r  r   rS  rT  r  r  r  r   r   r   r  r  r  rQ  rC  r5   r  r   r  r  rw	  r  rP  r  r  r  rR  r   r  )r  r  r  r  r  r   r  r  fake_outputsr  rO  true_outputsfalse_outputsr   r  r   t_of_or  r	  conditionalr\  merged_outputr  s                           r   r  Conditional.createE#  s    %%i0	2:;(Q%%a((; ! 4 4 9 9" =+x00C${2CC0<<<<<<<GHKqdA++E2KHww++007	+	0	 	$ !+H~~%!"!6!6,,#0"*-- "7 "
 ((8NN&&6 4J 44l4HNN0 98 ," }}(((~~)))}}22 44(,7*m9TUMD#L11$**./TU\T]_  V < C$66U8UU6&s<'GHMAzS>>#s~~'77F!S#F7==?cmmo5D3}D5>>#**cnn.>.E.EET3PS}TE I  
+
 

 6GG&&GG  %%))*=tD
 !<#<<!!!#$F3/
: /8L!''"6"6";";E"BC/)
(/**FM'  ((*6 ",,. **,@M@R@R@TU@T"+11"5@TU>K>R>R>T>T//3>T ",,.55$//1;; &/) 	 
2 &_ <
 I: 98v V
s>   S'S##AS(AT
6S;T
)T AT
(
S8	;
T
c           	         UR                  U 5        UR                  U R                  5       U R                  [	        U S0 5      5        g r  )codegen_conditionalr  rs  r  r   r%
  s     r   r&
  Conditional.codegen#  s9    ##D)88MMOT\\749Lb+Q	
r   c                    [        U SS 5      =n(       aL  [        [        R                  R                  R
                  U5      nUc   e[        UR                  5       5      $ [        5       $ r  r  r  s      r   r  $Conditional.get_unbacked_symbol_defs#  r  r   )r  r   r  r  r  r	  )r  r   r  r  r  r  r  r  rN  rw	  r	  r  r   r   )r   zUnion[int, torch.SymInt]r   zUnion[int, sympy.Expr])
r  r   r  r  r  r  r  zlist[TensorBox]r   zlist[MultiOutput]r
  r  )r   r   r   r   r  r  r   r  r  r  r  r  r  r  rF  r  r&
  r  r   r  r  s   @r   r  r  #  s    	 #'I&+/H(/(,M%,)-N&-/3G,3)) #)  	)
 !) ") H) 
)8  
 zz z 	z
 "z 
z zx
   r   r  c                    / n/ nU  HF  n[        U[        5      (       a  UR                  UR                  5        M5  UR                  U5        MH     X!4$ r   )r   r   r  r)  )r   non_sym_argsr  r	  s       r   r  r  #  sO     LHc011OOCHH%$	  !!r   c                     ^  \ rS rSr% SrSrS\S'   SrS\S'   SrS\S'   Sr	S\S	'   Sr
S
\S'                   SU 4S jjr\SS j5       r\SS j5       r\            SS j5       rSS jrSS jrSrU =r$ )	WhileLoopi#  zSThe IR node for while_loop and while_loop_stack_output. It supports input mutation.Nr  carried_inputsadditional_inputsr  cond_subgraphbody_subgraphr  r  c                  > Xl         X l        X0l        X@l        [	        / UQUQ5      u  p[
        T
U ]  S UU	US9  Ub  X`l        Xpl        [        R                  R                  U 5      U l        [        R                  R                  U 5        g r  )r  r  r  r  r  r  r  r	  stack_outputrn   r  r  r   r  )r  r  r  r  r  rN  r	  r  r  rc
  r  s             r   r  WhileLoop.__init__#  s     -!2** 21n101!
 	"	 	 	
 (%6"(GG++D1		""4(r   c                   [        U 5      (       d  U $ U  Vs/ s H*  n[        U[        5      (       a  UR                  5       OUPM,     nn[	        5       n/ n[        X5       He  u  pV[        U5      U;   a&  UR                  [        R                  U5      5        M:  UR                  [        U5      5        UR                  U5        Mg     U$ s  snf r   )r  r   rS  rp  r:   r   r/  r  r  rB
  r  )r  rs  unwrapped_buffersseen_buffersr  original_inputunwrapped_buffers          r   _clone_aliased_inputsWhileLoop._clone_aliased_inputs$  s    #N33!! )
( %/v$G$GF VS( 	 
 )31303N0V,N"#|3l55nEF  $4!56n- 1W !
s   1Cc                    [        U [        5      (       a  U $ [        U [        [        45      (       a  [        U 5      $ [        U [        5      (       a  [        R                  U 5      $ [        S[        U 5       35      e)NzNYI unsupported output type: )r   r   rq  rS  r  r  r  r   )r   s    r   _maybe_wrap_as_tensor_box#WhileLoop._maybe_wrap_as_tensor_box'$  se    c9%%Jj/:;;S>![))##C((!>tCykJKKr   c                   SSK Jn        SS jn[        R                  R                  R
                  S   n[        R                  R                  R
                  S   n	X-   n
U
 Vs/ s H  oR                  S   PM     nnU Vs/ s H  oR                  S   PM     nnU	 Vs/ s H  oR                  S   PM     nnU Vs/ s H  oR                  U5      PM     nn[        R                  U5      nU" X5      nU Vs/ s H  oR                  U5      PM     nnU" UU5      nUU-   nX4 GH  nUR                  b  M  [        U
[        5      (       d   [        U
5      5       e[        R                  R                  UR                  U
UR                  S9Ul        [        R                   " UR                  5         UR                  R"                  " U6   UUL aZ  [%        UR                  R&                  5      [%        U5      :X  d   eU" UR                  R&                  U5      UR                  l        SSS5        GM     UR                  (       a  UR                  (       d   eUR                  R&                  nUR                  R&                  n[)        U5      (       a  [+        S	U 35      e[%        U5      S
:X  d   U5       eUS   n[        U[,        5      (       dM  UR/                  5       [0        R2                  :X  d   U5       e[%        UR5                  5       5      S:X  d   U5       e[%        U5      S:  d   S5       eUS   R7                  5       nUc   e[%        U5      [%        U5      :X  d	   UU45       e[9        [;        UU5      5       H  u  nu  nn      SS jnU" UR5                  5       UR5                  5       5        U" UR=                  5       UR=                  5       5        UR7                  5       UR7                  5       :X  d   UUUU45       eUR/                  5       UR/                  5       :X  a  M   UUU45       e   Uc   e[?        [        R                  R@                  RB                  [        R                  R                  R                  RE                  SS5      5      n[        UUUU[G        US9UUS9nUR                  b=  [        UR                  RH                  [0        RJ                  RL                  5      (       d   eU" UR                  RH                  U5      S   n[O        U5      nU Vs/ s H  nUU   PM
     n n[Q        U 5      n!/ n"/ Ul)        / Ul*        U(       Ga  [%        U5      S:X  d   S5       e[9        [        R                  R                  R                  S   5       H  u  nn#[W        [Y        U#RZ                  U#R\                  U#R_                  5        V$s/ s H  n$[`        Rc                  U$5      PM     sn$U#Re                  5        V%s/ s H  n%[`        Rc                  U%5      PM     sn%S9U[f        U4/5      n&URR                  Ri                  U&5        U"Ri                  U&5        M     GO[9        U5       GH  u  nn#UU;   ad  U[%        U5      :  d   S5       e[k        U!5      n'URT                  Ri                  [m        U'Rn                  U'U5      5        U"Ri                  U'5        Mq  [W        [Y        U#R7                  5       U#R/                  5       U#R5                  5       U#R=                  5       U#Rq                  5       Rr                  S9U[f        U4/5      n&URR                  Ri                  U&5        U"Ri                  U&5        GM     [;        UU"5       Hk  u  n(n)U(Ru                  5       [        R                  Rv                  ;   d  M4  [        R                  Rx                  R{                  U)Ru                  5       5        Mm     U"$ s  snf s  snf s  snf s  snf s  snf ! , (       d  f       GM   = fs  snf s  sn$f s  sn%f )zcreate the while_loop IR node. stack_output controls whether it stack
each iterations' output, which is necessary for training.
r   )check_input_alias_and_mutationc           	     V   [        U 5      [        U5      :X  d   e/ n[        X5       H}  u  p4[        U[        R                  5      (       aH  [
        R                  U5      nUR                  [        R                  XTR                  5       SS95        Ml  UR                  U5        M     U$ r  )r   r   r   r  r]
  r  r  r  r  r
  r  )tensor_boxesr  r  r  fknew_tbs         r   r  0WhileLoop.create.<locals>._require_exact_strides@$  s     |$L(9999Cl9b%,,// '@@DFJJ$::"IIKu ;  JJrN) :* Jr   r0  r0  Nr  zOutput aliasing is currently not supported in compiled torch.while_loop. The outputs of the body_fn subgraph of torch.while_loop are aliased: rA   z9torch.while_loop is assumed to have at least one operand.c                    [        U 5      [        U5      :X  d   e[        X5       H.  u  p#[        R                  R                  R                  X#5        M0     g r   )r   r   rn   r  r  r2  )	lhs_exprs	rhs_exprslhsrhss       r   _guard_list_equals,WhileLoop.create.<locals>._guard_list_equals$  sC     9~Y777 #I 9HCGG$$11#; !:r   r	  r  )r  r  r  r  rN  r	  r  r   z-NYI: while_loop_stack_output input mutations.)r  r  r  r  zonly carries can be mutated.)r  r  r  r  rQ  )r  r  r  z,list[Union[int, torch.SymInt, torch.Tensor]]r   r   )r   Sequence[Union[int, sympy.Expr]]r  r  r   r   )>torch._higher_order_ops.utilsr  rn   r  r	  r   rb  r	  r  r  r   r   r   rN  r  r   rS  rT  r   r  r  r  r   r  r  r   r	  r  r   r   r2  r5   r  r   r  rw	  modulefxGraphModuler:   rD  r  r 
  r  rP  r  r  r  r  r  r  r   r  rC  r  rN  r  rQ  rs  r(  r  r  )*r  cond_fnbody_fnr  r  r  r  r  fx_carried_inputsfx_additional_inputsfx_all_inputsr   fake_all_inputsfake_carried_inputsfake_additional_inputscarried_inputs_additional_inputs_
all_inputsrO  cond_outputsbody_outputsr  r  r   r^  bor  r	  
while_loopmutated_idxsmutated_idx_setr   r  mutated_inputs_iterall_outputsr\  r  r  	multi_outmutated_inputrl  r   s*                                             r   r  WhileLoop.create2$  s    	Q	*	F	 	: GG0055b9 ww3388<)@2?@-Q66%=-@6GH6Gvve}6GH9M!N9MA&&-9M!N9GHA,,Q/H#99/J0V<MN<Mq//2<MN3 6
 %'99
 *H~~%!-::OD<OO:!"!6!6,,#0"*-- "7 "
 ((8NN&&8  7*"8>>#?#?@C/E      8N$NN88/84 98 +4 }}..}}22}}22-- XXdWeg  < A%3|3%O!233;;=EJJ.11.qzz|$),1,):" 	
G	
" A))+!!!?#s<'88 	
;
 	
8 %S,%GHKAxB<;<;< < r{{}bkkm<r}}@ ==?bmmo5J2r67JJ5<<>R\\^3@aR[@3 I" !!!5GG&&GG  %%))*=tD

 *0!!$F3/%	

 }}(ZMM  %(("6"6.
 .
 	
 

 6MM  /

 %\25DE_c*S/_E #>2$&
&(
#'1, ?,  ))=)=)B)B5)IJV'%}}$llDJKKMRMbk55b9MRFLmmoVo 7 7 ;oV	 C[M		 "")))4""9-  K  )6V/)^!44T6TT4$()<$=M//66&}';';]JW  &&}5 +##)#4#4#6"("2"2"4!'!2#)#4#4#6#)#4#4#6#=#= #
!I &&--i8&&y1-  70 NK8HC||~!5!55 ++//? 9 U AH!NH O 98t F" SVs=   (`#`($`-`2?`7<A9`<a4a'a<
a	c           	         UR                  X R                  5        UR                  U R                  5       U R                  [        U S0 5      5        g r  )codegen_while_loopr  r  rs  r  r   r%
  s     r   r&
  WhileLoop.codegen%  s?    ""4):):;88MMOT\\749Lb+Q	
r   c                    [        U SS 5      =n(       aL  [        [        R                  R                  R
                  U5      nUc   e[        UR                  5       5      $ [        5       $ r  r  r  s      r   r  "WhileLoop.get_unbacked_symbol_defs%  r  r   )r  r  r  r  r   r  r	  )r  r  r  r  r  r  r  r  rN  rw	  r	  r  r  r   r   r   )r  r  r   r  )r   r   r   r   )r  r  r  r  r  r  r  r  r  r   r   r  r
  r  )r   r   r   r   r  r  r   r  r  r  r  r  r  r  r  rF  r  r&
  r  r   r  r  s   @r   r  r  #  s#   ]15N.54818(,M%,(,M%,/3G,3)() ,)  	)
  ) ") H) ) 
)D  2 L L WW W )	W
 ,W W 
)W Wr
   r   r  c                  r   ^  \ rS rSr SSS.               S	U 4S jjjjrS
U 4S jjrSS jrSrU =r$ )r   i%  Nrj  c          
        > [         T
U ]  UUUUUS US9  SSKJn  U" U5      n	U	c   eXl        [
        R                  R                  R                  U	S 5      U l	        U [
        R                  R                  U	'   g )Nr  r   )_get_effect)
r  r  torch._higher_order_ops.effectsr;  effect_typern   r  effectful_opsr  prev_effect_buffer)r  rN  r	  rc
  r  rO
  r   r	  r;  r=  r  s             r   r  EffectfulKernel.__init__%  s     	/ 	 	
 	@!&)&&&&"#''"7"7";";K"N-1k*r   c                   > [         TU ]  5       nU R                  bG  UR                  R	                  [
        R                  " U R                  R                  5       5      5        U$ r   )r  rR  r?  rV  r  rC   r  rs  )r  r  r  s     r   rR  EffectfulKernel.get_read_writes;%  sU    g-/"".!!$$T%<%<%E%E%GH r   c                    gra  r   r  s    r   r   EffectfulKernel.has_side_effectsE%  r  r   )r=  r?  r   r  r  r  )	r   r   r   r   r  rR  r  r   r  r  s   @r   r   r   %  s|     ,02 KO22 2 &	2
 &2 +2 )2 H2 
2 2: r   r   c                  @    \ rS rSr\" S 5       S   SS jj5       rSrg)r{
  iI%  c                    [        5       $ r   r9   r`  s     r   ra  !NonTensorObj.get_free_symbol_usesJ%  r!  r   r   Nr  r  )r   r   r   r   rY   ra  r   r   r   r   r{
  r{
  I%  s,    N+$)!	! ,r   r{
  c                  `    \ rS rSr% S\S'   S\S'   SS jrSSS jjrSS	 jrSS
 jrSS jr	Sr
g)r	  iQ%  r   r   +Union[FakeScriptObject, torch.ScriptObject]r  c                    U R                   $ r   r  r  s    r   rs  TorchBindObject.get_nameV%  r  r   Nc                    U R                   $ r   r  r  s     r   r	  !TorchBindObject.codegen_referenceY%  r  r   c                    U R                   $ r   r  r  s    r   rV
  TorchBindObject.get_value\%  r  r   c                    [        U R                  [        R                  5      (       a  U R                  $ U R                  R                  $ r   )r   r  r  ScriptObjectreal_objr  s    r   get_real_objTorchBindObject.get_real_obj_%  s3    djj%"4"455::::&&&r   c                   U R                  5       n[        U5      (       a  g[        US5      (       d   e[        UR	                  5       5      n[
        R                  " U5      S   nU Vs/ s HE  n[        U[        R                  5      (       d  M$  UR                  5       UR                  5       -  PMG     nn[        R                  " [        R                  US5      $ s  snf )Nr   __obj_flatten__)rS  r)   r  r   rV  rF
  rQ
  r   r  r]
  r>  numelr  r  operatorr  )r  real_script_obj	flat_dict
flat_elemsr   
flat_sizess         r   get_buf_bytesTorchBindObject.get_buf_bytese%  s    ++-/**(9::::88:;	((3A6
  
!U\\* )ANNqwwy( 	 

 j!<<
s   +#C%Cr   r  r   r  )r   rI  )r   ztorch.ScriptObjectr  )r   r   r   r   r   rs  r	  rV
  rS  r]  r   r   r   r   r	  r	  Q%  s&    
I66'=r   r	  c                  B    \ rS rSr% S\S'   S\S'   S
S jrSSS jjrS	rg)rR
  iw%  r   r   r  r  c                    U R                   $ r   r  r  s    r   rs  GeneratorState.get_name|%  r  r   Nc                    U R                   $ r   r  r  s     r   r	   GeneratorState.codegen_reference%  r  r   r   r  r   r  )r   r   r   r   r   rs  r	  r   r   r   r   rR
  rR
  w%  s    
I r   rR
  c                      \ rS rSrS	S jrS	S jrS
SS jjr\          SS j5       r\          SS j5       r	Sr
g)_CollectiveKerneli%  c                    gr  r   r  s    r   r'  !_CollectiveKernel.should_allocate%  r  r   c                    gra  r   r  s    r   r  "_CollectiveKernel.has_side_effects%  r  r   Nc                n   [        U R                  5      [        R                  R                  L d   S5       eU R                  nUb  Xl        OUR                  R                  U l        UR                  R                   Vs/ s H!  o3R                  (       d  M  UR                  PM#     snU l
        g s  snf )Nz,Setting cpp kernel needs a valid op_overload)r   r	  r  r
  r
  r	  r
  r   r
  r
  r	  )r  r	  r	  r   s       r   r
  %_CollectiveKernel.set_cpp_kernel_name%  s    D$$%)>)>> 	
:	
> !!&#2 #)>>#6#6D  #NN44.
4qFAFF4.
* .
s   B2B2c                   [         R                  R                     U R                  " X/UQ70 UD6u  nnnnn	S S S 5        W	(       a   U SU	 35       eW H@  n
U
R	                  5         [         R                  R                  U
R                  5       5        MB     US   R                  5       nU " [        US9UUWW5      n[        R                  " U5      nUR                  R                  U Vs/ s H  n[        [        US9X5      PM     sn5        UR                  R                  U Vs/ s H  oR                  5       PM     sn5        SU;   a]  UR                  R                  [        [        US9US   U5      5        UR                  R                  US   R                  5       5        g g ! , (       d  f       GN~= fs  snf s  snf )Nr  r   r  r   )rn   r  r  rn
  r  r  rs  r  r  rF
  tree_leavesr 
  r  r  ry  r  )r  r	  rj  r   r   _example_outputrc
  rd
  rO
  r	  
tensor_argr  r  inpsr  rl  s                   r   create_inplace _CollectiveKernel.create_inplace%  s    WW ""6CDCFC!  %E2C1D&EE$%J GG''
(;(;(=> & Q**,f%
 !!&)&&OSTt^Jf5sCtT	

 	!!T"BTc<<>T"BCF?##**z8&-P %%fUm&<&<&>? ; 0 U #Cs   F/.G+G/
F>c           
     B   [         R                  R                     U R                  " X/UQ70 UD6u  nnnnn	S S S 5        W	(       a   U SU	 35       eW H*  n
[	        U
[
        5      (       a  M  U
R                  5         M,     [	        W[        5      (       a  U R                  Xe5      nUc   eU " [        US9UUWW5      n[        U5       VVs/ s H(  u  p[        U R                  U5      U[        U4/5      PM*     snnUl        [        UR                  U5       H_  u  p[        R                   (       d  [#        U5      (       a  M,  [         R                  R$                  R'                  UR(                  5        Ma     UR                  $ U " U R                  U5      UUWW5      n[        R                   (       d  [#        U5      (       d3  [         R                  R$                  R'                  UR(                  5        U/Ul        U$ ! , (       d  f       GN= fs  snnf )Nr  r  )rn   r  r  rn
  r   r	  r  r   r  rw	  r   r  r  r  r   rB   r  rk   rz  r  r   )r  r	  rj  r   r   ri
  rc
  rd
  rO
  r	  ro  r  r  r   rT  r  s                   r   create_out_of_place%_CollectiveKernel.create_out_of_place%  s    WW ""6CDCFC!  %F3D2E&FF$%Jj/::""$ & nd++__[AF%%%!0F "+>!: ";IA ((0AYK
 ";FN  #6>>>B::BSC C GG--11#((;	  C
 >>!$$^4F 66>O? ? ))--fkk:$XFNMe .s   H	/H	
H)r	  r	  r  r   r
  )
r	  r|   rj  zUnion[IRNode, list[IRNode]]r   r   r   r   r   r   )
r	  r|   rj  z!Union[TensorBox, list[TensorBox]]r   r   r   r   r   z+Union[list[MultiOutput], _CollectiveKernel])r   r   r   r   r'  r  r
  rF  rq  rt  r   r   r   r   re  re  %  s    

( )@)@ ,)@ 	)@
 )@ 
)@ )@B 99 29 	9
 9 
59 9r   re  c                  b   ^  \ rS rSr SSS.               SU 4S jjjjrS	S jrSrU =r$ )
_AllReduce_Kerneli&  Nrj  c          
     N   > [         TU ]  UUUUUS US9  U R                  S5        g )Nr  +aoti_torch_cpu__c10d_functional_all_reduce_r  r  r
  r  s	           r   r  _AllReduce_Kernel.__init__ &  =     	/ 	 	
 	  !NOr   c                    UR                  S5        UR                  U 5        [        U R                  [        5      (       a  U R                  U5        g g Nz+torch/csrc/inductor/aoti_torch/c/shim_cpu.hinclude_extra_headerr  r   rN  r  r
  r%
  s     r   r&
  _AllReduce_Kernel.codegen6&  C    $$%RS,,T2dkk6**%%g. +r   r   r   r  r
  rf  r  s   @r   rw  rw  &  s     ,0P KOPP P &	P
 &P +P )P HP 
P P,/ /r   rw  c                  b   ^  \ rS rSr SSS.               SU 4S jjjjrS	S jrSrU =r$ )
_AllReduceKerneli>&  Nrj  c          
     N   > [         TU ]  UUUUUS US9  U R                  S5        g )Nr  *aoti_torch_cpu__c10d_functional_all_reducerz  r  s	           r   r  _AllReduceKernel.__init__?&  s=     	/ 	 	
 	  !MNr   c                    UR                  S5        UR                  U 5        [        U R                  [        5      (       a  U R                  U5        g g r~  r  r%
  s     r   r&
  _AllReduceKernel.codegenU&  r  r   r   r   r  r
  rf  r  s   @r   r  r  >&  s     ,0O KOOO O &	O
 &O +O )O HO 
O O,/ /r   r  c                     ^  \ rS rSr S
SS.               SU 4S jjjjrSS jrSS jr\SS j5       rSU 4S jjr	S	r
U =r$ )_WaitKerneli]&  Nrj  c          
     N   > [         TU ]  UUUUUS US9  U R                  S5        g )Nr  +aoti_torch_cpu__c10d_functional_wait_tensorrz  r  s	           r   r  _WaitKernel.__init__^&  r|  r   c                    UR                  S5        UR                  U 5        [        U R                  [        5      (       a  U R                  U5        g g r~  r  r%
  s     r   r&
  _WaitKernel.codegent&  r  r   c                   U R                   S   n[        U[        5      (       d   e[        U[        5      (       a7  UR                   S   n[        U[        5      (       d   [	        U5      5       eU/$ [        U[
        5      (       aG  UR                   S   n[        U[        5      (       a!  UR                  S   u  pEUR                   U   /$ / $ / $ r  )rj  r   r   re  r   r  r  )r  rl  r   collr   r   s         r   get_volatile_reads_WaitKernel.get_volatile_reads{&  s    kk!n#v&&&&c,--

1Aa((1$q'1(3J[)) ::a=D$ 122QC())I Ir   c                v   [         R                  R                     U R                  X5      u  nnnnnS S S 5        W(       a   U SU 35       eU " [	        UR                  5       S9UWWW5      nUR                  R                  [        [	        UR                  5       S9X(5      5        g ! , (       d  f       N}= f)Nr  r  )	rn   r  r  rn
  r  r  r 
  r  r  )	r  r	  rl  rn  rc
  rd
  rO
  r	  r  s	            r   create_wait_WaitKernel.create_wait&  s    WW ""6/!  %E2C1D&EE$cnn./
 	&&:S^^-=>L	
! s   B**
B8c                   > [         TU ]  5       nU R                  5       nU H@  nUR                  R	                  [
        R                  " UR                  5       5      5        MB     U$ r   )r  rR  r  rV  r  rC   r  rs  )r  r  volatile_readsvrr  s       r   rR  _WaitKernel.get_read_writes&  sS    g-/002 B!!,"6"6r{{}"EF !r   r   r   r  r
  r#  )r	  r|   rl  r   r   r   r  )r   r   r   r   r  r&
  r  rF  r  rR  r   r  r  s   @r   r  r  ]&  s     ,0P KOPP P &	P
 &P +P )P HP 
P P,/2 
 
* r   r  c                V   [        U [        [        45      (       a  [        U 5      $ [        U [        [
        45      (       a5  [        [        R                     " 5       nU  H  nU[        U5      -  nM     U$ [        U [        R                  5      (       a  [        U 5      $ [        5       $ r   )r   r7   r    r2   r   r   r:   r   r"   r
  r  r]
  r   r  r  s      r   r
  r
  &  s    !h%&&$Q''	At}	%	%u||$&A,Q//A 	Au||	$	$$Q''|r   c                V   [        U [        [        45      (       a  [        U 5      $ [        U [        [
        45      (       a5  [        [        R                     " 5       nU  H  nU[        U5      -  nM     U$ [        U [        R                  5      (       a  [        U 5      $ [        5       $ r   )r   r7   r    r1   r   r   r:   r   r"   r
  r  r]
  r  s      r   r
  r
  &  s    !h%&&A	At}	%	%u||$&A#A&&A 	Au||	$	$A|r   c                D   [        U [        5      (       Ga
  [        U R                  [        5      (       Ga  [        U R                  R                  [        5      (       a'  U R                  R                  R                  SU5        g [        U R                  R                  [        5      (       Gan  U R                  R                  R                  SU5        [        U R                  R                  [        5      (       ad  [        U R                  R                  R                  [        5      (       a1  U R                  R                  R                  R                  SU5        g [        U R                  R                  [        5      (       a  U R                  R                  R                  (       dk  [        U R                  R                  R                  S   [        5      (       a4  U R                  R                  R                  S   R                  SU5        g g g g g g g )Nr}  r   )r   r   rM  rq  r  r  rr  r  r  r  rj  )r  r  s     r   assign_origin_noder  &  sq    &)$$FKK)L)Lfkk&&..KK//qA((&11KK//qA&++**N;;
  %%uA A   %%88J 6;;++[99((00fkk..55a8&AAKK$$++A.AA-QRS B 1 : 2 *M$r   )r   r   r   zTypeIs[Union[int, Integer]])r   r   r   r   )r   r   r   r  )r   r  r   z&Callable[[Sequence[_T]], Sequence[_T]])r   z&Callable[[Sequence[_U]], Sequence[_V]]r   z&Callable[[Sequence[_T]], Sequence[_U]]r   rQ  r   )r   z(Sequence[Union[int, torch.SymInt, Expr]]r   zOptional[ShapeEnv]r   r  )r   Sequence[Union[int, Integer]]r   r  r  )r   r   r   r   r   r   )r   r   r   r   r   rT  )r   r  r   r   r   zOptional[torch.Tensor])r  zOptional[Sequence[_T]]r   z Optional[Sequence[Optional[_T]]])r   z2Union[IRNode, OutputSpec, torch.device, None, str]r   r  )r   z&Union[IRNode, torch.device, None, str]r   r   )r   zUnion[Buffer, TensorBox]r.  r   r   r   )r?  r  r@  r  rA  r  r   r   )rT  r   rU  z"Sequence[Union[int, torch.SymInt]]r   r   )rd  rd  r   r   )rj  r  r   r  )r   zUnion[Expr, Sequence[Expr]]r  r  r   rm   )r  r   r  r  r  r   r   r  )r  r  r  r
  rQ  r    r   r  r
  )TFNFN)r   r   rf  r   rp  r   rq  r  r@  r   rN  r  r   ztuple[StorageBox, Layout])r   r   rq  r  r   r   r}  )r  r  rA  r  r   r   )r  r  r   r   )r   r	  r   zTypeIs[Sequence[IRNode]])r  r  r   r   )r   re  r   z-tuple[list[ShapeAsConstantBuffer], list[Any]])r   r   r   rD  )r  r   r  ztorch.fx.Noder   r   (O  
__future__r   r  r  r  r  r  loggingrX  textwrapr{  collections.abcr   r   r   r   r   r	   r
   enumr   r   typingr   r   r   r   r   r   r   r   r   r   r   r   typing_extensionsr   r   r   r   r   r   unittest.mockr   r   r    r!   r"   torch._export.serde.schema_exportserder  r  r  r  r  r  torch._loggingr  torch.fxtorch.utils._pytree_pytreerF
  torch._dynamo.utilsr#   torch._export.serde.serializer$   *torch._higher_order_ops.auto_functionalizer%   torch._inductorr&   r	  r(   torch._library.opaque_objectr)   torch._prims_commonr*   r+   r,   r-   r.   %torch.fx.experimental.symbolic_shapesr/   r0   r1   r2   r3   r4   r5   r6   r7   torch.fx.noder8   torch.utils._ordered_setr:   torch.utils._python_dispatchr;   torch.utils._sympy.functionsr<   r=   r>   r?   torch.utils._sympy.symbolr@   r  rB   rC   codegen.commonrD   rE   rF   rG   rH   rI   rJ   rK   rL   rM   	loop_bodyrN   ops_handlerrO   rP   rQ   rR   runtime.benchmarkingrS   runtime.hintsrT   rU   rV   rW   rX   rY   rZ   r[   r\   r]   r^   r_   r`   ra   rb   rc   rd   re   rf   rg   rh   ri   rj   rk   virtualizedrl   rm   rn   "torch._library.fake_class_registryro   rp   rq   codegen.cutlass.templaterr   codegen.wrapperrs   r  rt   ru   r   r   r#  __version__r0  r/  ImportErrorrv   rw   rx   ry   r   rz   rL  r{   r
  r
  r4
  r|   	getLoggerr   r  r  r)
  r   r   r   r   r  r   r   r   r   r   r   rk  rm  r   r   r   r   r  r  r(  r*  r:  rE  rZ  rf  rm  rt  r   r  r  rM  rO  rn  r  r  r  r+  r  r  r  r  r  r1  r<  r
  rm  rO  rv  rx  ro  r   r  r  r  r|  rS  r  r  r  r  r  r  r  r  r  rP  r  r  r  r  r  r  rr  r*  r  r6  ri  rA  r   r  r  r  r   r   PrimitiveInfoTyper	  r2	  r7	  re	  rs	  r{	  r	  r	  r  r	  r	  r  r
  r
  r	  r  r%  r)  r*  rG  rh  r  r  r  r  r  r  r   r  r  r5  r   r   rg  ri  r  r  rw	  r  r  r   rq  r  r  r  r  r  r  r   r{
  r	  rR
  re  rw  r  r  r
  r
  r  r   r   r   <module>r     s   "          M M :      U T   ' ' 2 2 , ,   $ $ ( ? M # 2 7 
 
 
  / ? Q Q * "     N N - :     0 * ) CB&95$% "(OY'''NJ t_T]T]T]CI&) &C,-) -

 5 5uzz7U7U UVi V!			8??4	8yy~~'T  k	sDk!12K8STU	i 	) d#  $$$D44 , ! $  TX	1>P	 TX
	1
>P

 
 E 
 E 
 O 
 O .2&*:!%
>9
>
>;('7*    
	*$G$G/$G $GN';|, |,~	 UH H HV |
F |
 |
~& %
 %
 %
P 
i 
 
F |$y!y!uu=)< 8  JN<N<N +<NBF<N<N~ b
 b
 b
N '+1:
#  &	& "8D>8D>"BH"LMY M7S9 7St#1 #L[
+ [
| F
5 F
 F
T 	 	 	 V5 V Vr	 !<@=A999 9 :	9
 9 ;9 9x:	$ ^
v ^
 ^
B Y Y Yx -( - -` A9( A9 A9H !( ! !H w; w wt Sh S Sl & & &R_A _AD 6  " K| K K$ S| S S'9	(<7 7  `
Z `
 `
F	C& C[HV [H|!Gf !GHT %{ %D   .V* V*r UEV] E EP U&fi & & & 
K 
[ 
& 6  ( F    Ut4_ t4 t4nH
_ H
VD> DN #udCeCeT<Q6R1SST C CL"| "
\B. \B~5N 50( (6N >Z%N Z%z5455 UR? R Rj E9 EP UR< R Rj U"l " "J
/ 
((" ("V
V 
B=L =@%
 %
P
- 
$1
\ 1
hLl L^))| ))Z/, /d< 8B5 B$8- 826)l 6)r))| ))X9T 9Tx,E ,E^21| 21j-L -8<;< <;~ U  
p& pf U
. 
 
@   2 
  '
, '
X T T Tn+
 +m% m%` U*v * *L U}.\ }. }.@ U ,    D"
"2" Ux  x  x v	)n )X6  "=l "= "=J \  Y Yx/) />/( />S# Sr  T T]r  NJs   j 
j"!j"