
    N j,                       % S SK r S SKrS SKrS SKrS SKrS SKrS SKJr  S SKJ	r	  S SK
Jr  S SKJr  S SKrS SKJs  Jr  S SKJs  Js  Jr  S SKJr  S SKJr  S SKJrJrJrJr  S S	KJ r   S S
K!J"r"  S SK#J$r$J%r%J&r&J'r'J(r(  S SK)J*r*  \RV                  " \,5      r-Sq.\/S-  \0S'   Sq1\/\0S'   \ Rd                  SAS\/4S jj5       r3\ Rd                  SAS\/4S jj5       r4\Rj                  " SSS9 " S S5      5       r6\Rj                  " SSS9 " S S\65      5       r7S\6S\8\&   S\9\:\8\:   4   SS4S jr;S\"S \<\:S!4   S\"S-  4S" jr=\>" 5       r?\>\<\:\<\:S!4   \@4      \0S#'   S$\"S \<\:S!4   S%\<\&S!4   S&\<\&S!4   S'\:S(\@S)\@SS4S* jrAS+\8\6   S$\"S%\<\&S!4   S&\<\&S!4   S\8\6\7-     4
S, jrB0 rC\9\<\R                  \"   \4   S-4   \0S.'   S$\"S/\SS-4S0 jrESBS1 jrF " S2 S-5      rG SCS3\S4\S5\/S-  S\8\6   4S6 jjrH\ SCS3\S4\S5\/S-  S\8\6   4S7 jj5       rISSSS8.S9\R                  S:\S;\S<\/S5\/S-  S=\/S\R                  4S> jjrK " S? S@\R                  R                  5      rNg)D    N)defaultdict)Sequence)cache)cast)_are_we_tracingone_step_redistribute_cost)DTensorSpec
ShardOrderShardOrderEntry
TensorMeta)assert_no_mixed_partial_types)
DeviceMesh)_StridedShardPartial	Placement	ReplicateShard)get_active_debug_mode#_FORCE_MIN_COST_REDISTRIBUTION_PLANF,_DISABLE_REDISTRIBUTE_TRANSFORM_OPTIMIZATIONTenabledc              #   8   #    [         nU q  Sv   Uq g! Uq f = f7f)u  
Context manager to control the redistribution planning strategy for DTensor operations.

This context manager allows you to choose between two algorithms for computing the
sequence of collective operations needed to redistribute a DTensor from one placement
to another:

- **Graph-based**: Uses Dijkstra's algorithm to find the minimum-cost path
  through all possible placement transformations. This approach considers the global
  cost of all collective operations and finds the optimal sequence. Best for complex
  redistribution patterns where reducing communication cost and memory overhead is critical.

- **Greedy**: Uses a heuristic approach that makes locally optimal choices
  at each step. This is faster to compute but may not produce the globally optimal
  transformation sequence. Best for simple redistribution patterns or when planning
  speed is more important than optimal communication.

**Default Behavior (without this context manager):**

When this context manager is NOT used, the algorithm selection follows this priority:

1. **Non-default shard orders**
   → Always use graph-based algorithm (required for correctness)

2. **Explicit `use_graph_based_transform` parameter** to `_gen_transform_infos_non_cached`
   → Use the specified algorithm (True = graph-based, False = greedy)

3. **No explicit parameter** (default case)
   → Use greedy algorithm for faster planning

**Behavior with this context manager:**

This context manager overrides the default selection by setting the global flag
`_FORCE_MIN_COST_REDISTRIBUTION_PLAN`, which takes precedence over the explicit
`use_graph_based_transform` parameter (but not over non-default shard order requirements).

**Cache Considerations:**

The redistribution planner caches transform info for performance via the `@cache`
decorator on `_gen_transform_infos`. If you need to change the algorithm selection
for the same input specs, clear the cache using `_gen_transform_infos.cache_clear()`
to ensure the new setting takes effect and doesn't reuse cached results from a
previous run.

Args:
    enabled (bool): If True, forces the use of the graph-based algorithm.
                   If False, forces the use of the greedy algorithm.
                   Default: True
N)r   )r   	old_values     w/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/torch/distributed/tensor/_redistribute.py use_min_cost_redistribution_planr   4   s'     j 4I*1'8.7+i+   	 disabledc              #   8   #    [         nU q  Sv   Uq g! Uq f = f7f)a  
Context manager to disable the transform optimization pass that merges
consecutive same-type collectives into single flattened operations.

When the optimization is disabled, ``_optimize_transform_infos`` becomes a
no-op and returns the original list of ``_TransformInfo`` objects unchanged.
This is useful for debugging or isolating issues related to the flattened
collective merging logic.

The flag can also be set directly::

    torch.distributed.tensor._redistribute._DISABLE_REDISTRIBUTE_TRANSFORM_OPTIMIZATION = True

Args:
    disabled (bool): If True (default), disables the optimization.
                     If False, explicitly enables it (the normal default).
N)r   )r   r   s     r   +disable_redistribute_transform_optimizationr    q   s'     * =I3;0A7@4y4r   frozenslotsc                   ^    \ rS rSr% \\S'   \\\4   \S'   \\   \S'   S r	S\
S-  4S jrS	rg)
_TransformInfo   mesh_dimsrc_dst_placementslogical_shapec                 v    U R                   S:  d   eU R                  S   U R                  S   :w  d   S5       eg )Nr      zQTransformInfo should only be created if it is an op with some effect, not a no-op)r'   r(   selfs    r   __post_init___TransformInfo.__post_init__   sB    }}!!!&&q)T-D-DQ-GG 	
_	
G    returnNc                 x   U R                   u  pUR                  5       (       a  UR                  5       (       a  gUR                  5       (       a  UR                  5       (       a  gUR                  5       (       a  UR                  5       (       a  gUR                  5       (       a  UR                  5       (       a  gg)z
Return a key for grouping transforms by communication type.

Returns None for local ops (no communication needed), or a string
that identifies the collective type for potential grouping/merging.

all_reducereduce_scatter
all_gather
all_to_allN)r(   
is_partialis_replicateis_shard)r-   srcdsts      r   _comm_type_key_TransformInfo._comm_type_key   s     **>> 0 0 2 2^^#,,..#\\^^ 0 0 2 2\\^^ r0    )__name__
__module____qualname____firstlineno__int__annotations__tupler   listr.   strr<   __static_attributes__r>   r0   r   r%   r%      s6    Mi2339
d
 r0   r%   c                   V    \ rS rSr% Sr\\S'   \\S4   \S'   Sr	\S-  \S'   S
S jr
S	rg)_FlattenedTransformInfo   a   
Represents a flattened transform that combines multiple mesh dimensions
into a single collective operation using a flattened DeviceMesh.

Note: inherits the fields from _TransformInfo. Gets an __init__ with parent fields, followed by child fields,
and runs parent validation (post_init)
mesh.original_mesh_dimsN	avg_scalec                     [         R                  U 5        U R                  b%  U R                  S:  d   SU R                   35       eg g )Nr+   z"avg_scale must be > 1 if set, got )r%   r.   rN   r,   s    r   r.   %_FlattenedTransformInfo.__post_init__   sG    $$T*>>%>>A% 4T^^4DE% &r0   r>   r1   N)r?   r@   rA   rB   __doc__r   rD   rE   rC   rN   r.   rH   r>   r0   r   rJ   rJ      s2     c3h' !IsTz r0   rJ   transform_infocurrent_placementsshard_order_dictr1   c                    U R                   u  p4[        U [        5      (       a  U R                  nOU R                  4n[        U[
        [        -  5      (       a  UR                  n[        5       nU HL  n[        X&   5      S:X  a  [        SU SU  SU SU 35      eUR                  X&   R                  5       5        MN     [        U5      U:X  d#  [        S[        U5       SU SU  SU SU 3
5      e[        U[
        [        -  5      (       a1  UR                  n	X;  a  / X)'   U H  n
X)   R                  U
5        M     U H  n
XAU
'   M	     g)	zk
Update current_placements and shard_order_dict in-place to reflect the
effect of a single transform step.
r   z?Invalid shard_order update. No entries left to pop for src_dim z. transform_info=z, current_placements=z, shard_order=z^Mismatch between expected and removed mesh dims during shard_order update. Expected to remove z, but removed N)r(   
isinstancerJ   rM   r'   r   r   dimsetlen
ValueErroraddpopappend)rS   rT   rU   src_placementdst_placement	mesh_dimssrc_dimremoved_dim_dst_dimr'   s              r   "_update_shard_order_and_placementsrf      s    $2#D#D M."9::"55	#,,.	-!677##eA#,-2 Ui00@ A**<)= >##3"46  OO,599;<  9~,.y>". >""0!1 2&&8%9 :/02  -!677##*(*%!H%,,X6 " '48$ r0   rL   ra   .c                 Z  ^ U R                  5       nU R                  mTc  g[        U4S jU 5       5      nU R                  U5      nUR	                  5       n[        U5      S:  a  UR                  5       nUR                  R                  5        H  nUR                  U:X  d  M  Us  $    g)a  
Query for an explicitly created flattened mesh using layout comparison.

Args:
    mesh: The DeviceMesh to query
    mesh_dims: Tuple of mesh dimension indices to look for

Returns:
    The flattened DeviceMesh if it was explicitly created, None otherwise.
Nc              3   .   >#    U  H
  nTU   v   M     g 7fNr>   ).0imesh_dim_namess     r   	<genexpr>0_get_flattened_mesh_by_layout.<locals>.<genexpr>  s     ;AnQ's   r+   )
_get_root_meshrl   rE   _get_slice_mesh_layoutcoalescerZ   nest_flatten_mappingvalues_layout)rL   ra   	root_mesh	dim_namessliced_layoutexpected_layoutflattened_meshrl   s          @r   _get_flattened_mesh_by_layoutr{      s     ##%I((N ;;;I //	:M#,,.O
?a)..0 $44;;=!!_4!! > r0   _warned_flatten_issuesdevice_meshsrc_placementsdst_placementsnum_ops	comm_typereasonc                    [        U 5      X4nU[        ;   a  g[        R                  U5        U R                  nUb-  U V	s/ s H  oU	   PM	     n
n	SR	                  S U
 5       5      nOSSR	                  S U 5       5       SU  3nSnUS:X  a  S	U S
3nO$US:X  a  SnOUS:X  a  SU S3nO[        SU 35      e[        R                  XX4X]5        gs  sn	f )a  
Warn once per (mesh, dims, reason) about inability to flatten operations.

Args:
    device_mesh: The device mesh being used
    mesh_dims: Tuple of mesh dimensions that could not be flattened
    src_placements: Source placements for the redistribution
    dst_placements: Target placements for the redistribution
    num_ops: Number of sequential operations that will be performed
    comm_type: Type of collective operation (e.g., "reduce_scatter")
    reason: Either "no_flattened_mesh" or "uneven_tensor_shape"
Nz, c              3   .   #    U  H  nS U S 3v   M     g7f)"Nr>   )rj   names     r   rm   :_warn_flatten_optimization_not_possible.<locals>.<genexpr>A  s     ?YTqa[Ys   zdims c              3   8   #    U  H  n[        U5      v   M     g 7fri   )rG   )rj   ds     r   rm   r   C  s     $?YSVVY   z of a%  While redistributing from %s to %s, %d sequential %s operations will be performed. This is suboptimal: multiple collective operations have higher latency (separate kernel launches and synchronization points) and may give inconsistent results between ranks due to different reduction orders. %sno_flattened_meshz&To optimize, flatten mesh dimensions [z0] so DTensor can use a single operation instead.uneven_tensor_shape)z Unfortunately, because the tensor dimension is not evenly divisible by the product of the mesh dim sizes that would need to be flattened for the optimization to work, it can not be optimized.non_ascending_mesh_dimsz0it is not possible to merge non-ascending order z operations.zUnexpected reason: )hashr|   r\   rl   joinAssertionErrorloggerwarning)r}   ra   r~   r   r   r   r   	cache_keyrl   r   rw   dims_strcommon_warning
reason_msgs                 r   '_warn_flatten_optimization_not_possibler   $  s	   * k"I6I**y) //N!09:	1A&		:99?Y??499$?Y$??@[MR	`  $$=hZGwx
	(	(

 
,	,>ykV 	 26(;<<
NN7 ;s   Ctransform_infosc           	        ^^^ [        U 5      S:  a  U $ [        (       a  U $ [        1 Sk5      mS[        S-  S[        4U4S jjnS[
        [        [        4   S[
        [        [        4   S[        4S	 jmS
[        [           S[
        [        S-  [        S-  4   4UU4S jjn/ nSnU[        U 5      :  Gar  X   nUR                  5       n	U" U	5      (       d  UR                  U5        US-  nMI  UR                  n
U/nUS-   nU[        U 5      :  a  U" X   R                  5       5      (       az  T" X   R                  U
5      (       a`  UR                  X   5        US-  nU[        U 5      :  a9  U" X   R                  5       5      (       a  T" X   R                  U
5      (       a  M`  U" U5      u  pUb  UR                  U5        OLUR                  U5        US;   a5  [        [        S U 5       5      5      n[        TUUU[        U5      U	U5        UnU[        U 5      :  a  GMr  [         R#                  SX5        U$ )at  
Optimize transform infos by merging consecutive same-type collectives into
a single flattened operation when a matching flattened DeviceMesh exists.

Merging requirements:
- Operations must be consecutive in the transform list (no reordering).
  Notably, redistributing from P, P, P -> R, S, R is not optimized here and cannot be optimized due to
  optimization needing to fuse non-contiguous reductions, leaving this pattern vulnerable to numerics issues and
  suboptimal perf
- Operations must have the same comm type (e.g., all allgather or all reduce_scatter)
- Operations must have identical src_dst_placements (e.g., can't merge
  Partial->Shard(0) with Partial->Shard(1))
- A flattened mesh covering the relevant dimensions must exist
- For reduce_scatter, tensor dim must be evenly divisible by flattened mesh size

For nested sharding, the merged operation uses the logical_shape from the
outermost mesh dimension (smallest mesh_dim index) which represents the
global tensor shape needed for correct padding/unpadding.

TODO:
- all_to_all operations are excluded from merging, but it may be possible to merge them in some cases.

   >   r5   r3   r4   keyNr1   c                    > U T;   $ )z:Check if a comm type key represents a mergeable operation.r>   )r   MERGEABLE_COMM_TYPESs    r   is_mergeable/_optimize_transform_infos.<locals>.is_mergeable  s    ***r0   p1p2c                    X:X  a  gU u  p#Uu  pEX5:w  a  gUR                  5       (       a  UR                  5       (       d  g[        [        U5      n[        [        U5      nSS1nUR                  U;   =(       a    UR                  U;   $ )z
Check if two src_dst_placements can be merged.

Allows merging of Partial("sum") and Partial("avg") since they can be
combined: perform sum reduction, then scale by avg mesh dims afterward.
TFsumavg)r7   r   r   	reduce_op)	r   r   src1dst1src2dst2partial1partial2mergeable_reduce_opss	            r   are_placements_mergeable;_optimize_transform_infos.<locals>.are_placements_mergeable  s     8

 < !!doo&7&7 && %u~"66 ;""&::	
r0   infosc           	        >^^ [        U 5      S:  a  gU S   R                  mU S   R                  5       n[        UU4S jU  5       5      (       d   e[	        S U  5       5      n[	        [        U5      5      nUS:X  a  X#:w  a  gOUS:X  a  X#S	S	S
2   :w  a  g[        TU5      nUc  gTu  pVUS:X  a)  [        [        U5      R                  m[        U U4S jS9nOUS:X  aa  [        [        U5      R                  m[        U U4S jS9nUR                  T   n[        R                  " U4S jU  5       5      n	X-  S:w  a  gOUS:X  a  U S   nO[        SU 35      eS	n
UnUR                  5       (       a3  [        R                  " U4S jU  5       5      nUS:  a  Un
[!        S5      nX4n[#        SUUR                  UUU
S9S	4$ )a  
Try to create a flattened transform from 2+ same-type transforms.

Returns (result, failure_reason) where:
- result is the FlattenedTransformInfo if successful, None otherwise
- failure_reason is None if successful, or one of:
  - "too_few_transforms": Less than 2 transforms provided
  - "no_flattened_mesh": No flattened mesh exists for the required dimensions
  - "uneven_tensor_shape": For reduce_scatter, tensor dim not evenly divisible
r   )Ntoo_few_transformsr   c              3   J   >#    U  H  nT" UR                   T5      v   M     g 7fri   )r(   )rj   infor   first_placementss     r   rm   J_optimize_transform_infos.<locals>.try_create_flattened.<locals>.<genexpr>  s)      
 %T%<%<>NOOs    #c              3   8   #    U  H  oR                   v   M     g 7fri   r'   )rj   r   s     r   rm   r     s     :ED--Er   r4   )Nr   r5   N)Nr   c                 "   > U R                   T   $ ri   r)   xaffected_dims    r   <lambda>I_optimize_transform_infos.<locals>.try_create_flattened.<locals>.<lambda>      aool6Sr0   )r   c                 "   > U R                   T   $ ri   r   r   s    r   r   r     r   r0   c              3   Z   >#    U  H   nTR                  UR                  5      v   M"     g 7fri   )sizer'   rj   r   r}   s     r   rm   r     s&      2<AD  //Es   (+)Nr   r3   z0Unsupported comm type for try_create_flattened: c              3      >#    U  HM  n[        [        UR                  S    5      R                  S:X  d  M0  TR	                  UR
                  5      v   MO     g7f)r   r   N)r   r   r(   r   r   r'   r   s     r   rm   r     sL      !D!8!8!;<FF%O 0  //!s
   /A"Ar+   r   )r'   r(   r)   rL   rM   rN   )rZ   r(   r<   allrE   sortedr{   r   r   rX   maxr)   mathprodNotImplementedErrorr7   r   rJ   )r   r   ra   sorted_mesh_dimsrz   r:   r;   outermost_infotensor_dim_sizeeffective_shard_mesh_sizerN   
merged_srcscalemerged_placementsr   r   r   r}   s                 @@r   try_create_flattened7_optimize_transform_infos.<locals>.try_create_flattened  s    u:>- !866!H++-	 

 
 
 	
 
 :E::	 	!23 (( ,6 -,& TrT2266{DTU!, $$s+//L ,STN**s+//L ,STN,::<HO(,		 2<A2 )% :a?2 @,&"1XN%B9+N 
 	
>>II ! E
 qy!	$U^
'- $#4,::##3# 

 
	
r0   r   r+   )r   r   r   c              3   8   #    U  H  oR                   v   M     g 7fri   r   )rj   gs     r   rm   ,_optimize_transform_infos.<locals>.<genexpr>=  s     (CUUr   z5_optimize_transform_infos original: %s, optimized: %s)rZ   r   	frozensetrG   boolrE   r   rF   r%   rJ   r<   r^   r(   extendr   r   r   debug)r   r}   r~   r   r   r   resultrk   r   current_keyrT   groupj	flattenedfailure_reasonra   r   r   s    `              @@r   _optimize_transform_infosr   `  sh   : ?a33 %%ST+#* + +
)Y&'
-29i3G-H
	
@f
N#f
	&-sTz9	:f
 f
R >@F	A
c/"
"!))+ K((MM$FA
 "44'+fEO$$_/>>@AA("557I  LL+,FA O$$_/>>@AA("557I  %9$?!	 MM)$ MM%  " 
 "&(CU(C"CD	7""J" e c/"
"f LL? Mr0   DTensorRedistributePlanner_planner_cachedtensor_metac                     [        5       (       a  [        X5      $ [        R                  " U 5      U4nU[        ;  a  [        X5      nU[        U'   [        U   $ )a  
Factory function to get or create a DTensorRedistributePlanner instance.
This function provides transparent caching of planner instances based on
device mesh and dtensor meta. Multiple calls with the same parameters
will return the same cached instance for better performance.
Args:
    device_mesh: The device mesh for the planner
    dtensor_meta: TensorMeta of the DTensor to redistribute
Returns:
    A DTensorRedistributePlanner instance (potentially cached)
)r   r   weakrefrefr   )r}   r   r   planners       r   get_redistribute_plannerr   W  sR     )+DD[)<8I&,[G$+y!)$$r0   c                  ,    [         R                  5         g)z8Clear the cache of DTensorRedistributePlanner instances.N)r   clearr>   r0   r    clear_redistribute_planner_cacher   q  s    r0   c                      \ rS rSrSr\R                  " SSS9 " S S5      5       rS r\	S\
\\\   4   S	\4S
 j5       r\	S\S	\
\\\   4   4S j5       r\	  S'S\S\\   S\\S4   S\S-  S\S	\4S jj5       rS\S\S	S4S jr  S(S jrS\\S4   S\S	\
S\4   4S jrS\S\S	\S   4S jrSSS\S \\S4   S	\\   4S! jrS"\S#\S \\S4   S	\\   4S$ jr S"\S#\S	\\   4S% jr!S&r"g))r   iv  ac  
This class is used to plan the collective calls to transform the local shard
of the DTensor from its current spec to the target spec.
Suppose there are N tensor dimensions and M mesh dimensions, the total
possible state size will be (N+2)*M*M!.
Note: Use get_redistribute_planner() factory function instead of direct
instantiation for automatic caching.
Tr!   c                       \ rS rSr% \\S4   \S'   \\S'   \R                  " SSSSS9r
\S-  \S'   S	 rS
 rS rS\4S jrS\4S jrS\S\4S jrSrg)$DTensorRedistributePlanner.DistStatei  .
placementstensor_dim_to_mesh_dimNF)defaultinitreprcompare_hashc                 X    [         R                  " U R                  U R                  5      $ ri   )r
   format_shard_order_strr   r   r,   s    r   __str__,DTensorRedistributePlanner.DistState.__str__  s%    55++ r0   c                 "    U R                  5       $ ri   )r   r,   s    r   __repr__-DTensorRedistributePlanner.DistState.__repr__  s    <<>!r0   c                 N    [         R                  U SU R                  5       5        g )Nr   )object__setattr___compute_hashr,   s    r   r.   2DTensorRedistributePlanner.DistState.__post_init__  s"    ""$r0   r1   c                 T    U R                   b  U R                   $ U R                  5       $ ri   )r   r  r,   s    r   __hash__-DTensorRedistributePlanner.DistState.__hash__  s#    !%!74::QT=O=O=QQr0   c                 D    [        U R                  U R                  45      $ ri   )r   r   r   r,   s    r   r  2DTensorRedistributePlanner.DistState._compute_hash  s$    OO// r0   otherc                     [        U[        R                  5      (       d  gU R                  UR                  :w  a  gU R                  U R
                  4UR                  UR
                  4:H  $ NF)rW   r   	DistStater   r   r   )r-   r	  s     r   __eq__+DTensorRedistributePlanner.DistState.__eq__  sd    e%?%I%IJJzzU[[(++   ,, r0   r>   )r?   r@   rA   rB   rE   r   rD   r   dataclassesfieldr   rC   r   r   r.   r  r  r   r   r  rH   r>   r0   r   r  r     sw    )S.)) **'--u5%
sTz 	
		"		Rc 	R	3 		 	4 	r0   r  c                 j   ^  [        U[        [        -  5      (       a  [        U 4S jU 5       5      $ U$ )z<Convert a nested list structure to a nested tuple structure.c              3   F   >#    U  H  nTR                  U5      v   M     g 7fri   )	_to_tuple)rj   itemr-   s     r   rm   7DTensorRedistributePlanner._to_tuple.<locals>.<genexpr>  s     <!$--!s   !)rW   rF   rE   )r-   r   s   ` r   r  $DTensorRedistributePlanner._to_tuple  s*    a&&<!<<<r0   r   r1   c                 T    [        S [        U R                  5       5       5       5      $ )zConvert dict to ShardOrderc              3   ^   #    U  H#  u  pU(       d  M  [        U[        U5      S 9v   M%     g7f))
tensor_dimra   N)r   rE   )rj   r   values      r   rm   ADTensorRedistributePlanner._dict_to_ShardOrder.<locals>.<genexpr>  s*      
/
 DOseElC/s   --)rE   r   items)r   s    r   _dict_to_ShardOrder.DTensorRedistributePlanner._dict_to_ShardOrder  s)      
$QWWY/
 
 	
r0   c                 x    [        [        5      nU  H$  n[        UR                  5      XR                  '   M&     U$ )z1Convert ShardOrder to dict with tensor dim as key)r   rF   ra   r  )r   tensor_mesh_dim_dictentrys      r   _ShardOrder_to_dict.DTensorRedistributePlanner._ShardOrder_to_dict  s7      +40E59%//5J !1!12 ##r0   NrL   r   r_   .src_shard_order use_strided_shard_as_shard_orderc                    [        U5      U R                  :X  d   eU(       a  [        R                  " X SS9u  p#Uc  [        R                  " U5      n[        U5      n[        R                  U5      n[        R                  [        U5      U5      nU/n/ n	U Hs  n
[        U
[        5      n[        XU5        [        R                  [        U5      [        R                  U5      5      nUR                  U5        U	R                  U5        Mu     [        US   5      /n[!        U	5       H@  u  pU(       a  SOSnUR                  U5        UR                  [        XS-      5      5        MB     SR#                  U5      $ )aL  
Generate a string representation of the sequence of state transitions
(placements and shard orders) as described by the given transform_info.

Args:
    mesh: The DeviceMesh used for the redistribution.
    transform_infos: A sequence of _TransformInfo objects describing each
        transformation step.
    src_placement: The initial tuple of Placement objects.
    src_shard_order: (Optional) The initial ShardOrder representing
        the mapping of tensor dimensions to mesh dimensions. If None,
        the default shard order is computed from src_placement and mesh.
    use_strided_shard_as_shard_order: If True, normalize _StridedShard
        placements into regular Shard placements with an explicit
        shard_order before stringifying.

Returns:
    A string showing the sequence of DistState transitions, separated by '->'.
Tr%  r   z-->z->r+    )rZ   ndimr
   &_normalize_placements_into_shard_ordercompute_default_shard_orderrF   r   r"  r  rE   rW   rJ   rf   r  r^   rG   	enumerater   )rL   r   r_   r$  r%  cur_placementrU   	cur_state
state_listis_flattened_listrS   is_flattened	new_statetrace_partsrk   	separators                   r   stringify_transform_infos4DTensorRedistributePlanner.stringify_transform_infos  sq   6 =!TYY...+BB!$ +M
 ")EEmTO]+5II
 /88- /
	 

 )+-N%n6MNL./? 3<<m$*>>?OPI i($$\2 . :a=)*():;OA!-4Iy)s:!e#456  < ww{##r0   r}   r   c                     Xl         UR                  5       (       d   eUc   eX l        [        UR                  5      U l        [        5       U l        [        5       U l        U R                  5         g)z
Initialize DTensorRedistributePlanner.

Args:
    device_mesh: The device mesh for this planner
    dtensor_meta: TensorMeta of the DTensor to redistribute
N)
r}   _is_current_rank_part_of_meshr   rZ   shapetensor_dimensionrY   "strided_shard_placements_in_targetpartial_reduce_ops_in_targetsetup_cost_callbacks)r-   r}   r   s      r   __init__#DTensorRedistributePlanner.__init__  sd     '88::::'''( #L$6$6 7FIe/69e)!!#r0   c                 ^   ^ ^ S[         R                  S[        4U 4S jjmU4S jnUT l        g)z
Set up the cost function for different collective operations.
Uses communication time estimation based on actual tensor sizes and
mesh topology for accurate cost modeling.
stater1   c                 n   > [        TR                  U R                  TR                  U R                  SS9$ )NF)rL   r   tensor_metashard_orderr%  )r
   r}   r   r   r   )rA  r-   s    r   state_to_specFDTensorRedistributePlanner.setup_cost_callbacks.<locals>.state_to_spec-  s9     %% ++ --!8816 r0   c                 4   > [        T" U 5      T" U5      5      $ ri   r   )	src_state	dst_staterE  s     r   cost_functionFDTensorRedistributePlanner.setup_cost_callbacks.<locals>.cost_function8  s    -i(-	*B r0   N)r   r  r
   rJ  )r-   rJ  rE  s   ` @r   r=  /DTensorRedistributePlanner.setup_cost_callbacks$  s-    		-77					
 +r0   r   tensor_mesh_dim_tupler   c           	         0 n[         R                  U5      nU R                  U R                  U5      U5      nU H  nUR                  nXG   S   n[        X   [        5      (       d  M/  [        U R                  5       H  n	Xy:X  a  M
  XG   R                  5       n
XI   R                  U
5        [        U5      n[        U	5      X'   U R                  U R                  U5      [         R                  U5      5      nU R                  UU5      X<'   XG   R                  U
5        XI   R                  5         M     M     U H  nUR                  nXG   S   n[        X   [        5      (       d  M/  XG   R                  5       n
[        U5      n[        5       X'   U R                  U R                  U5      [         R                  U5      5      nXG   R                  U
5        U R                  UU5      X<'   M     [        U5       Hh  u  p[        U[         5      (       d  M  [        U5      n[        5       X'   U R                  U R                  U5      U5      nU R                  UU5      X<'   Mj     [        U5       H  u  p[        U[        5      (       d  M  [        U R                  5       H  n	[        U5      n[        U	5      X'   XI   R                  U5        U R                  U R                  U5      [         R                  U5      5      nU R                  UU5      X<'   XI   R                  5         M     M     [        U5       H  u  p[        U[         5      (       d  M  [        U R                  5       H  n	[        U5      n[        U	5      X'   XI   R                  U5        U R                  U R                  U5      [         R                  U5      5      nU R                  UU5      X<'   XI   R                  5         M     M     [        U5       H  u  p[        U[        5      (       d  M  U R"                   H  n[        U5      n[!        U5      X'   U Vs1 s H&  n[        U[         5      (       d  M  UR$                  iM(     nn['        U5      S:  a
  USS1:w  a  Mg  U R                  U R                  U5      U5      nU R                  UU5      X<'   M     M     U H  nUR                  nXG   S   n[        X   [(        5      (       d  M/  XG   R                  5       n
[        U5      n[        5       X'   U R                  U R                  U5      [         R                  U5      5      nXG   R                  U
5        U R                  UU5      X<'   M     U R*                  (       d  U$ [        U5       H  u  p[        U[        5      (       d  M  U R*                   H  nUR,                  n	[        U5      nUX'   XI   R                  U5        U R                  U R                  U5      [         R                  U5      5      nU R                  UU5      X<'   XI   R                  5         M     M     U$ s  snf )Nr   r+   r   r   )r   r"  r  r  r  rW   r   ranger:  r]   r^   rF   r  rJ  r   r,  r   r<  r   rZ   r   r;  rX   )r-   r   rM  all_next_stater   cur_dist_stater!  src_tensor_dimsrc_mesh_dimdst_tensor_dimmove_mesh_dimnew_placements
dist_state	placementr'   r   ppartial_reduce_opsstrided_shard_objs                      r   get_next_state)DTensorRedistributePlanner.get_next_state?  s$   H MO9MM! 
 NN:&!
 +E"--N/?CLj6>>"'(=(=">!3 !5 D H H J$4;;MJ!%j!105n0E-!^^NN>2.BB,
 .2-?-?".*
 %4;;MJ$488:+ #? +B +E"--N/?CLj6>>0@DDFM!*-N,5KN)~.*>>?STJ !077F)-););*N& +* (1'<#Li11!*-N+4;N(~.0EJ *.););*N& (= $-Z#8Hi33"'(=(=">!%j!1+0+@($4;;HE!^^NN>2.BB,
 .2-?-?".* %488: #? $9, $-Z#8Hi11"'(=(=">!%j!1+0+@($4;;HE!^^NN>2.BB,
 .2-?-?".* %488: #? $90 $-Z#8Hi33!>>	!%j!1+29+=( *8&)7A:a;QKAKK # & )*Q.3E%QV3W!^^NN>24I
 .2-?-?".* ? $9: +E"--N/?CLj6FF0@DDFM!*-N,5KN)~.*>>?STJ !077F)-););*N& +& 66!! $-Z#8Hi33%)%L%L!!2!6!6!%j!1+<($4;;HE!^^NN>2.BB,
 .2-?-?".* %488:! &M $9* Q&s   Y.2Y.rH  rI  c                    SSK nSnSXAU/4/n[        5       nU(       a  UR                  U5      u  pxpX:X  a  U
$ X;   a  M)  UR                  U	5        U R	                  U	R
                  U	R                  5      nUR                  5        H.  u  pX;  d  M  X}-   nX/-   nUS-  nUR                  X^XLU45        M0     U(       a  M  [        SU SU 35      e)z
Find the min cost path from src_state to dst_state using Dijkstra's
algorithm.

Args:
    src_state: The source state
    dst_state: The destination state

Returns:
    A list of states representing the min cost path from src_state to
    dst_state
r   Nr+   zNo path found from src_state z to dst_state )
heapqrY   heappopr\   r\  r   r   r  heappushr   )r-   rH  rI  r_  counterpqvisitedcostrd   current_statepathnext_states
next_statetransition_costnew_costnew_paths                   r   find_min_cost_path-DTensorRedistributePlanner.find_min_cost_pathY  s     	  i[12 	 %+0==+<(D])'KK&--((-*N*NK 0;/@/@/B+
,#5H#l2HqLGNN2'x'PQ 0C b" +I;nYKP
 	
r0   r'   full_tensor_shapec           	      n   [        U5      nUR                   GH  nUR                  nUR                  n[	        U5      S:  d   eU H  nX:X  a  M
  UR
                  U   n	[        U	[        5      (       aH  U	R                  XF   U R                  R                  US9U R                  R                  U5      5      u  pOk[        U	[        5      (       aH  U	R                  XF   U R                  R                  US9U R                  R                  U5      5      u  pO[        SU	 35      eXU'   M     GM     U$ )Nr   r   zUnsupported placement type: )rF   r   r  ra   rZ   r   rW   r   local_shard_size_and_offsetr}   r   _sym_get_coordinater   r[   )r-   rH  r'   ro  new_logical_shaper!  r  ra   mdimrX  new_sizerd   s               r   get_logical_shape,DTensorRedistributePlanner.get_logical_shape  s4    !!2355E))JIy>A%%%!#%006	i//"+"G"G)5((--t-<((<<TB#KHa
  	=99"+"G"G)5((--t-<((<<TB#KHa %'CI;%OPP08*-% "	 6. ! r0   src_specdst_specc           
         S[         S[        [        [        S4   [        4   4S jnU" U5      u  pVU" U5      u  pxU H5  n	[	        U	[
        5      (       d  M  U R                  R                  U	5        M7     [        R                  " XW5       H?  n	[	        U	[        5      (       d  M  U R                  R                  U	R                  5        MA     U R                  XV5      n
U R                  Xx5      n/ nU R                  X5      n[        R                  " U5       H  u  pUR                   UR                   :w  d  M!  Sn[#        [%        UR                   UR                   5      5       HS  u  nu  nnUU:w  d  M  US:w  a  ['        S5      eUnU R)                  UUU5      nUR+                  [-        UUU4US95        MU     M     U$ )Nspecr1   .c                     U R                   (       a/  [        R                  " U R                  U R                  SS9u  pX4$ U R
                  c  [        SU  35      eU R                  U R
                  4$ )NTr'  zMissing shard_order field in )r%  r
   r*  r   rL   rD  r[   )r{  rV  rD  s      r   _try_normalize_spec\DTensorRedistributePlanner.generate_graph_based_transform_infos.<locals>._try_normalize_spec  sr     44FF		9= , &22##+$'DTF%KLL(8(888r0   r   z@Multiple mesh_dims are different between cur_state and nxt_stater'   r(   r)   )r
   rE   r   r   rW   r   r;  r\   	itertoolschainr   r<  r   r  rm  pairwiser   r,  zipr   rv  r^   r%   )r-   rx  ry  ro  r}  r~   r$  r   dst_shard_orderrX  rH  rI  r   
state_pathr.  	nxt_stateupdate_mesh_dimr'   r-  nxt_placementr)   s                        r   $generate_graph_based_transform_infos?DTensorRedistributePlanner.generate_graph_based_transform_infos  s   	9	95C(*45	9" +>h*G'*=h*G'
 (I)]3377;;IF ( #HI)W--1155i6I6IJ I NN>C	NN>C	02,,YB
$-$6$6z$B I##y';';;"$@I	,,i.B.BCA<H<}m %5*b0"0 b#  +3(,(>(>%x1B) (..*)84A=3Q.;A	 %C0 r0   c           	         [        UR                  5      nU/n/ nU R                  R                  S:X  aX  UR                  S   UR                  S   :w  a6  UR                  [        SUR                  S   UR                  S   4US95        U$ [        UR                  5       H  u  pgXF   n[        U[        5      (       a  X`R                  R                  S-
  :  a  U R                  R                  US9n	UR                  XR                     U	U R                  R                  U5      5      u  p[        U5      nXUR                  '   UR                  U5        M  M  UR                  U5        M     [        UR                  5      n[        UR                  5      nUR                  S:  Ga  [        [!        [#        U5      5      5       H  nX   nX   n[        U[        5      (       a  UR                  n/ / nn[        [%        X5      5       H`  u  nu  nnXo:  a    OTUR'                  U5      (       a  UR                  U5        UR'                  U5      (       d  MO  UR                  U5        Mb     UU:w  a
  [)        5       nUU:w  d  M  UR                  [        UUU4XO   S95        UX'   M     [        [%        X5      5       H3  u  nu  nnUU:w  d  M  UR                  [        UUU4XO   S95        UX'   M5     U$ )ax  
Generate the transform infos from the source placements to the target placements.

To transform from source to target placement it might have multiple steps, i.e. it
might decompose Si -> Sj into Si -> R -> Sj.
This would detect if there're mis-aligned/nested shardings between src/dst placements.
E.g. Suppose the redistribution to perform is (Shard(0), Shard(0)) -> (Replicate(), Shard(0)),
in this case Shard(0) -> Shard(0) for mesh dimension 1 actually needs resharding, because in
the former is a nested-sharding of a tensor already already sharded dimension 0, whereas
the latter is the first sharding on tensor dimension 0.
r+   r   r  r   )rF   r9  r}   r)  r   r^   r%   r,  rW   r   r   _local_shard_size_and_offsetrX   rr  
num_shardsreversedrO  rZ   r  r9   r   )r-   rx  ry  initial_logical_shapemesh_dims_to_logical_shaper   rk   r:   current_logical_shapemesh_dim_sizelocal_shard_sizerd   rs  rT   target_placementsr'   currenttarget	shard_dimcurrent_mesh_shardingtarget_mesh_shardingsrY  s                          r   generate_greedy_transform_infos:DTensorRedistributePlanner.generate_greedy_transform_infos  s   $ !%X^^ 4&;%<"02  A% ""1%)<)<Q)??&&"!"$//2$//2, '<	 #"
   3 34FA$>$A!#u%%'',,q00$($4$4$9$91$9$EM*-*J*J-gg6%((<<Q?+'$
 )--B(C%1Acgg..556GH 1 +112GH 5( "("5"56 !4!45" %U3/A+B%CD,6*4 fe,, !'

IBDb+?)%..B&	6Aq =!::i00188;::i00077:& -0DD
 "+f$#**&%-07/@*D*N 4:&0G EP ,5"6,
'H'w & &&"!),3V+<&@&J 06",,
 r0   )rJ  r}   r   r<  r;  r:  r  rQ   )#r?   r@   rA   rB   rR   r  	dataclassr  r  staticmethoddictrC   rF   r   r  r"  r   r   r%   rE   r   r   rG   r5  r   r>  r=  floatr\  rm  rv  r
   r  r  rH   r>   r0   r   r   r   v  s-    $d3. . 4.` 
tCcN3 

 
 
 $z $d3S	>.B $ $ 
 .216D$D$!.1D$ Y^,D$ $d*	D$
 +/D$ 
D$ D$L$$ !$ 
	$*+	+6V)S.)V  *V 
4e;	<	Vt0
"0
/80
	4	50
d!9! ! !c?	!
 
c!@GG G !c?	G
 
n	GRyy y 
n		yr0   rx  ry  use_graph_based_transformc                    U R                   nU R                  nUR                  n[        S XE4 5       5      (       + n[        S / U R                  QUR                  Q7 5       5      nU(       d  U(       a  SnO[
        b  [
        nOUc  SnU R                  c   e[        UU R                  5      nU(       a  UR                  XU R                  5      n	U	$ UR                  X5      n	U	$ )Nc              3   N   #    U  H  n[         R                  " U5      v   M     g 7fri   )r
   is_default_device_order)rj   orders     r   rm   2_gen_transform_infos_non_cached.<locals>.<genexpr>z  s%      $7E 	++E227s   #%c              3   B   #    U  H  n[        U[        5      v   M     g 7fri   )rW   r   )rj   rY  s     r   rm   r  ~  s!      =A 	1m$$=s   TF)r}   rD  r   anyr   r   rC  r   r  r9  r  )
rx  ry  r  r}   r$  r  has_non_default_orderhas_strided_sharddrpr   s
             r   _gen_transform_infos_non_cachedr  p  s   
 &&K**O**O !$ $%7$ !   =8&&=)<)<=   1$(!	,	8$G!	"	*$)!+++
"C !BB

  ==hQr0   c                     [        XU5      $ ri   )r  )rx  ry  r  s      r   _gen_transform_infosr    s     +5 r0   )async_opr  is_explicitlocal_tensorcurrent_spectarget_specr  r  c                0   UR                   UR                   :w  a  [        S5      eUR                  c  [        S5      e[	        UR
                  5        [	        UR
                  5        U nUR                   nUR                  5       (       d  U $ [        5       (       a  [        XU5      nO[        XU5      n[        UUUR
                  UR
                  5      n	[        5       n
U
b\  U
R                  U UR
                  UR
                  [        R                  UU	UR
                  UR                  UR                  5      US9O[         R"                  " 5       nU   U	 GH  n[%        U[&        5      (       a  UR                   nOUnUR(                  nUR*                  u  nnUR-                  US9nUU:X  a  U nM\  US:X  a  U nMf  UR/                  5       (       a  UR1                  5       (       aT  [3        [4        U5      nUR7                  XU5      n[%        U[&        5      (       a  UR8                  b  XlR8                  -  nGOUR;                  5       (       a.  [3        [<        U5      nUR?                  XXR@                  5      nGO>[%        U[B        5      (       a  UR?                  XXR@                  5      nGO[E        SU SU S	35      eUR;                  5       (       GaM  [3        [<        U5      nUR1                  5       (       aU  [3        [4        U5      nURG                  XUU5      n[%        U[&        5      (       a  UR8                  b  XlR8                  -  nGOiUR/                  5       (       a#  URI                  XXRK                  U5      5      nGO1UR;                  5       (       aU  [3        [<        U5      nURL                  URL                  :w  a)  URO                  U UUUR@                  URL                  5      nGO[%        U[B        5      (       a  [        S
5      e[        SU SU 35      eUR1                  5       (       a  UR/                  5       (       a$  [3        [4        U5      nURQ                  XU5      nGOHUR;                  5       (       d  [%        U[B        5      (       a  [E        SU SU S	35      eUU:w  a  [S        SU SU S35      eU nO[%        U[B        5      (       a  UR1                  5       (       a  [        S5      eUR/                  5       (       a"  URU                  XXRK                  U5      5      nOUR;                  5       (       a  [        S5      e[%        U[B        5      (       a?  UR?                  XXR@                  5      nURU                  UX~URK                  U5      5      nO[        SU SU 35      eU(       d/  [%        U[V        RX                  5      (       a  UR[                  5       nUn GM     SSS5        U$ ! , (       d  f       U$ = f)z
This redistribute the local tensor (torch.Tensor) from the current DTensorSpec to
the target DTensorSpec, which involves the necessary collective calls to transform
the local shard of the DTensor from its current spec to the target spec.
z)Cross device mesh comm not supported yet!NzUuse_strided_shard_as_shard_order should be initialized in DTensorSpec.__post_init__())r  r   r+   zredistribute from z to z not supported yetz?Redistribute from _StridedShard to Shard is not implemented yetzUnexpected placement z& for redistribute to target placement z&Redistribution from one partial type (z) to another (z) is unsupported.zARedistribute from Partial to _StridedShard is not implemented yetz?Redistribute from Shard to _StridedShard is not implemented yet).rL   r   r%  r[   r   r   r8  r   r  r  r   r   record_redistribute_callsr   r5  rD  
contextlibnullcontextrW   rJ   r'   r(   r   r8   r7   r   r   _reduce_valuerN   r9   r   _to_replicate_tensorr)   r   RuntimeError_reduce_shard_value_replicate_to_shardrr  rX   _to_new_shard_dim_partition_valuer   _replicate_to_strided_shardfuncolAsyncCollectiveTensorwait)r  r  r  r  r  r  new_local_tensorr}   r   optimized_transform_infos
debug_moderedistribute_contextrS   mesh_to_userk   r  r  
num_chunkspartial_speccurrent_placementtarget_placement
shard_spec
replicateds                          r   redistribute_local_tensorr    s     K,,,!"MNN44<c
 	
 ","9"9:!+"8"89###K4466 9'@
 /'@

 !:	! '(J  ! 	,,##""&@@)''((== $ 	- 	
 ##% $ 
7N.*ABB,11)''A,??OGV$))1)5J& #/ Q $0 ""$$%%''#'#9L'3'A'A$1($
 #>3JKK*44@+;>V>V+V(%%''(,UG(<%'8'M'M$16R6R($  77'.'C'C$16R6R($ ',WIT&AST  ""#'v#6 %%''#'#9L'3'G'G$16F($
 #>3JKK*44@+;>V>V+V())++'7'K'K$16U6UVW6X($ %%''!%eW!5J!~~)9)=)==+5+G+G('*88,00,(  77-Y  %/y8^_e^fg  ""$$''))#'#8L'3'D'D$1($ %%'':g}+M+M&,WIT&AST  &(,DWI^\b[cctu  (4$FM22%%''-[  ))++'-'I'I$16U6UVW6X($ %%''-Y   77 ")!=!=$16R6R"J (.'I'I"KK4S4STU4V($ %/y8^_e^fg  
 &">">! ! $4#8#8#: +LQ 8 
T U 
	T s   R%X
Xc                       \ rS rSr\   SSSS\S\\S4   S\S	\	R                  S-  S
\	R                  S-  4S jj5       r\SS j5       rSrg)Redistributei  Ninputdtensor.DTensorr}   r   .r  forward_dtypebackward_dtypec           
      $   X@l         X`l        UR                  R                  U l        Ubs  XQR                  R                  :w  aZ  UR                  R                  US9n[        UUR                  R                  [        UR                  UR                  5       US9S9nOUR                  nUR                  nXl        UR                  U:w  a"  [        X#UR                  S9n	[        UUU	USS9n
OUn
Un	[        R                   " U
U	UR"                  S9$ )Ndtyper9  strider  rL   r   rC  rC  Tr  r  requires_grad)r  r  _local_tensorr  original_dtypetor
   _specr   r   r9  r  r  rC  r  dtensorDTensorr  )ctxr  r}   r   r  r  r  r  r  r  outputs              r   forwardRedistribute.forward  s     +"0066$:M:M:S:S)S ..111FL&  ;;11&++ <<>'L !..L ;;L'""j0%\5M5MK /! F "F&K --
 	
r0   c           
         U R                   nU R                  nU R                  =(       d    U R                  nXAR                  R
                  :w  a  UR                  R                  US9n[        UR                  R                  UR                  R                  [        UR                  UR                  5       US9S9n[        UR                  UR                  UR                  S9nOUR                  nUR                  n/ n[        UR                  UR                  5       Hp  u  pUR!                  5       (       d  UR#                  5       (       a0  U	R%                  5       (       a  UR'                  [)        5       5        M_  UR'                  U	5        Mr     [        UR                  [+        U5      UR                  S9n[-        UUUUSS9n
U
R
                  U R                  :w  a  U
R                  U R                  5      n
[        UR                  [+        U5      [        UR                  UR                  5       U
R
                  S9S9n[.        R0                  " U
UUR2                  S9nUS S S S S 4$ )	Nr  r  r  )r   rC  Tr  r  r  )r  r  r  r  r  r  r  r
   r  r}   r   r   r9  r  rC  r  r9   r8   r7   r^   r   rE   r  r  r  r  )r  grad_outputprevious_specr  r  r  r  normalized_placementsr  r  r  r{  output_dtensors                r   backwardRedistribute.backward  s6   ((<<++As/A/A66<<<&4477n7ML& &&22&,,77&%++&--/(L ("..(33(44M '44L&,,L 24"<#:#:M<T<TUOG  ""g&:&:&<&<&BSBSBUBU%,,Y[9%,,V4	  V $%%23%11
 +
 <<3---YYs112F%%'("!''"))+ll
 !%33
 
 	
r0   r>   )FNN)r  r  )r?   r@   rA   rB   r  r   rE   r   r   torchr  r  r  rH   r>   r0   r   r  r    s     ,0-17
 !7
  	7

 )S.)7
 7
 {{T)7
 d*7
 7
r R
 R
r0   r  )TrQ   ri   )Or  r  r  loggingr   r   collectionsr   collections.abcr   	functoolsr   typingr   r  )torch.distributed._functional_collectivesdistributed_functional_collectivesr  torch.distributed.tensor._apitensor_apir  r   *torch.distributed.tensor._collective_utilsr	   &torch.distributed.tensor._dtensor_specr
   r   r   r   torch.distributed.tensor._utilsr   $torch.distributed.tensor.device_meshr   (torch.distributed.tensor.placement_typesr   r   r   r   r   torch.utils._debug_moder   	getLoggerr?   r   r   r   rD   r   contextmanagerr   r    r  r%   rJ   rF   r  rC   rf   rE   r{   rY   r|   rG   r   r   r   ReferenceTyper   r   r   r  r  Tensorr  autogradFunctionr  r>   r0   r   <module>r     s         # $    : : / / E Q  J ;  : 
		8	$ 48 #TD[ 7 6; ,d : 98d 98 98x A$ A A8 d$/  0B d$/n  02.5".5Y.5 3S	>*.5 
	.5b"
"!&sCx"$"L AD E#uS#X";<= E99S#X9 )S.)9 )S.)	9
 9 9 9 
9xm.)mm )S.)m )S.)	m
 
.2
23mh  	'



+Z
78 " %%% "%4
w wz .2(((  $d{( 
.	(V  .2  $d{ 
.	  -1V,,VV V
 V  $d{V V \\VrN
5>>** N
r0   