
    N j.                     >   S r SSKrSSKJr  SSKrSSKJr  SSKJr  SSK	J
r
  SSKJr  SSKJr  SS	KJrJrJr  SS
KJr  SSKJr  SSKJr  SSKJrJrJrJr  SSKJr  SSK J!r!  S\S\4S jr"SSK#J$r$J%r%J&r&J'r'  S\S\(\\)-  S4   4S jr* " S S\!5      r+ " S S5      r,g)a  
Decomposition-based sharding propagation for DTensor.

When an operator doesn't have a registered sharding strategy, we derive one by
tracing through its decomposition. The decomposed ops (which do have strategies)
determine how placements propagate through the original op.
    N)Any)decomposition_table)
OpOverload)_are_we_tracing)
DeviceMesh)DTensorSpec)OpSchema
OpStrategyRuntimeSchemaInfo)expand_to_full_mesh_op_strategy)ShardingPropagator)try_find_mesh_from_args)_StridedShard	Placement	ReplicateShard)GuardOnDataDependentSymNode)TorchDispatchModeopreturnc                    U R                   nSn[        UR                  5       H?  u  p4UR                  (       a    O+UR                  R                  5       S:w  d  M8  Ub  M=  Un  O   / nUR                   HQ  nUR                  (       d  M  UR                  R                  5       S:w  d  M6  UR                  UR                  5        MS     0 nUb  X&S'   U(       a  XVS'   [        S0 UD6$ )zGInfer RuntimeSchemaInfo from an operator's schema for decomposition opsN
TensorTypestatic_argnumstatic_kwargkey )	_schema	enumerate	arguments
kwarg_onlytypekindappendnamer   )r   schemar   iargkwarg_only_nameskwargss          y/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/torch/distributed/tensor/_decompositions.py_infer_schema_info_from_opr*   !   s    ZZF MF,,->>88==?l*}/DM . >>>chhmmo=##CHH-   F "/$4 ! &v&&    )tree_anytree_flattentree_maptree_map_only	op_schema.c                 b    U R                   [        U R                  R                  5       5      -   $ N)args_schematuplekwargs_schemavalues)r0   s    r)   _extract_input_specsr7   B   s'      5)@)@)G)G)I#JJJr+   c                   ^   ^  \ rS rSrSrS\S\4U 4S jjrSS jrS\	S	\
\	-  S
S4S jrSrU =r$ )PlacementTrackingModeF   z
TorchDispatchMode that tracks DTensor placements through op execution.

Used during decomposition tracing: intercepts each op, propagates sharding
via the ShardingPropagator, and records output placements on the result tensors.
sharding_propmeshc                 :   > [         TU ]  5         Xl        X l        g r2   )super__init__r;   r<   )selfr;   r<   	__class__s      r)   r?   PlacementTrackingMode.__init__N   s    *	r+   Nc                 n   [        S X4=(       d    0 45      u  pV[        S XV45      (       d  [        SU 35      e[        XU5      nU R                  R
                  R                  U5      nUc%  U R                  R                  R                  U5      nUb  Xl        UR                  5         [        5       (       a  U R                  R                  U5      n	OU R                  R                  U5      n	U	R                  (       a  [        SU 35      eU" U0 UD6n
U R                  XR                   5        U
$ )Nc                 ^    [        U [        R                  5      (       a  [        U SU 5      $ U $ )N_spec)
isinstancetorchTensorgetattrxs    r)   <lambda>:PlacementTrackingMode.__torch_dispatch__.<locals>.<lambda>U   s&    
1ell0K0Kga!,RQRRr+   c                 "    [        U [        5      $ r2   rF   r   rJ   s    r)   rL   rM   Z       jK0r+   z(No DTensorSpec found in args/kwargs for z*Decomposition requires redistribution for )r.   r,   NotImplementedErrorr	   r;   op_to_schema_infoget)op_to_schema_info_for_single_dim_strategyschema_info_recompute_comparison_keyr    propagate_op_sharding_non_cachedpropagate_op_shardingneeds_redistributeRuntimeError_record_output_specsoutput_spec)r@   functypesargsr(   r3   r5   r0   rU   output_shardingouts              r)   __torch_dispatch__(PlacementTrackingMode.__torch_dispatch__S   s2   %-R<R &
"
 0;2N
 
 &(PQUPV&WXX T>	((::>>tD""LLPPQUV  "$/!//1"00QQO #00FFyQO--!KD6RSSD#F#!!#'B'BC
r+   outputr\   r   c                    [        U[        R                  5      (       a
  Ub  X!l        g [        U[        [
        45      (       aB  [        U[        [
        45      (       a&  [        X5       H  u  p4U R                  X45        M     g g g r2   )rF   rG   rH   rE   r4   listzipr[   )r@   rd   r\   tss        r)   r[   *PlacementTrackingMode._record_output_specsx   sl    fell++0G&L..:%4
 4
 F0))!/ 14
.r+   )r<   r;   )r   N)__name__
__module____qualname____firstlineno____doc__r   r   r?   rb   r   r   r[   __static_attributes____classcell__)rA   s   @r)   r9   r9   F   sG    &8 
 
#J03 0[3=N 0SW 0 0r+   r9   c                       \ rS rSrSr\S\S\4S j5       r\S\S\	SS4S j5       r
\S	\S\	S\S-  4S
 j5       r\S	\S\\S-     S\S\	S\\\S4   -  4
S j5       r\S	\S\\\S-        4S j5       rSrg)DecompShardingStrategy   a-  
Generates sharding strategies for ops by tracing through their decompositions.

For each candidate input placement combination, runs the decomposition on meta
tensors under PlacementTrackingMode to determine the output placement. These
single-dimension strategies are then expanded to the full mesh.
r   r   c                 B    U [         ;   =(       d    U R                  5       $ r2   )r   _can_decompose)r   s    r)   
has_decomp!DecompShardingStrategy.has_decomp   s     ((?B,=,=,??r+   r;   Nc                 V    XR                   ;  a  [        U 5      nX!R                   U '   gg)zi
Register schema_info for decomposition op on first invocation.
Needed for correct shard prop cache key.
N)rR   r*   )r   r;   rU   s      r)   ensure_schema_info)DecompShardingStrategy.ensure_schema_info   s-     4444R8K2=++B/ 5r+   r0   c                    [        S U R                  U R                  45      (       d  g [        R	                  U 5      n[        U R                  U R                  [        U R                  R                  5       5      -   5      n[        UR                  S/SSS9n/ n/ nU HZ  n [        R                  XXA5      n[#        U[        5      (       d  U/O
[%        U5      nUR'                  U[%        U5      -   5        M\     U(       d  [)        S5      e[+        U5      n	UR-                  U 5      n
[/        X:XYS9$ ! [         a       g [         a       g [        [        [         4 a     M  f = f)Nc                 "    [        U [        5      $ r2   rO   rJ   s    r)   rL   ;DecompShardingStrategy.propagate_strategy.<locals>.<lambda>   rP   r+   r   F)_init_backend_rankzGSharding propagation should have produced at least Replicate() strategy)input_index)r,   r3   r5   rs   _get_candidate_placementsr   r   r4   r6   r   device_type_propagate_through_decomprQ   r   rZ   KeyError
IndexErrorrF   rf   r"   AssertionErrorlen_wrap_with_op_strategyr   )r0   r;   candidate_placementsr<   	fake_meshsingle_dim_strategiesoutput_placementsinput_placementsrd   	n_outputsstrategy_schemas              r)   propagate_strategy)DecompShardingStrategy.propagate_strategy   st    0""I$;$;<
 
 5OO 
 'LL!!E)*A*A*H*H*J$KK
 t//!EQRS	 "EG 4/II !+65 9 9tF|  "(():TBR=S)ST# !5& % Y  )*	'>>yI.#8
 	
+ ' .  (J7  s   $D66
E'	E'E'&E'	placementr<   .c                   ^^ U R                   nU[        ;   a
  [        U   nO0UR                  5       (       a  UR                  nO[	        SU 35      e[        U5      mUU4S jnSSKJn  U" 5          [        X`R                  5      n[        X`R                  5      n	[        UT5         U" U0 U	D6n
S S S 5        S S S 5        S n[        UW
5      n[        U[        [        45      (       a5  U Vs/ s H	  oc  M  UPM     nn[        U5      S:X  a  US   $ [        U5      $ U$ ! , (       d  f       Nw= f! , (       d  f       N= fs  snf )NzNo decomposition found for c                    > [        T5      n[        U [        5      (       aQ  [        R                  " U R
                  U R                  R                  SS9n[        TU4U R                  S9Ul        U$ U $ )Nmeta)dtypedevice)tensor_meta)	nextrF   r   rG   emptyshaper   r   rE   )rK   pr   r<   placement_iters      r)   to_metaADecompShardingStrategy._propagate_through_decomp.<locals>.to_meta   sZ    ^$A![)){{177!--2E2EfU(tO
Hr+   r   )maybe_disable_local_tensor_modec                     [        U [        R                  5      (       a%  [        U SS 5      nU(       a  UR                  S   $ S $ g )NrE   r   )rF   rG   rH   rI   
placements)rh   specs     r)   get_placementGDecompShardingStrategy._propagate_through_decomp.<locals>.get_placement   s:    !U\\**q'40-1tq);t;r+      )r   r   rv   	decomposerQ   itertorch.distributed._local_tensorr   r.   r3   r5   r9   rF   r4   rf   r   )r0   r   r<   r;   r   	decomp_fnr   r   	args_metakwargs_metard   r   resultr   flatr   s     `            @r)   r   0DecompShardingStrategy._propagate_through_decomp   s    \\$$+B/I  I%(CB4&HIIi	 	T,. *?*?@I"7,C,CDK&}d;"I== < /	 -0fudm,,%7v!AvD7!$i1n47=%+= <; /.  8s0   27D4)	D#2D40E:E#
D1	-D44
Ec                   ^	 [        U 5      n[        [        U5      5      u  p#[        5       1m	[	        [
        U	4S jU5        / nU H  n[        U[
        5      (       d  UR                  S /5        M,  [        T	5      nT	 H  n[        U[        5      (       a<  U[        UR                  5       Vs1 s H  n[        XR                  S9iM     sn-  nMT  [        U[        5      (       d  Mk  U[        UR                  5       Vs1 s H  n[        U5      iM     sn-  nM     UR                  [        U5      5        M     [        [        R                  " U6 5      $ s  snf s  snf )Nc                 :   > TR                  U R                  5      $ r2   )updater   )r   all_placementss    r)   rL   BDecompShardingStrategy._get_candidate_placements.<locals>.<lambda>  s    ..t?r+   )split_factor)r7   r-   rf   r   r/   r   rF   r"   setr   rangendimr   r   	itertoolsproduct)
r0   tensor_specs
flat_specs_
candidatesr   optionsr   r%   r   s
            @r)   r   0DecompShardingStrategy._get_candidate_placements  s0    ,I6$T,%78
 +4+?	
 46
DdK00!!4&)n-'A!!]33%*499%5$%5 *!..I%5$  $Au--eDII6F#G6FE!H6F#GG ( !!$w-0   I%%z233$
 $Hs   0E
E 
r   )rk   rl   rm   rn   ro   staticmethodr   boolrw   r   rz   r	   r
   r   r4   r   r   r   rf   r   rp   r   r+   r)   rs   rs      s    @z @d @ @ >z >:L >QU > > 8
8
,>8
	d	8
 8
t 00T)*0 0 *	0
 
U9c>*	*0 0d  4 4	eI$%	& 4  4r+   rs   )-ro   r   typingr   rG   torch._decompr   
torch._opsr   )torch.distributed._functional_collectivesr   torch.distributed.device_meshr   &torch.distributed.tensor._dtensor_specr   #torch.distributed.tensor._op_schemar	   r
   r   #torch.distributed.tensor._ops.utilsr   'torch.distributed.tensor._sharding_propr   torch.distributed.tensor._utilsr   (torch.distributed.tensor.placement_typesr   r   r   r   %torch.fx.experimental.symbolic_shapesr   torch.utils._python_dispatchr   r*   torch.utils._pytreer,   r-   r.   r/   r4   objectr7   r9   rs   r   r+   r)   <module>r      s       - ! E 4 > W W O F C  N :': '2C '< P OKH K{V7KS7P1Q K90- 90xg4 g4r+   