
    R jR                         % S SK r S SKJr  S SKrS SKJr  S SKJr  S SKJ	r	   " S S\5      r
 " S S	\5      r " S
 S\5      r " S S\5      rSq\\R                   S-     S-  \S'   S\R$                  4S jrg)    N)chain)_get_device_index)Function)commc                   4    \ rS rSr\S 5       r\S 5       rSrg)	Broadcast
   c                   ^ [        S U 5       5      (       d  [        S5      eU Vs/ s H  n[        US5      PM     nnXl        [	        U5      S:X  a  g[	        U5      U l        US   R                  5       U l        U Vs/ s H  oDR                  5       PM     snU l	        [        R                  " X R                  5      nU HD  n[        U R                  5       H(  u  pxU(       d  M  [        R                  " Xg   5      Xg'   M*     MF     / n	[        U R                  SS  5       H*  u  mn
U
(       a  M  U	R!                  U4S jU 5       5        M,     U R"                  " U	6   [%        [&        R(                  " U5      5      $ s  snf s  snf )Nc              3   R   #    U  H  oR                   R                  S :g  v   M     g7fcpuNdevicetype.0is     m/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/torch/nn/parallel/_functions.py	<genexpr>$Broadcast.forward.<locals>.<genexpr>        :6a88==E)6   %'z2Broadcast function not implemented for CPU tensorsTr       c              3   ,   >#    U  H	  oT   v   M     g 7fNr   )r   outputidxs     r   r   r   "   s     *MW6#;Ws   )allAssertionErrorr   target_gpuslen
num_inputs
get_deviceinput_device
is_complexcomplex_maskr   broadcast_coalesced	enumeratetorchview_as_complexneeds_input_gradextendmark_non_differentiabletupler   from_iterable)ctxr!   inputsxinpoutputsdevice_outputsr   r&   non_differentiablesinput_requires_gradr   s              @r   forwardBroadcast.forward   sT   :6::: !UVV;FG;a(D1;G%v;!V!!9//18>?NN,?**6??C%N!*3+;+;!<:(-(=(=n>O(PN% "= &
 !(1#2F2Fqr2J(K$C$&&#***MW*MM )L 	##%89U((122+ H @s   FFc                 d    [         R                  " U R                  U R                  /UQ76 nSU-   $ )Nr   )ReduceAddCoalescedapplyr%   r#   )r1   grad_outputsgradss      r   backwardBroadcast.backward&   s5    "((cnn
/;
     r   N__name__
__module____qualname____firstlineno__staticmethodr9   r@   __static_attributes__r   rB   r   r   r   
   s(    3 34  rB   r   c                   4    \ rS rSr\S 5       r\S 5       rSrg)r<   /   c                    [        S[        U5      U5       Vs/ s H  oCU   R                  5       PM     snU l        [        U5       Vs/ s H  oCU   R	                  5       PM     nnXPl        [        S U 5       5      n[        S[        U5      U5       Vs/ s H	  nXdXB-    PM     nn[        R                  " Xq5      n[        S [        X5       5       5      nU$ s  snf s  snf s  snf )Nr   c              3   z   #    U  H1  oR                  5       (       a  [        R                  " U5      OUv   M3     g 7fr   )r&   r*   view_as_realr   gs     r   r   -ReduceAddCoalesced.forward.<locals>.<genexpr>9   s+       
DIq\\^^Eq!:Es   9;c              3   d   #    U  H&  u  pU(       a  [         R                  " U5      OUv   M(     g 7fr   r*   r+   )r   rr&   s      r   r   rQ   C   s+      
!; )3E!!!$9!;s   .0)
ranger"   r$   r!   r&   r'   r/   r   reduce_add_coalescedzip)	r1   destinationr#   r?   r   r'   grads_convertedgrads_resultss	            r   r9   ReduceAddCoalesced.forward0   s     ,1CJ
+K
+Ka!H!+K
 8=Z7HI7H!a++-7HI'  
DI 
 
 1c/2J?
? /? 	 
 ++F@ 
!$W!;
 

 -
 J
s   C
C"C'c                 J    S[         R                  " U R                  /UQ76 -   $ )NNN)r   r=   r!   )r1   r>   s     r   r@   ReduceAddCoalesced.backwardJ   s(    
 OOCOO;l;< 	<rB   r   NrC   r   rB   r   r<   r<   /   s(     2 < <rB   r<   c                   4    \ rS rSr\S 5       r\S 5       rSrg)GatherR   c                   ^  [        S U 5       5      (       d  [        S5      eUS:X  a  ST l        O[        US5      nUT l        UT l        [        S U 5       5      T l        [        S U 5       5      (       a5  US:X  a/  [        S U 5       5      n[        R                  " S	S
S9  ST l	        OST l	        [        U 4S jU 5       5      T l
        [        U5      S:  =(       a    US   R                  5       n[        R                  " UT R                  T R                  5      nU(       a  [        R                   " U5      nU$ )Nc              3   R   #    U  H  oR                   R                  S :g  v   M     g7fr   r   r   s     r   r   !Gather.forward.<locals>.<genexpr>U   r   r   z/Gather function not implemented for CPU tensorsr   Tc              3   @   #    U  H  oR                  5       v   M     g 7fr   )r$   r   s     r   r   re   ]   s     >v!||~~vs   c              3   F   #    U  H  oR                  5       S :H  v   M     g7fr   N)dimr   ts     r   r   re   ^   s     ,Vuuw!|Vs   !r   c              3   B   #    U  H  oR                  S 5      v   M     g7f)r   N)viewrj   s     r   r   re   _   s     5f66!99fs   zvWas asked to gather along dimension 0, but all input tensors were scalars; will instead unsqueeze and return a vector.   )
stacklevelFc              3   X   >#    U  H  oR                  TR                  5      v   M!     g 7fr   )sizeri   )r   r   r1   s     r   r   re   i   s     @Aswws   '*)r   r    target_devicer   ri   r/   
input_gpuswarningswarnunsqueezed_scalarinput_sizesr"   r&   r   gatherr*   r+   )r1   rr   ri   r2   r&   r   s   `     r   r9   Gather.forwardS   s   :6::: !RSSE! %C-mTBM -C>v>>,V,,,5f55FMM' 	 %)C!$)C!@@@[1_?)=)=)?
VSWWc.?.?@**62FrB   c                     [         R                  U R                  U R                  U R                  U5      nU R
                  (       a  [        S U 5       5      nSU-   $ )Nc              3   *   #    U  H	  oS    v   M     g7frh   r   rO   s     r   r   "Gather.backward.<locals>.<genexpr>z   s     #B/QaD/s   r^   )Scatterr=   rs   rw   ri   rv   r/   )r1   grad_outputscattered_gradss      r   r@   Gather.backwardt   sK    !--NNCOOSWWk
   ##B/#BBOo--rB   r   NrC   r   rB   r   ra   ra   R   s)     @ . .rB   ra   c                   4    \ rS rSr\S 5       r\S 5       rSrg)r}   ~   c                 H   U Vs/ s H  n[        US5      PM     nnX0l        UR                  R                  S:w  a  UR	                  5       OSU l        S n[        R                  R                  5       (       a?  U R
                  S:X  a/  U Vs/ s H"  n[        [        R                  " U5      5      PM$     nnUR                  5       n[        R                  " XAX R                  U5      n	U(       a  [        S U	 5       5      n	Ub  [        U	5       Hq  u  p[        R                  R                  X   5         [        R                  R!                  5       nUR#                  Xj   5        UR%                  U5        S S S 5        Ms     U	$ s  snf s  snf ! , (       d  f       M  = f)NTr   c              3   N   #    U  H  n[         R                  " U5      v   M     g 7fr   rS   )r   os     r   r   "Scatter.forward.<locals>.<genexpr>   s     FgE11!44gs   #%)r   ri   r   r   r$   r%   r*   acceleratoris_available_get_streamr&   r   scatterr/   r)   device_indexcurrent_streamwait_streamrecord_stream)r1   r!   chunk_sizesri   inputr3   streamsr   r&   r5   r   r   main_streams                r   r9   Scatter.forward   sO   ;FG;a(D1;G161B1Be1K5++-QS))++0@0@B0FGRS{V{5<<#78{GS%%'
,,u;QFgFFG &w/	&&33KNC"'"3"3"B"B"DK++GJ7((5 DC 0
 - H T DCs   F)F7AF
F!	c                 b    S S S [         R                  " U R                  U R                  /UQ76 4$ r   )ra   r=   r%   ri   )r1   r~   s     r   r@   Scatter.backward   s+    T4c.>.>!V+!VVVrB   r   NrC   r   rB   r   r}   r}   ~   s*     2 W WrB   r}   _streamsr   c                 Z   U R                   S:X  d#  [        R                  R                  5       (       d  g[        R                  R	                  5       R                   U R                   :w  aA  [        S[        R                  R	                  5       R                    SU R                    35      e[        c"  S/[        R                  R                  5       -  q[        U R                     c1  [        R                  " U R                  5      [        U R                  '   [        U R                     $ )zBGet a background stream for copying between CPU and target device.r   Nz"Expected current accelerator type z to match device type )
r   r*   r   r   current_acceleratorr    r   device_countindexStream)r   s    r   r   r      s     {{e5#4#4#A#A#C#C,,.33v{{B01B1B1V1V1X1]1]0^ _$$*KK=2
 	
 6E--::<<%!&fll!;FLL!!rB   )rt   	itertoolsr   r*   torch._utilsr   torch.autogradr   torch.nn.parallelr   r   r<   ra   r}   r   listr   __annotations__r   r   r   rB   r   <module>r      s|       * # "" "J <  <F).X ).XWh WB .2$u||d"
#d
* 1" "rB   