
    N j                       S SK Jr  S SKrS SKrS SKrS SKrS SKJr  S SKJ	r	J
r
Jr  S SKrS SKJr  S SKrS SKJr  S SKJr  S SKJrJr  S S	KJr  S
SKJrJrJr  S
SKJrJ r J!r!  SSK"J#r#J$r$J%r%J&r&J'r'J(r(  SSK)J*r*J+r+J,r,  \(       a  S SKJ-r-  S
SK.J/r/J0r0  S
SK1J2r2J3r3  SSK"J4r4  \Rj                  " \65      r7\Rp                  S\Rr                  S\Rt                  S\Rv                  S\Rx                  S\Rz                  S\R|                  S\R~                  S\R                  S0	rAS'S jrB " S S\5      rC " S  S!\'5      rD\DR                  S"5        \DR                  5          " S# S$\+5      rG " S% S&\,5      rHg)(    )annotationsN)Path)AnyOptionalTYPE_CHECKING)
PRECEDENCE)_embed_headers)
OrderedSet)
CppPrinterExprPrinter)ValueRanges   )ceildivget_bounds_index_exprget_kernel_metadata)ops
OpsWrapperV   )CSEVariableDeferredLineDTYPE_TO_COMPUTATION_DTYPEIndentedBufferOpOverridesPythonPrinter)IterationRangesEntry
SIMDKernelSIMDScheduling)Union)ReductionType	StoreMode)	SchedulerSchedulerNode)OpVarTboolcharshortintlongucharfloathalfbfloatc                    [        U [        5      (       a<  U [        R                  :X  a  gU [        R                  * :X  a  gX :w  a  g[	        U 5      $ [        U [
        5      (       a  U (       a  S$ S$ [	        U 5      $ )N	HUGE_VALFz
-HUGE_VALFNANtruefalse)
isinstancer+   torchinfstrr%   )vals    l/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/torch/_inductor/codegen/mps.pyvalue_to_metalr9   8   sc    #u%))UYYJZ3x	C		v)')s8O    c                      \ rS rSrSrSS jrSS jrSS jrSS jrSS jr	SS jr
SS	 jrSS
 jrSS jrSS jrSS jrSS jr\rSS jrSS jrSS jrSrg)MetalExprPrinterF   z/Converts sympy expression to Metal code snippetc                    UR                   u  p#U R                  U5      nU R                  U5      nUR                  (       a	  SU SU S3$ SU SU S3$ )Nc10::metal::floor_divide(, )metal::floor() / (argsdoprint
is_integer)selfexprxdivs       r8   _print_FloorDiv MetalExprPrinter._print_FloorDivI   sY    LLOll3??.qcC5::qcse1--r:   c                8   UR                   u  p#nUS:H  =(       a
    XDS-
  -  S:g  nU R                  U5      nUS:w  a5  U R                  U5      nUR                  (       a
  SU SU S3nO	SU SU S3nU R                  U5      nU(       a	  SU S	U S3$ SU S
U S3$ )Ni   r   r   (rC   rA   rB   zc10::metal::safe_mod(r@   z) % (rD   )rH   rI   rJ   rK   moduse_safe_mods         r8   _print_ModularIndexing'MetalExprPrinter._print_ModularIndexingQ   s    iie|>aQ(>LLO!8,,s#Cs%uA&#A3eC52ll3*1#RuA661#U3%q!!r:   c                    [        UR                  5      S:w  a  [        S5      e[        U R                  UR                  5      u  p#SU SU SU S3nSU SU SU S3nSU SU S3$ )	Nr   z$metal::min only supported for 2 argsstatic_cast<decltype(+)>(rA   zmetal::min(r@   lenrE   RuntimeErrormap_printrH   rI   ab
typecast_a
typecast_bs         r8   
_print_MinMetalExprPrinter._print_Minb   }    tyy>QEFF4;;		*,QCq3qc;
,QCq3qc;
ZL:,a88r:   c                    [        UR                  5      S:w  a  [        S5      e[        U R                  UR                  5      u  p#SU SU SU S3nSU SU SU S3nSU SU S3$ )	Nr   z$metal::max only supported for 2 argsrU   rV   rW   rA   zmetal::max(r@   rX   r]   s         r8   
_print_MaxMetalExprPrinter._print_Maxk   rd   r:   c                |    [        UR                  5      S:X  d   eSU R                  UR                  S   5       S3$ )Nr   metal::abs(r   rA   rY   rE   r\   rH   rI   s     r8   
_print_AbsMetalExprPrinter._print_Abst   s9    499~"""T[[167q99r:   c                |    [        UR                  5      S:X  d   eSU R                  UR                  S   5       S3$ )Nr   zstatic_cast<long>(metal::rint(r   ))rj   rk   s     r8   _print_RoundToInt"MetalExprPrinter._print_RoundToInty   s9    499~"""/DIIaL0I/J"MMr:   c                    [        UR                  5      S:X  d   eUR                  u  p#UR                  (       a  US:  d   e[        SU S35      eU R	                  U[
        S   5      nSU SU SU*  S	3$ )
Nr   r   zOFor integer inputs, only non-negative ndigits are currently supported, but got .Mulz!static_cast<float>(metal::rint(1e * z) * 1erA   )rY   rE   rG   
ValueErrorparenthesizer   )rH   rI   numberndigits
number_strs        r8   _print_RoundDecimal$MetalExprPrinter._print_RoundDecimal~   s    499~"""))Q;;abiajjkl  &&vz%/@A
27)3zl&RYQYPZZ[\\r:   c                l    UR                   u  p#SU R                  U5       SU R                  U5       S3$ )Nstatic_cast<float>(z) / static_cast<float>(rA   )rE   r\   )rH   rI   lhsrhss       r8   _print_IntTrueDiv"MetalExprPrinter._print_IntTrueDiv   s;    99 %T[[%5$66MdkkZ]N^M__`aar:   c                    [        UR                  5      S:X  d   e[        U R                  UR                  5      u  p#SU SU S3$ )Nr   z'metal::precise::pow(static_cast<float>(z), static_cast<float>(ro   )rY   rE   r[   rF   )rH   rI   rJ   ys       r8   _print_PowByNatural$MetalExprPrinter._print_PowByNatural   sG    499~"""4<<+8;QRSQTTVWWr:   c                    [        UR                  5      S:X  d   eU R                  UR                  S   5      nSU S3$ )Nr   r   r~   rA   rY   rE   rF   rH   rI   rJ   s      r8   _print_ToFloatMetalExprPrinter._print_ToFloat   s=    499~"""LL1&$QCq))r:   c                b    UR                   (       a  [        [        U5      5      $ [        U5      $ N)rG   r6   r(   rk   s     r8   _print_FloatMetalExprPrinter._print_Float   s#    ?? s4y>!t9r:   c                    [        UR                  5      S:X  d   eU R                  UR                  S   5      nSU S3$ )Nr   r   z1static_cast<int>(metal::floor(static_cast<float>(z)))r   r   s      r8   _print_FloorToInt"MetalExprPrinter._print_FloorToInt   s=    499~"""LL1&B1#SIIr:   c                    [        UR                  5      S:X  d   eU R                  UR                  S   5      nSU S3$ )Nr   r   zstatic_cast<int>(metal::trunc(ro   r   r   s      r8   _print_TruncToInt"MetalExprPrinter._print_TruncToInt   s=    499~"""LL1&/s"55r:   c                    [        UR                  5      S:X  d   eU R                  UR                  S   5      nSU S3$ )Nr   r   zmetal::precise::log2(rA   r   r   s      r8   _print_OpaqueUnaryFn_log2*MetalExprPrinter._print_OpaqueUnaryFn_log2   s=    499~"""LL1&&qc++r:   c                J   ^  U 4S jUR                    5       u  p#nU SU SU 3$ )Nc              3  \   >#    U  H!  nTR                  U[        S    S-
  5      v   M#     g7f)Atomg      ?N)rw   r   ).0argrH   s     r8   	<genexpr>0MetalExprPrinter._print_Where.<locals>.<genexpr>   s.      
HQDc:f#5#;<<	s   ), ? z : )rE   )rH   rI   cpqs   `    r8   _print_WhereMetalExprPrinter._print_Where   s3    
HL		
a Cs#aS!!r:    N)rI   
sympy.Exprreturnr6   )__name__
__module____qualname____firstlineno____doc__rL   rR   rb   rf   rl   rp   r{   r   r   r   r   r   _print_floorr   r   r   __static_attributes__r   r:   r8   r<   r<   F   s\    9.""99:
N

]bX
*
J
 %L6
,
"r:   r<   c                     \ rS rSrSr\  S2         S3S jj5       r\        S4S j5       r\S5S j5       r\S6S j5       r	\S7S j5       r
\S8S	 j5       r\S9S
 j5       r\S:S j5       r\S:S j5       r\S:S j5       r\S:S j5       r\S;S j5       r\S;S j5       r\S;S j5       r\S;S j5       r\S;S j5       r\S;S j5       r\S;S j5       r\S;S j5       r\S;S j5       r\S;S j5       r\S;S j5       r\S;S j5       r\S;S j5       r\S<S j5       r\S;S j5       r\S;S j5       r \S;S j5       r!\S;S  j5       r"\S;S! j5       r#\S:S" j5       r$\S;S# j5       r%\S;S$ j5       r&\S:S% j5       r'\S;S& j5       r(\S:S' j5       r)\S;S( j5       r*\S=S) j5       r+\S=S* j5       r,\          S>S+ j5       r-\S;S, j5       r.\S:S- j5       r/S?S. jr0S@S/ jr1\2SAS0 j5       r3S1r4g)BMetalOverrides   zXImplements Metal-specific overrides for ops. Base class emits Python-friendly overrides.Nc                    U[         R                  :X  a  [        R                  S5        SU  S3$ S[        U    SU  S3$ )Nz>float64 cast requested, probably from tensorify_python_scalarsr~   rA   static_cast<>()r4   doublelogwarningDTYPE_TO_METAL)rJ   dtype	src_dtypeuse_compute_typess       r8   to_dtypeMetalOverrides.to_dtype   sK     ELL KKP )1--nU34Bqc;;r:   c                6    S[         U    S[         U    SU  S3$ )Nzas_type<z>(static_cast<r   ro   r   )rJ   r   r   s      r8   to_dtype_bitcastMetalOverrides.to_dtype_bitcast   s/     ./0~i?X>YY[\][^^`aar:   c                    [        U 5      $ r   r9   )r7   r   s     r8   constantMetalOverrides.constant   s    c""r:   c                :   [         R                  R                  [         R                  R                  U 5      5      n[         R                  R                  R                  [         R                  R                  U[        U 5      S9n[        R                  " X15      $ )N)bounds)
r   kernelindex_to_strprepare_indexingcsegeneratecomputer   r   r   )rI   r   idx_strvars       r8   
index_exprMetalOverrides.index_expr   sj    ((''(A(A$(GHhhll##HHg.CD.I $ 
 ||C''r:   c           	        [        U5      n[        5       n[        R                  R	                  U5         UR                  5          [        R                  " 5       [        R                  R                  l	        S[        R                  R                  l
        U" 5       nS S S 5        S S S 5        U  SUR                  5        SU 3n[        R                  R                  R                  U5      nU(       Gd_  [        R                  R                  R                  WR                  S9n[        R                  R                  R                  Xg5        [        R                  R                   R#                  [$        UR                      SU S3SU  S3/5        [        R                  R                   R                  5          [        R                  R                   R'                  U5        [        R                  R                   R)                  U SU S	U S
35        S S S 5        [        R                  R                   R)                  SU SU S	U S
35        U$ ! , (       d  f       GN= f! , (       d  f       GN= f! , (       d  f       Ng= f)Ntmp_scoped_:r    ;if () {z = static_cast<decltype(rW   );z} else )r9   r   r   r   swap_buffersindent	itertoolscountr   iter_buffer_idsname_prefixgetvaluetry_getnewvarr   putr   
writelinesr   splice	writeline)maskbodyother	other_strscoped_bodyrc	cache_keyr   s           r8   maskedMetalOverrides.masked   s   
 #5)	$&XX"";/1C1C1E ,5??+<AHHLL('4AHHLL$B 2F/ fAk2245QykB	hhll""9-((,,%%BHH%5CHHLLY,HH''"288,-Qse15dV47HI !!((*  ''4  **e3C5B4rB +
 HH&&3%7uC	{"M 
3 2F1E//" +*s1   I+AII+ AI=
I(	#I++
I:=
Jc           	     0    U  SU SU S[        U5       S3$ )Nr   z : static_cast<decltype(rW   rA   r   )r^   r_   r   s      r8   whereMetalOverrides.where   s(    Cs21#S9J8K1MMr:   c                    SU  SU S3$ )Nzc10::metal::remainder(r@   rA   r   r^   r_   s     r8   	remainderMetalOverrides.remainder  s    's"QCq11r:   c                D    SU  SU SU  S3nSU  SU SU S3nSU SU S3$ )NrU   rV   rW   rA   zc10::metal::max(r@   r   r^   r_   r`   ra   s       r8   maximumMetalOverrides.maximum  K    ,QCq3qc;
,QCq3qc;
!*R
|1==r:   c                D    SU  SU SU  S3nSU  SU SU S3nSU SU S3$ )NrU   rV   rW   rA   zc10::metal::min(r@   r   r   s       r8   minimumMetalOverrides.minimum  r   r:   c                    U  SU 3$ )Nz || r   r   s     r8   
logical_orMetalOverrides.logical_or      D}r:   c                    U  SU 3$ )Nz && r   r   s     r8   logical_andMetalOverrides.logical_and  r  r:   c                    SU  S3$ )Nzmetal::isnan(rA   r   rJ   s    r8   isnanMetalOverrides.isnan      qc##r:   c                    SU  S3$ )Nzmetal::isinf(rA   r   r	  s    r8   isinfMetalOverrides.isinf   r  r:   c                    SU  S3$ )Nzmetal::precise::log(rA   r   r	  s    r8   r   MetalOverrides.log$      %aS**r:   c                    SU  S3$ )Nzmetal::precise::exp(rA   r   r	  s    r8   expMetalOverrides.exp(  r  r:   c                    SU  S3$ )Nri   rA   r   r	  s    r8   absMetalOverrides.abs,  s    QCq!!r:   c                    SU  S3$ )Nzmetal::signbit(rA   r   r	  s    r8   signbitMetalOverrides.signbit0  s     1%%r:   c                    SU  S3$ )Nzmetal::precise::sin(rA   r   r	  s    r8   sinMetalOverrides.sin4  r  r:   c                    SU  S3$ )Nzc10::metal::sinc(rA   r   r	  s    r8   sincMetalOverrides.sinc8  s    "1#Q''r:   c                    SU  S3$ )Nzmetal::precise::cos(rA   r   r	  s    r8   cosMetalOverrides.cos<  r  r:   c                    SU  S3$ )Nzmetal::precise::tan(rA   r   r	  s    r8   tanMetalOverrides.tan@  r  r:   c                    SU  S3$ )Nzmetal::precise::asin(rA   r   r	  s    r8   asinMetalOverrides.asinD      &qc++r:   c                    SU  S3$ )Nzmetal::precise::acos(rA   r   r	  s    r8   acosMetalOverrides.acosH  r+  r:   c                    SU  S3$ )Nzmetal::precise::atan(rA   r   r	  s    r8   atanMetalOverrides.atanL  r+  r:   c                    SU  SU S3$ )Nz::metal::precise::atan2(r@   rA   r   )rJ   r   s     r8   atan2MetalOverrides.atan2P  s    )!Bqc33r:   c                    SU  S3$ )Nzmetal::precise::sqrt(rA   r   r	  s    r8   sqrtMetalOverrides.sqrtT  r+  r:   c                    SU  SU  S3$ )NrU   z)>(-rA   r   r	  s    r8   negMetalOverrides.negX  s     'qcaS22r:   c                    SU  S3$ )Nzmetal::precise::rsqrt(rA   r   r	  s    r8   rsqrtMetalOverrides.rsqrt^      's!,,r:   c                    SU  S3$ )Nzmetal::precise::tanh(rA   r   r	  s    r8   tanhMetalOverrides.tanhb  r+  r:   c                    SU  S3$ )Nzmetal::precise::atanh(rA   r   r	  s    r8   atanhMetalOverrides.atanhf  r>  r:   c                    SU  SU S3$ )Nr?   r@   rA   r   r   s     r8   floordivMetalOverrides.floordivj  s     +1#Rs!44r:   c                    SU  S3$ )NrB   rA   r   r	  s    r8   floorMetalOverrides.flooro  r  r:   c                    SU  S3$ )Nzmetal::sign(rA   r   r	  s    r8   signMetalOverrides.signs      aS""r:   c                D    SU  SU SU  S3nSU  SU SU S3nSU SU S3$ )NrU   rV   rW   rA   zmetal::fmod(r@   r   r   s       r8   fmodMetalOverrides.fmodw  sK    ,QCq3qc;
,QCq3qc;
j\J<q99r:   c                    SU  S3$ )Nmetal::trunc(rA   r   r	  s    r8   truncMetalOverrides.trunc}  r  r:   c                    U  SU 3nU R                   b  U R                   R                  (       d(  UR                   b!  UR                   R                  (       a  SU S3$ U$ )Nz / rS  rA   )r   is_floating_point)r^   r_   quots      r8   truncdivMetalOverrides.truncdiv  sQ    Cs|GGAGG$=$=GGAGG$=$="4&**r:   c                    SU  S3$ )Nzmetal::ceil(rA   r   r	  s    r8   ceilMetalOverrides.ceil  rN  r:   c                f    [         R                  R                  R                  S5        SU  SU S3$ )Nrandomzc10::metal::rand(r@   rA   r   r   headersaddseedoffsets     r8   randMetalOverrides.rand  s/    	X&"4&6(!44r:   c                f    [         R                  R                  R                  S5        SU  SU S3$ )Nr_  zc10::metal::randn(r@   rA   r`  rc  s     r8   randnMetalOverrides.randn  s/    	X&#D6F8155r:   c           	     r    [         R                  R                  R                  S5        SU  SU SU SU S3	$ )Nr_  zc10::metal::randint64(r@   rA   r`  )rd  re  lowhighs       r8   	randint64MetalOverrides.randint64  s=     	
X&'vRxr#baHHr:   c                    SU  S3$ )Nzmetal::rint(rA   r   r	  s    r8   roundMetalOverrides.round  rN  r:   c                D    SU  SU SU  S3nSU  SU SU S3nSU SU S3$ )NrU   rV   rW   rA   zmetal::pow(r@   r   )r^   r_   cast_acast_bs       r8   powMetalOverrides.pow  sK    (1QCs1#Q7(1QCs1#Q7VHBvha00r:   c                f    [         R                  R                  R                  S5        SU SU S3$ )Nspecial_mathc10::metal::rO   rA   r`  )rH   r^   names      r8   _special_unaryMetalOverrides._special_unary  s/    	^,dV1QCq))r:   c                l    [         R                  R                  R                  S5        SU SU SU S3$ )Nry  rz  rO   r@   rA   r`  )rH   r^   r_   r{  s       r8   _special_binaryMetalOverrides._special_binary  s5    	^,dV1QCr!A..r:   c           
        S H,  n[        X[        R                  " U R                  US95        M.     [        R                  " U R                  SS9U l        S H0  n[        U U[        R                  " U R                  US-   S95        M2     S H,  n[        X[        R                  " U R
                  US95        M.     S H0  n[        U U[        R                  " U R
                  US-   S95        M2     g )N)erferfinvi0i0ei1i1edigammaspherical_bessel_j0)r{  	log_gamma)
	bessel_j0	bessel_j1	bessel_y0	bessel_y1modified_bessel_i0modified_bessel_i1modified_bessel_k0modified_bessel_k1scaled_modified_bessel_k0scaled_modified_bessel_k1_forward)	polygammaigammaigammaczeta)
chebyshev_polynomial_tchebyshev_polynomial_uchebyshev_polynomial_vchebyshev_polynomial_whermite_polynomial_hhermite_polynomial_heshifted_chebyshev_polynomial_tshifted_chebyshev_polynomial_ushifted_chebyshev_polynomial_vshifted_chebyshev_polynomial_w)setattr	functoolspartialmethodr|  lgammar  )clsr{  s     r8   _initialize_special_ops&MetalOverrides._initialize_special_ops  s    	
D Cy66s7I7IPTUV	
 ,,S-?-?kR

D ''(:(:
ARS
&
D Cy66s7J7JQUVW

D ''(;(;$BST
r:   r   NT)
rJ   r   r   torch.dtyper   zOptional[torch.dtype]r   r%   r   r6   )rJ   r   r   r  r   r  r   r6   )r7   zUnion[bool, float, int]r   r  r   r6   )rI   r   r   r  r   r6   )r   r   r   r   r   r   r   r6   )r^   r$   r_   r$   r   r$   r   r6   )r^   r$   r_   r$   r   r6   )r^   r   r_   r   r   r6   )rJ   r   r   r6   )rJ   r   r   r   r   r6   )rd  r   re  r   r   r6   )
rd  r   re  r   rl  r   rm  r   r   r6   )r^   r   r{  r6   r   r6   )r^   r   r_   r   r{  r6   r   r6   r   None)5r   r   r   r   r   staticmethodr   r   r   r   r   r   r   r   r   r  r  r
  r  r   r  r  r  r  r   r#  r&  r)  r-  r0  r3  r6  r9  r<  r@  rC  rF  rI  rL  rP  rT  rY  r\  rf  ri  rn  rq  rv  r|  r  classmethodr  r   r   r:   r8   r   r      s   b ,0"&	<<< )<  	<
 
< < bb*b7Bb	b b
 # # ( (  B N N 2 2 > >
 > >
     $ $ $ $ + + + + " " & & + + ( ( + + + + , , , , , , 4 4 , , 3 3
 - - , , - - 5 5 $ $ # # : :
 $ $   # # 5 5 6 6 II#.I5@IHSI	I I # # 1 1
*/ = =r:   r   mpsc                  
  ^  \ rS rSr% Sr\rSrSrSr	Sr
\" 5       R                  r\" 5       R                  r\" 5       R                  r\r\" S/5      rS\S	'   / rS
\S'         SU 4S jjrSS jrSS jr S         S S jjrS!S jrSSS\R:                  " 5       4           S"S jjr          S#S jr          S#S jr S$S jr!S%S jr"SS&S jjr# S'       S(S jjr$          S)S jr%Sr&U =r'$ )*MetalKerneli  z;Implement Metal codegen based on the SIMDKernel abstractionr   auto i       utilszOrderedSet[str]ra  zlist[IterationRangesEntry]multistage_reduction_entryc                \   > [         TU ]  " U40 UD6  [        R                  " 5       U l        g r   )super__init__r   r   acc_var_ids)rH   tilingkwargs	__class__s      r8   r  MetalKernel.__init__  s&    
 	*6*$??,r:   c                    [         U   $ r   r   )rH   r   s     r8   dtype_to_strMetalKernel.dtype_to_str  s    e$$r:   c                   U R                   R                  U5      nU R                  U5      n[        R                  R                  U5      nU SU R                  U5       S3nU[        R                  [        R                  4;   a  SU S3n[        R                  nU R                  R                  U R                  XTS9$ )z"Codegen a load from an InputBuffer[]r~   rA   r   )rE   inputr   r   graph	get_dtyper   r4   float16bfloat16float32r   r   loads)rH   r{  indexr   r   lines         r8   loadMetalKernel.load  s    iiood#%%e,!!$'a))%013U]]ENN33 )a0DMMExx  T ??r:   Nc                r   U R                   R                  U5      nU R                  U5      nU R                  [        R
                  R                  U5      5      nSU SU S3nUc  U SU R                  U5       SU S3nO\US:X  aH  U R                  R                  S5        S	U S
3n	SU	 SU S3n
U	 SU
 SU R                  U5       SU S3nO[        SU 35      eU R                  (       a%  U R                  R                  [        X5      5        g U R                  R                  [        X5      5        g )Nr   r   rA   r  ] = r   
atomic_addatomiczc10::metal::AtomicType<>zreinterpret_cast<device z
::type *>(z::atomic_add(r@   r   zUnimplemented store mode )rE   outputr   r  r   r  r  r   ra  rb  rZ   inside_reductionr   r   r   stores)rH   r{  r  valuemoder   	dtype_strcast_valr  atomic_typecast_vars              r8   storeMetalKernel.store  s8    iit$%%e,%%agg&7&7&=>	!)BugQ7<U!D--e45T(1ED\!LLX&3I;a@K1+jQOH!]-zD<M<Me<T;UUWX`WaacdD!:4&ABB  LL""<#;<KK!!,t":;r:   c                   U R                   R                  U5      nU R                  U5      nU R                  [        R
                  R                  U5      5      n[        S U R                   5       5      nU SU R                  U5       SU SU S3nSUR                   SU 3nU R                  R                  [        X5      5        g )Nc              3  J   #    U  H  oR                   (       d  M  Uv   M     g 7fr   is_reductionr   ts     r8   r   .MetalKernel.store_reduction.<locals>.<genexpr>8  s     K(81NNQQ(8   #	#r  z] = static_cast<r   r   r   z == 0) )rE   r  r   r  r   r  r  nextrange_treesr   r{  r  r   r   )rH   r{  r  r  r   r  reduction_dimr  s           r8   store_reductionMetalKernel.store_reduction3  s    iit$%%e,%%agg&7&7&=>	K(8(8KKa))%011A)BugUWXm(()7l467r:   Tc                   [        U[        R                  5      (       a  U R                  U5      nS[	        U R
                  5       3n[        R                  R                  XeU5      nU(       a  SOSnX SU 3-  nU(       a  USU R                  U5       S3-  nUb  U(       a   S5       eUSU 3-  nU R                  R                  XR                  -   5        U$ )	Ntmp_acc_zthreadgroup  r   r  r  z+Thread group var can not have default value = )r3   r4   r   r  r  r  r   r   create_cse_varsexprindexing_coder   suffix)	rH   r   
elem_countdefault_valueis_threadgroupr   var_namer   var_defs	            r8   _new_idxvarMetalKernel._new_idxvar>  s     eU[[))%%e,Ed4#3#3456hh%%h>$2.WAhZ((4::j12!44G$%T'TT%]O,,G$$W{{%:;
r:   c                    X#U4nXPR                   R                  ;   a  U R                   R                  U   $ U R                  XX45      nX`R                   R                  U'   U$ )z)Caching wrapper around _reduction_nocache)r   reduction_cache_reduction_nocache)rH   r   r   reduction_typer  r   results          r8   	reductionMetalKernel.reductionT  s\     6	00088++I66((>Q.4  +r:   c                   U R                   (       d   eU R                  (       a   eSAS jnSnSnU R                   H  nUR                  (       d  M  U(       a  US-  nXhR                   SU 3-  n[        UR                  [        R                  5      (       a  XxR                  -  nMn  U[        R                  " UR                   S3SSS9-  nM     [        R                  " XpR                  5      nU R                  U5      n	[        U[        R                  5      (       a  [        XpR                  5      OU R                  n
US	:X  a  U R!                  U5      nU R"                  R%                  U S
35        U R"                  R%                  S5        U R&                  R)                  SU SU S35        U R*                  R%                  S5        U$ U R,                  R/                  S5        US;   a  [0        U   nU R!                  X5      nU R2                  (       d  UnOAUS:X  a  SOSu  nnU R!                  XSS9nU R&                  R)                  U SU SU S35        U R4                  R7                  U R*                  SU SU SU SU SU	 S3[0        U   S9$ US;   a  U R!                  X*5      n[8        U   nSU S U S3nU R2                  (       d  UnO[UR;                  S!5      (       a  S"OS!nS#U S$U S%3nU R!                  UUSS9nU R&                  R)                  U S&U SU SU S'35        U R4                  R7                  U R*                  SU SU SU SU SU	 S3[0        U   S9$ US(;   Gab  U R!                  X*5      nU R!                  X5      n[8        U   nSU S U S3nU R2                  (       d  UnS[8        U    S U S3nOUR;                  S!5      (       a  S"OS!nS#U S$U S%3nU R!                  UUSS9nU R!                  US)SS9n[=        S* U R>                  RA                  5        5       5      nUS+:X  a  S,OS-nURB                  (       a  S.U S/3OSnU R&                  R)                  SU SU SU U SU S0U S1U S0UR                   S235        U R4                  R7                  U R*                  SU SU SU SU SU SU SU	 S3US9$ US3:X  Ga)  U R2                  (       dz  U R!                  X'5      nU R&                  R)                  U S4U S5U S35        U R4                  R7                  U R&                  SU SU SU	 S3[D        RF                  S9nU" U5      $ U R!                  S6U5      nU S4U S73nU R"                  R)                  U S835        U R&                  R%                  U S9U S:U S;35        U R4                  R7                  U R*                  S<U SU S3[D        RF                  S9nU" U5      $ US=:X  Ga@  [        U[H        5      (       d   S>5       eU R!                  S6U5      nU S4U S73nS?US)    SUS    SUS@    S3nU R"                  R)                  U S835        U R2                  (       aC  U R"                  R)                  U S835        U R&                  R%                  U S9U SU S'35        O!U R&                  R%                  U S0U S35        U R4                  R7                  U R2                  (       a  U R*                  OU R&                  SU SU SU	 S3[D        RF                  S9nU" U5      $ [K        U5      e)Bz]Codegen a reduction operation.
Only sum and prod operations are somewhat reasonable optimizedc           
         [         R                  " S Vs/ s H)  n[        U  SU 3U R                  U R                  5      PM+     sn5      $ s  snf )Nxyzrs   )r   _unwrapr   r   r   )res3r  s     r8   _unwrap_helper6MetalKernel._reduction_nocache.<locals>._unwrap_helperp  sE    %%NSTevQqc]DKKDeT Ts   0Ar  r    + ru   numelTintegerpositiveanyz	 = false;z7threadgroup_barrier(metal::mem_flags::mem_threadgroup);z
                if (z) {
                    z' = true;
                }
            reduction_utils)prodsumr  )r   rV   )r   *F)r  r  r   z= r   zc10::metal::threadgroup_rO   r@   rA   r   )maxminr   r   r  lowestz::metal::numeric_limits<z>::z()z = ::c10::metal::r   )argminargmaxr   c              3  J   #    U  H  oR                   (       d  M  Uv   M     g 7fr   r  r  s     r8   r   1MetalKernel._reduction_nocache.<locals>.<genexpr>  s      =!AA=r  r  r  <z || ::metal::isnan(z) r  z;
                    z$;
                }
                welford_reducer  r  float3r  z = 0.0;z! = ::c10::metal::welford_combine(z	, float3(z, 0.0, 1.0));z(c10::metal::threadgroup_welford_combine(welford_combinez&Input to welford combine must be tuplezfloat3(r   )r  r   r   ztuple[CSEVariable, ...])&r  
_load_maskr  r  r{  r3   r  sympyIntegerSymbolprefixMinmax_threadgroup_sizer  r   simd_group_sizer  r  r   r   r   r  ra  rb  r   r  r   r   r   endswithr  range_tree_nodesvaluesrW  r4   r  tupleNotImplementedError)rH   r   r   r   r  r	  reduction_idxacc_buf_sizerdacc_buf_size_strshmem_buf_sizeacc	acc_dtypeacc_bufr7   default_valreduction_opsrc_metal_type
cast_valuelim_fn	limit_valdata_acc_bufidx_acc_bufidx_validx_varcmp_op
nan_suffixwf_resacc_thread_var	inp_values                                 r8   r  MetalKernel._reduction_nocached  sT    $$$$??""	 ""B??&yL>::M"((EMM22(yyk'!  # yy/H/HI::l3 ,66 L"6"67%% 	 U"""5)C((C5	):;((I LLG E  KK!!I J*+_,29=I&&yAG22 !/% 7HX *\ && '  ##se1\N"UG1$EF88$$*>*:!G9Bse2m_\^_o^ppqr07 %  
 ^+&&yAG+I6N''7r%BJ22 %3%<%<U%C%C6~6Fc&QST	&&Yu '  ##e,^,<AcU"ZLPRS 88$$*>*:!G9Bse2m_\^_o^ppqr07 %  
 11++IFL**5AK+I6N''7r%BJ22 ()>(?r-PQR%3%<%<U%C%C6~6Fc&QST	&&Yu '  **5RW*X #44;;=  !/( : !22 *%3 
 ## )G1VHAcU:, 7EUG $IS /%  88$$*>*:!L>K=XZ%r'"]O26F5GqJ	 %   --22**9C##wiqtE7!$LM**LL.~.>ayK[J\\]^-- + 
 &f--&&x>G 'y-:N%%(8&@ALL""!""CNCSS\]b\ccpq XX&&:7)2l^STUmm ' F
 "&))..eU++U-UU+&&x>G 'y-:N!%(2eAhZr%(1EI%%(8&@A..""))^,<G*DE&&%&&GGWWYZcYddfg &&.)9YKq'IJXX&&#>>DLL*>*:!G9BGWFXXYZmm ' F
 "&))!.11r:   c                	  ^ U R                  TR                  5      nU R                  U5      nTR                  (       aW  [	        TR
                  R                  [        R                  5      (       a]  TR
                  R                  U R                  ::  a9  U R                  R                  U R                   STR                   SU S35        g [	        TR
                  R                  [        R                  5      (       a  TR
                  R                  O,[        R                  " TR
                  R                   S3SSS9n[!        U4S jU R"                   5       5      nTR
                  R                   S3nU(       Gdf  U R"                  R%                  T5        U['        U R                  S	-
  5      -   ['        U R                  5      -  nU R                  U5      nTR
                  R                  n	U R(                  R                  S
TR
                  R                   STR
                  R                   SU STR
                  R                   S3	5        U R(                  R+                  5          [	        U[        R                  5      (       aS  U R(                  R                  U R                   SU SU R                   STR
                  R                   SU	 S3
5        OHU R(                  R                  U R                   SU SU SU	 STR
                  R                   S3
5        [	        U[        R                  5      (       d  XpR                  -  U:w  a"  U R(                  R                  SU SU S35        UR-                  TR
                  R                  U5      n
U R(                  R                  U R                   STR                   SU
 S35        S S S 5        g U R(                  R+                  5          UR-                  TR
                  R                  U5      n
U R(                  R                  U R                   STR                   SU
 S35        S S S 5        g ! , (       d  f       g = f! , (       d  f       g = f)Nr   r  r   r  Tr  c              3  R   >#    U  H  oR                   TR                   L v   M     g 7fr   )root)r   eentrys     r8   r   =MetalKernel.codegen_iteration_ranges_entry.<locals>.<genexpr>-  s       %
*IQFFejj *Is   $'_linear_idxr   z	for(auto z
_cnt = 0; z_cnt < z; ++z_cnt) {ru   z_cnt + r  z_cnt;r    >= z) break;)rename_indexingrI   r  r  r3   rG  r  r!  r"  r&  r  r   index_dtyper{  r#  r$  r  r  appendr+   r   r   replace)rH   rI  r   	index_stracc_sizeroot_already_processedlinear_idx_name	loop_sizeloop_size_str	root_namesub_index_strs    `         r8   codegen_iteration_ranges_entry*MetalKernel.codegen_iteration_ranges_entry  s   ))%**5
JJz*	!!uzz''77

  D$=$==((##$Aejj\YKqA  %****EMM:: JJ!2!2 3594RVW 	 "% %
*.*I*I%
 "
 #ZZ../{;%++2259 "E$*C*Ca*G$HHU))N I !JJy1M

IIIEJJ--.j9J9J8K7S`Raaefkfpfpfwfwex  yA  B !!#h55II''++,Ao->c445S9J9J8K7S\R]]^`
 II''++,Ao->c-PST]S^^abgblblbsbsattyz x66 #<#<<HII''$.?tH:X(VW !* 1 1%**//? S		##''(%**SqI) $#2 !!# ) 1 1%**//? S		##''(%**SqI $#3 $#2 $#s   6E,Q-AQ>-
Q;>
Rc                6   U R                   (       Ga8  U R                  R                  5          U R                  R                  U R                  5        U R                  R                  U R
                  5        SSS5        U R                  R                  S[        U R                   5      -  5        U R                  R                  [        S U R                  R                  R                  5        5       5      5        U R                   (       a;  U R                   R                  5       R                  5         U R                   (       a  M;  OJU R                  R                  U R                  5        U R                  R                  U R
                  5        U R                  R                  U R                  5        U R                  R!                  5         U R
                  R!                  5         U R                  R!                  5         g! , (       d  f       GN= f)z
Concat output code from index_code, loads, compute, stores,
suffix into self.body.

For pointwise kernels, this is called just once at the end.

For reduction kernels, this generates a loop over the reduction
axis.
N}c              3  j   #    U  H)  n[        U[        5      (       a  UOU4  H  nUv   M	     M+     g 7fr   )r3   r+  )r   itemvs      r8   r   +MetalKernel.codegen_body.<locals>.<genexpr>w  s7       A&0u&=&=dD7J J  As   13)r  r   r   r   r  r   r   rY   r   
invalidater
   r  r*  popcache_clearr  clear)rH   s    r8   codegen_bodyMetalKernel.codegen_bodyc  sc    ***!!#		  ,		  . $ IIc$*I*I&J JK
 HH  $ 8 8 ? ? A  11//335AAC 111 IITZZ(IIT\\*		%

1 $#s   AH		
Hc                   U R                  5         [        5       n[        R                  R                  (       a  UR                  S5        OUR                  S5        U R                  5       nUR                  5          [        R                  R                  (       d)  U R                   H  nUR                  SU S35        M     OuU R                   Vs/ s H	  nSU S3PM     nn[        U[        [        5      R                  R                  R                  S-  /[        5       5      nUR                  U5        U R                  (       a|  [        R                   " S U R"                   5       5      n[%        U[&        R(                  5      (       a  [+        XpR,                  5      OU R,                  nUR                  SU S35        UR                  S	5        UR                  5          U R.                  R0                  R3                  5        H\  u  pXR4                  ;   a  M  U R7                  [        R                  R9                  U	5      5      nUR                  S
U SU
 S35        M^     U R.                  R:                  R3                  5        H  u  p[        R                  R9                  U	5      nU[<        R>                  :X  aD  [        R                  RA                  U	5      nUb  URC                  5       / :w  a  [E        S5      eSnOU R7                  U5      nUR                  SU SU
 S35        M     U R.                  RF                  RI                  5        H  n
UR                  SU
 S35        M     U HM  n[%        URJ                  [&        R(                  5      (       a  M.  UR                  SURL                   S35        MO     SU R                  ;   a  UR                  S5        [O        U5      S:  d   S5       e[O        U5      S:  a  S[O        U5       3OSn[O        U5      S:X  a  US   RP                  OSnU R                  (       a  SOSnUR                  U SU SU 35        U R                  (       a  UR                  U S35        SSS5        UR                  S5        UR                  5          [O        U5      S:  aC  [S        U5       H4  u  nnUR                  S URP                   S![U        S"U-   5       S#35        M6     URW                  U RX                  5        URW                  U RZ                  5        SSS5        UR                  S$5        SSS5        [        R                  R                  (       a!  UR                  S%5        UR]                  5       $ UR                  S&5        UR]                  5       $ s  snf ! , (       d  f       GN@= f! , (       d  f       N= f! , (       d  f       N= f)'z3Called at the end to generate a final kernel stringz(R"MTL(zcompile_mps_shader('''z#include <c10/metal/z.h>includec              3  ^   #    U  H#  oR                   (       d  M  UR                  v   M%     g 7fr   )r  r  r  s     r8   r   -MetalKernel.codegen_kernel.<locals>.<genexpr>  s      1%5GAGG%5s   --z$[[max_total_threads_per_threadgroup(z)]]zkernel void generated_kernel(zdevice z* ,Nzfloat64 is not supported by MPSr+   z	constant zconstant long& znumel,errorz,device c10::metal::ErrorMessages* error_buf,   z%Up to 3 index variables are supportedr   uintr   
thread_posr  r   z [[thread_position_in_grid]]z- group_pos [[thread_position_in_threadgroup]]r   r  z = thread_pos.x   r   r\  z)MTL");z'''))/re  r   r   r  cpp_wrapperr   active_range_treesr   ra  r	   r   __file__parentr
   r  mathr  r  r3   r!  r"  r  r&  rE   output_buffersitemsremoved_buffersr  r  input_buffersr4   float64try_get_bufferget_sizerZ   sizevarsr*  r  r$  rY   r{  	enumeratechrr   r  r   r   )rH   r{  codeidx_varsheaderra  header_contentstotal_reduction_sizethreadgroup_sizeouterinnerr  r   	outer_bufr>  thread_pos_dtypethread_pos_var_namethread_pos_suffixidxr   s                       r8   codegen_kernelMetalKernel.codegen_kernel  s   77NN9%NN34**,[[]77&&"llFNN%9&#EF + FJ\\EQ6*6(#6\   #1(^**11889DEL#
 /$$'+yy 1%)%5%51 ($ ""6FF ,.G.GH22 !
 :;K:LCP NN:;$(II$<$<$B$B$DLE 4 44  $ 1 1!''2C2CE2J KINNWYKr%#BC	 %E
 %)II$;$;$A$A$CLEGG--e4E-$%GG$:$:5$A	$,	0B0B0D0J"./P"QQ$+	$($5$5e$<	NNYykE7!#DE %D "YY//668ENN_UG1#=> 9  (G!'--??8H'OP	  ( dll*NN#QR8}q(Q*QQ(.1(ma.?d3x=/*V ! ),H(:HQK$$ $ ,0+@+@Cb!'(*=)>>Z[lZmn ((NN+,,YZW \ NN5!x=1$$-h$7S#CHH:^Cc	N;K1M %8 D../DII&  NN3k n 77NN9% }} NN6"}}o2 ^ [ ]sR   =AW3W$DW3)J.W)W3 B	W"	W3W3
W	W3"
W0	,W33
Xc           
     	   [         R                  R                  nU R                  R                   H  nUR                  U5        M     U R                  R                  5       u  pgph[        Xx5       V	V
s0 s H  u  p[        U	5      U
_M     nn	n
/ U R                  R                  R                  5       QU R                  R                  R                  5       QnU Vs/ s H  oU R                  ;  d  M  UPM     nnXR                  R                   Vs/ s H  n[        U5      PM     sn-  nU Vs/ s H  oU   PM	     nnU R                   H  n[        UR                  [         R"                  [$        45      (       a  M4  [        UR                  [         R&                  5      (       a  UR                  nO3[         R                  R                  R)                  X5      R*                  nUR,                  (       a  U R.                  (       d  M  UR1                  [        U5      5        UR1                  [$        5        M     [         R                  R2                  (       a  U R4                  OU R6                  nSS jn[9        U R;                  5       5      S:  a  U R;                  5        Vs/ s HQ  nU" UR,                  (       a+  [         R<                  " UR                  U R>                  5      OUR                  5      PMS     nnUR1                  U" US5      5        UR1                  [@        5        O*[         R                  R2                  (       a  [C        S5      eU R.                  (       a  U R;                  5        Vs/ s HG  nUR,                  (       a1  U" [         R<                  " UR                  U R>                  5      5      OSPMI     nnUR1                  U" US5      5        UR1                  [@        5        O6[         R                  R2                  (       a  US/-  nUR1                  S5        SU RD                  ;   aY  [         R                  R2                  (       d:  UR1                  S	[9        U Vs/ s H  oc  M  S
U;  d  M  UPM     sn5       35        URG                  UU[H        RJ                  " S5      SUS9  gs  sn
n	f s  snf s  snf s  snf s  snf s  snf s  snf )z 
Codegens a call to this kernel
threadsc                    [         R                  R                  (       a+  U  Vs/ s H	  nSU S3PM     n nSSR                  U 5       S3$ U SSR                  U 5       S3$ s  snf )Nzstatic_cast<uint64_t>(rA   {r@   r\  z=[r  )r   r  rq  join)r  kwargr  s      r8   format_threads/MetalKernel.call_kernel.<locals>.format_threads  si    ww""BIJ'Q3A3a8'JDIIg./r22499W#5"6a88 Ks   A"r   zWe should always have threads?1
group_sizeNrl  zerror_buf_idx==r  F)devicetriton	arg_types)r  z	list[str]r  r6   r   r6   )&r   r  wrapper_coderE   r}  ensure_size_computedpython_argdefszipr6   rv  keysry  rx  r  r3   r  r!  r"  r(   r#  generate_numel_exprr  r  r  rO  rq  cexprpexprrY   rr  r%  r&  listrZ   ra  generate_kernel_callr4   r  )rH   r{  nodedeallocate_wswrapperr_  _	call_argsr  call_argarg_typearg_name_to_typerE   r   treerI   expr_printerr  r  s                      r8   call_kernelMetalKernel.call_kernel  s    ''&&##A((+ $ &*YY%=%=%?"a>A)>W
>W(:CM8#>W 	 
 S))..0R4993J3J3O3O3QR#Gt$2F2F'FtG!3!34!3AQ!3446:;dsc*d	; $$D$**u}}c&:;;DJJ55zzww++??KQQ$$(=(=(=CI&  % % &'WW%8%8tzzdjj	9 t&&()A- 002 3A ~~ IIaggt'@'@A
 3   KKw	:;T"ww"""#CDD  
 002	 3A >> UYYqww0I0IJK 3	   KKw=>T"ww""   & dll"177+>+>KK T%`TccQT\_Q_cT%`!a bc 	$$<<& 	% 	
S

 H4;8 & &as>   4SS4SS2S-ASAS!:S&S&S&c                   U(       d  U(       d  g U R                  U5      nU R                  U5      nU(       a  U(       a  SU SU SU S3nOU(       a  U S3nOU SU 3n[        R                  R                  (       a*  U R                  R                  U R                  SU S3SS	9  g U R                  R                  S
5        U R                  R                  SU S3SU SU S3SS/5        g )NrO   z < 0 || rL  rA   z < 0r   z) returnF)
assignmentrl  r   z,    TORCH_REPORT_ERROR(error_buf, "Index ", z, " out of range [0, ", z, ")");z    return;r\  )
r   r   r  rq  r   r   r   ra  rb  r   )rH   rI   sizelowerupperexpr_strsize_str	conditions           r8   check_boundsMetalKernel.check_boundsN  s     $$T*$$T* UH:XhZtH:QGI#*D)I#*D
3I 77HHYKx8U  
 LLW%LL##9+T*B8*Ldemdnnuv!	r:   )r  )r  zdict[str, sympy.Expr]r  r   r   r  )r   r  r   r6   )r{  r6   r  r   r   r   r   )
r{  r6   r  r   r  r   r  r!   r   r  )r{  r6   r  r   r  r   r   r  )r   zUnion[str | torch.dtype]r  zOptional[int]r  zOptional[Any]r  r%   r   zValueRanges[Any]r   r   )
r   r  r   r  r   r    r  +Union[CSEVariable, tuple[CSEVariable, ...]]r   r  )rI  r   r   r  r  )r{  zOptional[str]r   r6   r  )r{  r6   r  r   r  r%   r   r  )
rI   r   r  r   r  r%   r  r%   r   r  )(r   r   r   r   r   r   	overridesr  newvar_prefixr&  r'  r   rF   r  r   r  r<   r  kexprr
   ra  __annotations__r  r  r  r  r  r  r   unknownr  r  r  rY  re  r  r  r  r   __classcell__r  s   @r8   r  r    s   EIFMOO##EL  E&&EE)7)4G_4=? :?-%- - 
	-%@ SW<< *<3><FO<	<*	8 %)'+##.#6#6#8' " %	
  ! 
,  &	
 ; 
5 s2s2 s2 &	s2
 ;s2 
5s2jHT#JgT BF[
[
"[
:>[
	[
z""&0"9="FJ"	" "r:   r  c                  J   ^  \ rS rSr\rSU 4S jjr        SS jrSrU =r	$ )MetalSchedulingis  c                   > [         TU ]  U5        [        R                  R                  nUb<  [        R                  R
                  (       d  UR                  R                  S5        g g g )NzDfrom torch._inductor.runtime.runtime_utils import compile_mps_shader)r  r  r   r  r  rq  r  r   )rH   	schedulerr  r  s      r8   r  MetalScheduling.__init__v  sQ    #''&&77&&%%Z ' r:   c                \   [         R                  R                  nXR                  ;   a  UR                  U   nU$ SUR	                  5        3nU nXTR                  U'   [         R                  R
                  (       a	  SU S3U-   n[        X$5      u  pxU SU 3n	UR                  XaU	SS9  U$ )Nmps_lib_zconst char* z
_source = 
F)gpu)r   r  r  src_to_kernelnext_kernel_suffixrq  r   define_kernel)
rH   src_codenode_scheduler   r  kernel_namemps_lib_nameoriginsdetailed_originsmetadata_comments
             r8   r  MetalScheduling.define_kernel  s     ''&&,,,!//9K"  &g&@&@&B%CDL)NK.9!!(+ww""),zBXM(;M(S%G")"-=,>?!!,:JPU!Vr:   r   )r  zOptional[Scheduler]r   r  )r  r6   r  zlist[SchedulerNode]r   r  r   r6   )
r   r   r   r   r  kernel_typer  r  r   r  r  s   @r8   r  r  s  s7    K,?IT	 r:   r  )r7   z)Union[float, int, bool, str, CSEVariable]r   r6   )I
__future__r   r  r   loggingru  pathlibr   typingr   r   r   r!  sympy.printing.precedencer   r4   torch.utils._cpp_embed_headersr	   torch.utils._ordered_setr
   torch.utils._sympy.printersr   r   ExprPrinter_torch.utils._sympy.value_rangesr   r  r   r   r   virtualizedr   r   r   commonr   r   r   r   r   r   simdr   r   r   r   ops_handlerr    r!   r  r"   r#   r$   	getLoggerr   r   r%   int8int16int32int64uint8r+   r,   r  r   r9   r<   r   _initialize_pointwise_overridesr  r  r  r   r:   r8   <module>r     s(   #      / /  0  9 / O 7 G G , ,  C B 64! 
JJ	JJ	KK	KK	KK	KK	KK	JJ	NNH
r"| r"jt[ tn	  . .u 5  & & (z	* z	z"n "r:   