
    N jT             -          S SK r S SKJrJr  S SKJr  S SKJr  S SKJ	r	  S SK
Jr  S SKrS SKJr  S SKJrJrJr  S SKJrJrJrJr  S S	KJr  S S
KJrJr  S SKJrJrJrJ r J!r!J"r"J#r#J$r$J%r%J&r&J'r'J(r(J)r)  S SK*J+r+J,r,J-r-J.r.J/r/  S SK0J1r1J2r2  S SK3J4r5  S SK6J7r7J8r8  S SK9J:r;  \	" S5      r<\" S5      r=\R|                  R~                  r?\R                  R                  SSS5      rB\C" S5      u  rDrErFS rGS rHS\\\=\<4   /\\=\<4   4   4S jrIS\!4S jrJS rKS rL\I" \?R                  \?R                  /5      \/" 5       SSS\R                  SS4S  j5       5       rP\I" \?R                  R                  \?R                  R                  /5      \/" 5       S! 5       5       rT\I" \?R                  R                  \?R                  R                  /5      \/" 5       S"S#.S$ j5       5       rU\I" \?R                  5      \/" 5       S% 5       5       rV\I" \?R                  R                  \?R                  R                  \?R                  R                  \?R                  R                  /5      \/" S&S'5      S( 5       5       rY\I" \?R                  R                  \?R                  R                  /5      \/" 5       S) 5       5       rZS* r[GS=S+\S,\\\]   S-\^4S. jjr_\I" \?R                  R                  \?R                  R                  /5      \/" 5       S/ 5       5       raSrbS,\\\]   4S0 jrc\I" \?R                  R                  \?R                  R                  /5      \/" 5       S1 5       5       re\I" \?R                  R                  5      SS2.S3 j5       rh\I" \?R                  R                  5      \R                  SSSS4.S5 j5       rj\I" \?R                  R                  \?R                  R                  /5      \/" 5       \R                  SSSS4.S6 j5       5       rl\I" \?R                  R                  \?R                  R                  /5      \/" 5       \R                  SSSS4.S7 j5       5       ro\I" \?R                  R                  \?R                  R                  /5      \/" 5       SSSSS4.S8 j5       5       rq\I" \?R                  R                  \?R                  R                  /5      \/" 5       S+\S,\\\]   S9\]S:\]4S; j5       5       rs\I" \?R                  R                  5      GS=S< j5       ruS= rv\I" \?R                  R                  5      S> 5       rx\I" \?R                  5         GS>S?\S@\SA\SB\S-  SC\zS-  SD\R                  S-  4SE jj5       r|\I" \?R                  5       GS?SF\SG\SH\SD\R                  S-  4SI jj5       r~\I" \?R                  5      SJSJSSK.S?\SF\SG\SH\SD\R                  S-  4
SL jj5       r\I" \?GR                  5             GS@SM\R$                  SN\R$                  SB\S-  SO\S-  SD\R                  S-  SP\^SQ\]SR\]SS\]4ST jj5       r\I" \?GR                  R                  5      SUSV.S+\S,\]SW\SX\R$                  SY\zSZ\^S\4S[ jj5       r\I" \?GR
                  R                  5      SUSV.S+\S,\]SW\SX\R$                  SY\zSZ\^S\4S\ jj5       r\/" 5       \I" \?GR                  R                  5      S] 5       5       r\I" \?GR                  R                  5      SSSS SSS^.S_\SY\zS`\S-  S'\S-  Sa\S-  Sb\]Sc\^S\4Sd jj5       r\I" \?GR                  R                  \?GR                  GR                  /5      \/" 5       Se 5       5       r\I" \?GR                  GR                  5      GS=Sf j5       r\I" \?GR                   R                  \?GR                   GR                  /5      \/" 5       Sg 5       5       r\I" \?GR                   GR                  5      GS=Sh j5       r\I" \?GR&                  R                  5      Si 5       r\I" \?GR&                  R                  5      Sj 5       r\I" \?GR,                  R                  5      Sk 5       r\I" \?GR,                  GR0                  5      Sl 5       r\I" \?GR4                  R                  5      Sm 5       r\I" \?GR8                  R                  5      SSSSSSn.So j5       r\I" \?GR<                  R                  5      GSASp j5       r\I" \?GR>                  R                  5      GS>Sq j5       r\I" \?GRB                  R                  5      GSASr j5       r\I" \?GRD                  R                  5      Ss 5       r\I" \?GRH                  GR0                  5      St 5       rS+\Su\z4Sv jrS+\Sw\Sx\z4Sy jr GSBSz\Su\zS{\^4S| jjrGSCSw\Su\zS}\z4S~ jjrSw\S\S\^Su\z4S jr GSDS\zS\S?\S\z4S jjrS\z4S jr\I" \?GRZ                  R                  \?GRZ                  GR\                  /5      \/" SS5      GSESw\S\zS\^4S jj5       5       r\I" \?GR`                  R                  \?GRb                  R                  /5      \/" 5       S?\S\4S j5       5       r\I" \?GRf                  /5      \/" SS5      S?\4S j5       5       rS\S\4S jr\I" \?GRl                  5      \/" 5       S+\Sw\S\^S\4S j5       5       r\I" \?GRn                  5      \/" 5       GS=S+\Sw\S\^S\4S jj5       5       r\I" \?GRp                  5      \/" 5       GS=S+\S\^S\4S jj5       5       r\I" \?GRr                  5      \/" 5       GS=S+\S\^S\4S jj5       5       r\I" \?GRt                  R                  5      GSFSw\S\^S\^4S jj5       r\I" \?GRv                  R                  \?GRv                  R                  /5      \/" 5       S?\S\S\4S j5       5       r\I" \?GRx                  R                  5      GS=Sw\S\^4S jj5       r\I" \?GR|                  R                  \?GR|                  R                  /5      \/" SSS5      SSS.S+\S\^S\^S\\\\4   4S jj5       5       r\I" \?GR                  R                  \?GR                  R                  /5      \/" 5       SS.S\S\S\S\^S\4
S jj5       5       r\I" \?GR                  R                  \?GR                  R                  /5      \/" SSS5      SUS.Sw\S\^S\\\\4   4S jj5       5       r\I" \?GR                  R                  \?GR                  R                  /5      \/" SSS5      SUSS.Sw\S\^S\^S\\\\4   4S jj5       5       r\I" \?GR                  R                  \?GR                  R                  /5      \/" 5       SUSS.S\S\S\S\^S\^S\4S jj5       5       r\I" \?GR                  5      \/" SSS5        GSGS\S\S\^S\^S\\\\4   4
S jj5       5       rS\zS\\^\^4   4S jr\I" \?GR                  R                  \?GR                  R                  /5      \/" SS5      GSHSw\S\zS\\\4   4S jj5       5       r\I" \?GR                  R                  \?GR                  GR                  /5      \/" SSSS5      Sw\S\\\\\4   4S j5       5       r\I" \?GR                  R                  5         GSISw\S\^S\^S\zS-  4S jj5       rS\S\S\\\\]   \\\]   4   4S jrS\S\Sx\zS-  S\\\4   4S jrS?\S\S\^4S jr\I" \?GR                  5      SUSSSSSS.Sw\S\S\^S\^S\S-  S\S-  S\S-  S\S-  S\\\\\4   4S jj5       r\I" \?GR                  R                  \?GR                  R                  /5      SUSSS.Sw\S\S\^S\^S\^S\S-  S\4S jj5       r\I" \?GR                  5      \/" SSSUS9   GSJS+\Sw\S\^S\^S\^S\\\4   4S jj5       5       r\I" \?GR                  R                  5      S 5       r\I" \?GR                  5      \/" 5         GSKS?\S\S\S\^S\^S\4S jj5       5       rS rS r\I" \?GR                  5      \/" 5       S 5       5       r\I" \?GR                  5      \/" 5       S 5       5       rS r\I" \?GR                  5      \/" S5      S 5       5       r\I" \?GR                  5      \/" S5      S 5       5       rS r\I" \?GR                  5      \/" 5       S 5       5       r\I" \?GR                  5      \/" 5       S 5       5       r\I" \?GR                  R                  5      S 5       r\I" \?GR                  R                  \?GR                  GR                  \?GR                  R                  \?GR                  GR                  /5      \/" S5      S 5       5       rS r\I" \?GR                  5      \/" 5       S 5       5       r\I" \?GR                  5      \/" 5       S 5       5       r\I" \?GR                  R                  \?GR                  GR                  \?GR                  R                  \?GR                  GR                  /5      \/" S5      S 5       5       r\I" \?GR                  5      \/" 5       GSLS+\S\S\4S jj5       5       r\I" \?GR                  5      \/" 5       S\S+\S\S\S\4
S j5       5       r\I" \?GR                   R                  \?GR                   R                  /5      \/" SUS9SJSJS.S j5       5       Gr\I" \?GR                  R                  \?GR                  R                  /5      \/" 5       SS2.S j5       5       Gr\I" \?GR                  GR                  5      GSMS j5       Gr\I" \?GR                  GR                  5      GSMS j5       Gr\I" \?GR                  R                  \?GR                  R                  /5      \/" 5       GS?S j5       5       Gr	\I" \?GR                  R                  5        GSFS j5       Gr\I" \?GR                  5      \/" SUS9GS?SD\R                  S-  4S jj5       5       GrS GrGSNS jGr GS?S\R$                  S@\R$                  S\\\]   \]-  S\\\]   \]-  S\\\]   \]-  S\^S\]S\\\]   \]-  S-  4S jjGrS Gr\I" \?GR$                  R                  5      S\R$                  S@\R$                  SB\R$                  S-  S\R$                  S-  S\R$                  S-  S\^S\S\4S j5       Gr\I" \?GR(                  R                  5      S\R$                  S@\R$                  SB\R$                  S\\\]   S\\\]   S\\\]   S\^S\\\]   S\]4S j5       Gr\GR,                  GR.                  (       Ga
  \R                  R                  GS SS5      Gr\I" \R|                  GR2                  GR4                  R                  5      GS 5       Gr\I" \R|                  GR2                  GR8                  R                  5      GS 5       Gr\GR,                  GR<                  (       aO  \R                  R                  GSSS5      Gr\I" \R|                  GR@                  GRB                  5      GS 5       Gr"\R                  R                  GSSS5      Gr#\I" \R|                  GRH                  GRJ                  R                  5      \I" \R|                  GRH                  GRL                  R                  5      \I" \R|                  GRH                  GRL                  GRN                  5      GS 5       5       5       Gr(\I" \R|                  GRH                  GRJ                  GRR                  5      \I" \R|                  GRH                  GRJ                  GRT                  5      GS 5       5       Gr+\I" \R|                  GRH                  GRX                  R                  5      \I" \R|                  GRH                  GRX                  GRN                  5      GS 5       5       Gr-\I" \R|                  GRH                  GRX                  GRR                  5      \I" \R|                  GRH                  GRX                  GRT                  5      GS	 5       5       Gr.\I" \R|                  GRH                  GR^                  R                  5      \I" \R|                  GRH                  GR`                  R                  5      GS
 5       5       Gr1\R                  R                  GSSS5      Gr2\I" \R|                  GRf                  GRh                  5          GSOGS j5       Gr5\I" \R|                  GRf                  GRl                  5      GS 5       Gr7GS Gr8\I" \?GRr                  R                  5           GSPGS j5       Gr:GS Gr;\I" \?GRx                  R                  5      GS 5       Gr=\I" \?GR|                  5      \/" 5            GSPGS j5       5       Gr?\I" \?GR                  5      \/" S5      GS 5       5       GrA\I" \?GR                  R                  5      GS 5       GrC\I" \?GR                  R                  5      GS 5       GrE\I" \?GR                  R                  5      GS 5       GrG\I" \?GR                  5      \/" S5      GS 5       5       GrIGS\S}\z4GS jGrJ\I" \?GR                  5      \/" SS'5      GS 5       5       GrL\I" \?GR                  5      \/" S5      GS 5       5       GrN\I" \?GR                  5      \/" SS'5      GS 5       5       GrP\I" \?GR                  5      \/" S5      GS 5       5       GrR\I" \?GR                  R$                  5      GS?GS  j5       GrT\I" \?GR                  R                  \?GR                  R                  /5      \/" 5       GS! 5       5       GrV\I" \?GR                  R                  \?GR                  R                  /5      \/" 5       S"GS".GS#\]4GS$ jj5       5       GrW\I" \R|                  R~                  GR                  R                  \R|                  R~                  GR                  R                  /5      \/" 5       GS% 5       5       GrX\I" \?GR                  R$                  \?GR                  R$                  /5      GS& 5       Gr[\I" \?GR                  R                  /5      GS' 5       Gr]\I" \?GR                  R                  \?GR                  R                  /5      \/" SUS9SJSJS.GS( j5       5       Gr_\I" \?GR                  R$                  /5      GS) 5       Gra\I" \?GR                  R                  \?GR                  R                  /5      SSGS*.GS+ j5       Grd\I" \?GR                  R                  /5      SSGS*.GS, j5       Grf\I" \?GR                  /5      \/" 5       GS- 5       5       Grh\I" \?GR                  /5      GS. 5       Grj\I" \?GR                  /5      GS/ 5       Grl\I" \?GR                  /5      GS0 5       Grn\I" \?GR                  /5      GS1 5       Grp\I" \?GR                  /5      GS2 5       GrqGS3\]GS4\]S\]4GS5 jGrrGS6 Grs\I" \?GR                  /5      SB\S-  4GS7 j5       Gru\I" \?GR                  /5      GS8 5       Grw\I" \?GR                  /5      GS9 5       Gry\I" \?GR                  R                  5      GS: 5       Gr{\I" \?GR                  5      \/" 5       GS; 5       5       Gr}\I" \?GR                  R                  5            GSQGS< j5       Gr\I" \?GR                   R                  5      GS= 5       GrGSBGS> jGr\I" \?GR                  R                  \?GR                  R                  /5      \/" 5       GSRSGS?.GS@ jj5       5       Gr\I" \?GR
                  R                  \?GR                  R                  /5      GSA 5       Gr\I" \?GR
                  GR                  \?GR
                  GR                  \?GR                  GR                  \?GR                  GR                  \?GR                  R                  \?GR                  GR                  /5      \/" S&S'5      GSSGSB j5       5       Gr\I" \?GR                  R                  5      GSC 5       Gr\I" \?GR                  R                  5      GSD 5       Gr\I" \?GR                   R                  5      GSE 5       Gr\I" \?GR$                  GR&                  \?GR(                  GR&                  \?GR$                  R$                  \?GR(                  R$                  \?GR*                  R                  \?GR,                  R                  \?GR.                  R                  /5      GSF 5       Gr\I" \?GR2                  GR&                  \?GR4                  GR&                  \?GR2                  R$                  \?GR4                  R$                  /5      GSGSG j5       Gr\I" \?GR8                  GR&                  \?GR:                  GR&                  /5      GSGSH j5       Gr\I" \?GR>                  R                  \?GR>                  GR@                  /5      GSI 5       GrGSJ Gr\I" \?GRF                  R$                  \?GRF                  GR&                  /5      GSK 5       Gr\I" \?GRJ                  R$                  \?GRJ                  GR&                  /5      GSL 5       Gr\I" \?GRN                  R                  5      GSM 5       Gr\I" \?GRR                  R$                  \?GRR                  GR&                  /5      GSN 5       Gr\I" \?GRV                  R$                  \?GRV                  GR&                  /5      GSO 5       Gr\I" \?GRZ                  R                  5      GSP 5       Gr\I" \?GR^                  R$                  5      \/" 5       GSS\4GSQ jj5       5       Gr\I" \?GRb                  /5      \/" 5        GSTGSR j5       5       Gr\I" \?GRf                  /5       GSTGSS j5       Gr\I" \?GRj                  /5       GSTGST j5       Gr\I" \?GRn                  R                  \?GRp                  R                  /5      GS=GSU j5       Gr\I" \?GRt                  GR&                  5      GSV 5       Gr\I" \?GRx                  R                  5      GSW 5       Gr\I" \?GR|                  5      GSX 5       Gr\I" \?GR                  5      \/" 5       GSY 5       5       Gr\I" \?GR                  5      GSZ 5       Gr\I" \?GR                  R                  5      GS=GS[ j5       GrGSAGS\ jGr\I" \?GR                  R                  5      GS] 5       Gr\I" \?GR                  R                  5      GS^ 5       GrGS_ GrGS` GrGSa GrGSb Gr GS=S?\GSc\]GSd\]GSe\]GSf\]GSg\]GSh\]GSi\]GSj\]GSk\]GSl\]GSm\]GSn\]GSo\]GSp\]GSq\]GSr\]GSs\]GSt\]GSu\]S\zGSv\^4,GSw jjGrGSx GrS?\GS\GSc\]GSd\]GSe\]GSf\]GSg\]GSh\]GSi\]GSj\]GSk\]GSl\]GSp\]GSq\]GSr\]GSs\]GSt\]GSu\]S\z4&GSy jGrGSz Gr\I" \?GR                  R                  5      GS{ 5       Gr\I" \?GR                  R                  5          GSOGS| j5       Gr\I" \?GR                  R                  5      GS} 5       Gr\I" \?GR                  5      \/" SS'5          GSOGS~ j5       5       Gr\I" \?GR                  5      \/" S5      GS 5       5       GrS?\GS\4GS jGr " GS GS\5      GrS?\GS\GS\]4GS jGr\I" \?GR                  R                  5      GS 5       Gr\I" \?GR                  5      \/" 5       GS 5       5       Gr\I" \?GR                  5      \/" SGS5      GS 5       5       Gr\I" \?GR                  R                  /5      GS 5       Gr\I" \?GR                  R                  5           GSUGS j5       Gr\I" \?GR                  R                  \?GR                  R                  /5      \/" 5       SSSSSGS.GS j5       5       Gr\I" \?GR                  R                  \?GR                  R                  /5      \/" 5       SSSSSGS.GS j5       5       Gr\I" \?GR                  R                  5      GS 5       Gr\I" \?GR                  R                  5      GSVGS j5       GrGSBS,\]GS\]GS\^4GS jjGrGS GrGS Gr\I" \?GR                  R                  5      GS=GS j5       GrGS=GS jGrGS?GS jGrGS GrGS?GS jGrGSWGS jGr\I" \?GR                  R                  5      GS 5       Gr\I" \?GR                  5      GS 5       Gr\I" \?GR                  GR                  \?GR                  GR                  \?GR                  GR                  \?GR                  GR                  /5      \/" 5       GS?GS j5       5       Gr \I" \?GR                  GR                  \?GR                  GR                  \?GR                  GR                  \?GR                  GR                  /5      GS?GS j5       Gr\I" \?GR                  R                  /5          GSXGS\GS\GS\GS\GS\^GS\^GS\S-  4GS jj5       Gr\I" \?GR                  GRf                  /5          GSXGS\GS\GS\GS\S-  GS\S-  GS\S-  GS\GS\^GS\^GS\S-  4GS jj5       GrGS\GS\\]GS4   4GS jGr\I" \?GR                  /5          GSXGS\GS\GS\GS\S-  GS\^GS\GS\^GS\^GS\S-  4GS jj5       Gr\I" \?GR                  /5           GSYGS\GS\GS\GS\S-  GS\GS\^GS\^GS\S-  4GS jj5       Gr
\I" \?GR                  /5       GS?GS\GS\GS\GS\S\GS\GS\GS\GS\]GS\]GS\GS\^GS\GS\GS\S-  4GS jj5       Gr\I" \?GR                  /5          GSZGS\GS\GS\GS\GS\^GS\S-  GS\S-  4GS jj5       Gr\I" \?GR                  /5        GSAGS\GS\GS\GS\S\GS\GS\GS\^GS\S-  GS\S-  4GS jj5       Gr\I" \?GR"                  /5           GS[GS\GS\GS\GS\S-  GS\GS\^GS\S-  GS\S-  S\\\4   4GS jj5       Gr\I" \?GR&                  /5         GS\GS\GS\GS\GS\S-  GS\^GS\^GS\S-  4GS jj5       Gr\I" \?GR*                  /5        GS]GS\GS\GS\GS\GS\S-  S\GS\GS\GS\GS\GS\\\^   GS\^GS\S-  4GS jj5       Gr\I" \?GR.                  /5       GS?GS\GS\GS\GS\S\GS\GS\GS\GS\GS\GS\GS\]GS\]GS\GS\^GS\S-  4 GS jj5       Gr\I" \?GR2                  R                  /5           GSUGS\GS\GS\GS\S-  GS\S-  GS\]GS\]GS\GS\^GS\^GS\S-  GS\]S-  GS\]S-  GS\S-  GS\S-  4GS jj5       Gr\I" \?GR2                  GRf                  /5           GSUGS\GS\GS\GS\S-  GS\S-  GS\]GS\]GS\GS\^GS\^GS\S-  GS\S-  GS\S-  GS\S-  GS\]S-  GS\]S-  GS\S-  GS\S-  4$GS jj5       Gr\I" \?GR8                  /5         GS>GS\GS\GS\GS\S\GS\GS\GS\GS\]GS\]GS\GS\^GS\GS\GS\S-  GS\]S-  GS\]S-  4"GS jj5       Gr\I" \?GR<                  /5           GS^GS\GS\GS\SB\S-  GS\S-  GS\S-  GS\]S-  GS\]S-  GS\GS\]GS\^GS\S-  GS\S-  GS\S-  GS\]S-  4GS jj5       Gr\I" \?GR@                  /5         GSWGS\GS\GS\GS\SB\S-  GS\S-  GS\S-  GS\GRB                  GS\GRB                  GS\GS\GS\GS\GS\]GS\^GS\S-  GS\]S-  GS\^4$GS jj5       Gr"    GS_S+\R$                  SH\R$                  GS\R$                  GS\R$                  SB\R$                  S-  GS\R$                  S-  SD\R                  S-  GS\^4GS jjGr#\I" \?GRH                  R                  /5          GS_S+\R$                  SH\R$                  GS\R$                  GS\R$                  SB\R$                  S-  GS\R$                  S-  SD\R                  S-  GS\^4GS jj5       Gr%     GS`S+\R$                  SH\R$                  GS\\\R$                     GS\\\7   GS\\\R$                     GS\\\7   SB\R$                  S-  SD\R                  S-  GS\\\8   S-  GS\\\8   S-  GS\^4GS jjGr&\I" \?GRN                  R                  /5          GS_S+\R$                  SH\R$                  GS\\\R$                     GS\\\7   GS\\\8   GS\\\R$                     GS\\\7   GS\\\8   SB\R$                  S-  GS\R                  S-  GS\\\]   S-  GS\^4GS jj5       Gr(\I" \?GRR                  GRT                  \?GRR                  GRV                  /5      \/" 5       GSBGS j5       5       Gr,\I" \?GRZ                  GRT                  5      GSBGS j5       Gr.\I" \?GR^                  R                  \?GR^                  R                  /5      \/" 5       GS=SS2.GS jj5       5       Gr0GS Gr1GS Gr2\I" \?GRf                  R                  \?GRh                  R                  /5      GS?GS j5       Gr3\I" \?GRj                  R                  \?GRl                  R                  /5      GSAGS j5       Gr5\I" \?GRn                  R                  \?GRp                  R                  /5        GSAGS\GS\\]\GRB                  -     GS\\]\GRB                  -     GS\S-  GS\S-  4
GS jj5       Gr7\I" \?GRr                  R                  \?GRt                  R                  /5      GS>GS j5       Gr9\I" \?GRv                  R                  \?GRv                  GRx                  \?GRv                  GR                  \?GRv                  GRz                  /5      GSaGS j5       Gr>GS Gr?\I" \?GR                  R                  5        GSAGS j5       GrA\I" \?GR                  R                  5      GS 5       GrB\I" \?GR                  R                  5      GS 5       GrC\I" \?GR                  R                  5      GS 5       GrDGS GrEGS GrF\I" \?GR                  R                  \?GR                  R                  /5      GSRGS j5       GrI\I" \?GR                  R                  5      GSbGS j5       GrJ\I" \?GR                  R                  5      GScGS j5       GrL\I" \?GR                  5      \/" 5        GSdGS j5       5       GrN\I" \?GR                  R                  \?GR                  GR                  /5      \/" S&S'5      GSSGS  j5       5       GrP\GR                  GrRGS GrS\I" \?GR                  R                  5      GS 5       GrT\I" \?GR                  R                  5      GS 5       GrU\I" \?GR                  R                  5      GS 5       GrW\I" \?GR                  R                  5      GS 5       GrX\I" \?GR                  R$                  \?GR                  GR                  /5      \/" 5       SSGS.GS j5       5       Gr[\I" \?GR                  GR&                  \?GR                  GR                  /5      SSGS.S+\&GS\GS	\^GS
\^4GS jj5       Gr]\I" \?GR                  /5      \/" 5       GSeGS j5       5       Gr_\I" \?GR                  R                  \?GR                  R                  /5        GSAGS j5       Grb\I" \?GR                  R                  /5        GSAGS j5       Grd\I" \?GR                  R                  5      GS 5       Gre\I" \?GR                  R                  \?GR                  R                  /5      \/" 5       GS>GS j5       5       Grf\I" \R|                  R~                  GR                  5      GS 5       Grg\I" \R|                  R~                  GR                  5      GS 5       Grh\I" \?GR                  5      \/" 5       SSSSGS.GS j5       5       GrjGS Grk\I" \?GR                  5      GS 5       Grm\I" \?GR                  5       GSfGS j5       Gro\I" \?GR                  5       GSfGS j5       Grq\I" \?GR                  5       GSfGS j5       Grs\I" \?GR                  5      \/" 5       SSGS.GS j5       5       Gru\I" \?GR                  5      \/" 5       GS\]S+\S\4GS j5       5       Grw\I" \?GR                  5      S+\4GS j5       Gry\I" \?GR                  5      \/" SUS9S+\S\4GS j5       5       Grz\I" \?GR                  5      \/" 5       S+\S\4GS  j5       5       Gr{GS! Gr|     GS`GS"\GS#\GS\R$                  S-  GS\R$                  S-  GS$\S-  SB\S-  GS\R$                  S-  SD\R                  S-  GS\^4GS% jjGr}\I" \?GR                  5      \/" 5          GS>GS"\GS#\GS$\S-  SB\S-  SD\R                  S-  S\4GS& jj5       5       Gr\I" \?GR                   /5           GS`GS"\R$                  GS#\R$                  GS\R$                  GS\R$                  GS$\R$                  S-  SB\R$                  S-  GS\R$                  S-  SD\R                  S-  GS\^4GS' jj5       Gr\I" \?GR                  GR&                  5      GSgGS( j5       Gr\I" \?GR                  5      \/" 5       GS)\S,\]GS*\^S\4GS+ j5       5       Gr\I" \?GR                  5      \/" 5       GSGS, j5       5       Gr\I" \?GR                  5      \/" 5          GShS@\S'\GS-\]GS.\^GS/\^S\4GS0 jj5       5       Gr\I" \?GR                  R                  5       GSiS&\Sa\\\   GS1\\\]   GS2\4GS3 jj5       GrGS4 Gr\I" \?GR                  R                  5          GSjGS5 j5       GrGS6 GrG\" \?GR                  5        G\" \?GR                   5        G\" \?GR"                  5        G\" \?GR$                  5        G\" \?GR&                  5        G\" \?GR(                  5        G\" \?GR*                  5        G\" \?GR,                  5        G\" \?GR.                  5        G\" \?GR0                  5        G\" \?GR2                  5        G\" \?GR4                  5        G\" \?GR6                  5        G\" \?GR8                  5        G\" \?GR:                  5        G\" \?GR<                  5        G\" \?GR>                  5        G\" \?GR@                  5        G\" \?GRB                  5        G\" \?GRD                  5        G\" \?GRF                  5        GS7 Gr\I" \?GRJ                  5      \/" 5       GS8 5       5       Gr\I" \?GRL                  5      \/" 5       SJGS9.GS: j5       5       Gr\I" \?GRN                  5      \/" 5       SJGS9.GS; j5       5       GrG\" \?GRJ                  5      GrG\" \?GRL                  5      GrG\" \?GRN                  5      GrS SK0rS SGKrS SGKrGS< GrG\" 5         g(k      N)CallableSequence)Enum)wraps)TypeVar)	ParamSpec)SymBoolSymFloatTensor)_add_op_to_registry_convert_out_paramsglobal_decomposition_table
meta_table)
OpOverload)_prim_elementwise_meta$ELEMENTWISE_PRIM_TYPE_PROMOTION_KIND)BoolLikecorresponding_complex_dtypecorresponding_real_dtypeelementwise_dtypesELEMENTWISE_TYPE_PROMOTION_KIND	FloatLikeIntLikemake_contiguous_strides_forNumber
NumberTypesuggest_memory_formatsym_min
TensorLike)_maybe_convert_to_dtype_maybe_resize_out_resize_output_check_safe_copy_outout_wrapper)_broadcast_shapes_maybe_broadcast)_config)ScalingTypeSwizzleType)_pytree_T_PatenIMPLMeta   c                     X-   S-
  U-  $ N    abs     j/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/torch/_meta_registrations.pyceil_divr9   9   s    EAI!    c                     X-   S-
  U-  U-  $ )z$Rounds up x to nearest multiple of yr3   r4   xys     r8   round_upr?   =   s    UQY1!!r:   returnc                    ^  U 4S jnU$ )Nc                 Z   >^  [        T 5      m U 4S jn[        R                  " UT5        T $ )Nc                 (   > [        [        U T5        g N)r   r   )opfns    r8   register0register_meta.<locals>.wrapper.<locals>.registerF   s    
B3r:   )r   pytree	tree_map_)rF   rG   rE   s   ` r8   wrapperregister_meta.<locals>.wrapperC   s)     $	4 	2&	r:   r4   )rE   rK   s   ` r8   register_metarM   B   s     Nr:   type_promotionc                     [         R                  " USU 06u  p#U Vs/ s H  n[        XC5      PM     nn[        U6 n[	        US[
        R                  06$ s  snf )Ntype_promotion_kindrN   )utilsr   r    r&   r   r   DEFAULT)rN   args_result_dtyper=   s        r8   elementwise_metarV   O   sp    
 ..	*OA ?CCd#A4dDC T"D "	BJJ  Ds   Ac                     [         R                  [         R                  [         R                  [         R                  [         R
                  [         R                  0nUR                  X 5      $ rD   )torch	complex32halfcfloatfloatcdoubledoubleget)dtypefrom_complexs     r8   toRealValueTyperb   c   sC    ekku||L
 E))r:   c                 p   ^ ^ [        [        T /UQ76 5      m[        R                  " TT :H  UU 4S j5        g )Nc                     > ST ST  3$ )Nzoutput with shape z# doesn't match the broadcast shape r4   )broadcasted_shape
self_shapes   r8   <lambda>)check_inplace_broadcast.<locals>.<lambda>p   s    $ZL0STeSfgr:   )tupler%   rX   _check)rf   
args_shapere   s   ` @r8   check_inplace_broadcastrl   l   s0    /
HZHI	LLZ'gr:   Fc	           	        ^ ^^^^	 [        T [        R                  5      (       a)  [        R                  " T R	                  5       S:H  S 5        [        T[        R                  5      (       a)  [        R                  " TR	                  5       S:H  S 5        [        S T TT4 5       5      (       a`  [        R                  " [        R                  " 5       5      m	Tc  T	mOO[        R                  " [        R                  " T5      U	U4S j5        OT=(       d    [        R                  " 5       m[        T[        R                  5      (       d  [        S[        T5       35      e[        R                  " [        T[        5      UU U4S j5        [        T[        5      (       d  [        S[        T5       35      e[        R                  " TS:  S	 5        [        R                  " T4TUS
UUS9$ )Nr   c                      gNz:linspace only supports 0-dimensional start and end tensorsr4   r4   r:   r8   rg   (meta_linspace_logspace.<locals>.<lambda>       Pr:   c                      gro   r4   r4   r:   r8   rg   rp      rq   r:   c              3   B   #    U  H  n[        U[        5      v   M     g 7frD   )
isinstancecomplex).0args     r8   	<genexpr>)meta_linspace_logspace.<locals>.<genexpr>   s     
C/B:c7##/Bs   c                     > ST  ST 3$ )Nzlinspace(): inferred dtype z& can't be safely cast to passed dtype r4   )default_complex_dtyper`   s   r8   rg   rp      s    56K5LLrsxryzr:   zdtype must be torch.dtype, got c                     > S[        T5      R                   S[        T 5      R                   S[        T5      R                   S3$ )Nz4received an invalid combination of arguments - got (, ))type__name__)endstartstepss   r8   rg   rp      sB     u+r$s),,-RU0D0D/EQHr:   zsteps must be IntLike, got c                      g)Nz$number of steps must be non-negativer4   r4   r:   r8   rg   rp      s    %Kr:   metar`   layoutdevice
pin_memoryrequires_grad)rt   rX   r   rj   dimanyrQ   r   get_default_dtypeis_complex_dtyper`   AssertionErrorr   _check_typer   empty)
r   r   r   baser`   r   r   r   r   r{   s
   ``` `    @r8   meta_linspace_logspacer   t   s}    %&&IIK1P	
 #u||$$GGINP	

 
CsE/B
CCC % A A##%!
 =)ELL&&u-z
 2002eU[[))>tE{mLMM 
5'"	H
 eW%%:4;-HII	LL!KL;;	# r:   c                 6  ^ [         R                  " TR                  [         R                  :H  U4S j5        [         R                  " U R                  5       S:H  =(       a    TR                  5       S:g  (       + S 5        U R                  TR                  5      $ )Nc                  "   > ST R                    3$ )Nz2take(): Expected a long tensor for index, but got r`   indexs   r8   rg   meta_take.<locals>.<lambda>   s    DU[[MRr:   r   c                      g)Nz*take(): tried to take from an empty tensorr4   r4   r:   r8   rg   r      s    <r:   )rX   rj   r`   long_check_indexnumel	new_emptyshape)selfr   s    `r8   	meta_taker      sm     
LLuzz!R
 
ZZ\Q55;;=A#56< >>%++&&r:   r   c                b  ^ ^^ T R                   nTR                   n[        R                  " X4:H  S 5        [        R                  " T R                  T5      S:H  =(       a    TR                  T5      S:H  UUU 4S j5        [	        T R
                  TR
                  5      nT R                  U5      $ )Nc                      g)Nz=linalg.cross: inputs must have the same number of dimensions.r4   r4   r:   r8   rg   linalg_cross.<locals>.<lambda>       Or:   r0   c                  V   > ST  STR                  T 5       STR                  T 5       3$ )Nzlinalg.cross: inputs dimension z must have length 3. Got  and size)r   otherr   s   r8   rg   r      s1    -cU 399S>"%

3'8:r:   )ndimrX   rj   r   r%   r   r   )r   r   r   x_dy_d	out_shapes   ```   r8   linalg_crossr      s     ))C
**C	LL
O 
LL		#!4

31 4	
 "$**ekk:I>>)$$r:   c                 x    [        U S5        [        U S5        [        R                  " U [        R                  S9$ )Nzlinalg.matrix_expmemory_format)squareCheckInputscheckFloatingOrComplexrX   
empty_likecontiguous_formatr   s    r8   linalg_matrix_expr      s3     d/04!45D0G0GHHr:   valuesindicesc                 R   [         R                  " U R                  U R                  U R                  S9n[         R                  " U R                  U R                  [         R
                  S9nU R                  5       S:w  a%  U R                  S:w  a  [        XR                  5        X#4$ )Nr   r`   r   )	rX   r   r   r   r`   int64r   r   maybe_wrap_dim)r   r   r   r   s       r8   	cummaxminr      sl    
 [[DKKtzzJFkk$**T[[LGzz|qTYY!^sII&?r:   c                 r    [        XR                  5        [        R                  " U [        R                  S9$ Nr   )r   r   rX   r   r   )r   r   s     r8   logcumsumexpr      s)     3		"D0G0GHHr:   c                V  ^ UR                   n[        U5      nXV-
  n[        [        U5      5      n[        U5       V	s/ s H  n	SPM     n
n	U H  nSX'   M	     / / pU H0  nX   (       d  UR	                  U5        M  UR	                  U5        M2     X-   n[        U5      nUR                  5       mUS U nUR                  U4S jSS9  XUS  -   nUR                  U5      nS/[        UR                  US  5      -   nUR                  U5      nUR                  S5      nUUS'   [        U5      n[        [        U5      5       H  nX#U      UUS-   '   M     U R                  U[        R                  S9  [        U5       V	s/ s H  n	SPM     nn	SnUS-
  nUS:  a1  UU R                  S5      -  UUU   '   UX(U      -  nUS-  nUS:  a  M1  [        Xu5       H   nU R                  SUU-
  -   5      UUU   '   M"     U R                  UUU R                  5       5        U $ s  sn	f s  sn	f )	NFTc                    > TU    $ rD   r4   )r=   self_stridess    r8   rg   _exec_fft.<locals>.<lambda>  s	    <?r:   keyreverser   r   r3   r   )r   lenlistrangeappendstridesortpermuter   reshaper   resize_rX   r   as_strided_storage_offset)outr   	out_sizesr   forwardr   signal_ndim
batch_dimsdim_permuterT   is_transformed_dimdleftright	batch_endtmpinputbatched_sizes
batch_sizebatched_out_sizesiout_stridesbatch_numelr   s                          @r8   	_exec_fftr      s4   99Dc(K#J uT{#K).t5A%5 $  b%!$KKNLLO	 
 ,KD	I;;=L
jy
!CHH*DH9IJ//KLL%E D4JK 899MMM-(EAJ!M!]+3s8_#,V#4!a%  KK!1H1HKI $Dk*k1kK*KQA
q&&1CJJqM&AKN#yQ00	Q q& :$&)jja*n1E&FKN# %OOI{C,>,>,@AJW 6@ +s   H!H&r   r   exclude_lastc                    ^ [        U5      nU R                  5       mUS [        U5      [        U5      -
   R	                  U4S jS9  U$ )Nc                    > TU    $ rD   r4   )r   r   s    r8   rg   _sort_dims.<locals>.<lambda>0  s	    l1or:   )r   )r   r   r   intr   )r   r   r   sorted_dimsr   s       @r8   
_sort_dimsr   ,  sL    s)K;;=L6#k"S%667<<% =  r:   c                 
   [         R                  " U R                  R                  5        U(       d  U R	                  5       $ [        X5      nU R                  U R                  5       5      n[        XPU R                  5       XCS9$ )Nr   )	rX   rj   r`   
is_complexcloner   r   r   r   )r   r   normalizationr   r   r   s         r8   meta_fft_c2cr   7  s\     
LL&&'zz|T'K
..
%CS		[JJr:   c                 n    [        U 5      [        :  d!  [        U 5      S:  a  U S   S:X  a
  U S   S:X  a  gg)N   r   r3   FT)r   cufft_max_ndimr   s    r8   use_optimized_cufft_pathr   F  s3    
3x. SX]s1v{s1vQR{r:   c                 |  ^ [         R                  " U R                  R                  5        [	        U R                  5       5      n[	        U5      nUS   nXF   S-  S-   n[	        U5      nXxU'   U(       a  XuU'   [        U 5      S:X  d  [        U 5      S:X  Gax  U R                  U[        R                  " U R                  5      S9n	U n
[        U 5      S:X  a  [        U5      (       a  [        XXQSS9  O[        U5      S:X  a  UOUn[        XX/SS9  [        U5      S:  a.  U R                  U[        R                  " U R                  5      S9n
US S nU(       au  XpU
R                  5       mUR                  U4S	 jSS
9  [        [         [        U5      5      nU[        U5      U-
  S  n[        XXSS9  US [        U5      U-
   nU(       a  Mu  U(       d7  U	R                  U5      XV   :w  a   U
R#                  U[         R$                  S9  U
n	U	$ U R                  U[        R                  " U R                  5      S9$ )Nr   r   r3   cudaxpur   Tr   c                    > TU    $ rD   r4   )r   stridess    r8   rg   meta_fft_r2c.<locals>.<lambda>t  s	    '!*r:   r   r   )rX   rj   r`   is_floating_pointr   r   device_hintr   rQ   r   r   r   r   r   r   minr   r   r   )r   r   r   onesidedinput_sizesr   last_dimlast_dim_halfsizeonesided_sizesoutputworking_tensortarget_sizesr   max_dims	last_dimsr   s                  @r8   meta_fft_r2cr  M  s+    
LL--.tyy{#K[!I2wH#-2Q6+&N08/(4F"k$&75&@ U>>tzzJ   
 t&+CC+H+HfidK ),CA9>LflJPTU3x!|!%U%F%Ftzz%R "0 "
 cr(K)7(//1  ,d !  ~s;/?@'K(88(C(EF	Nt **GC,<x,GH + {{8$	(;;&&y@W@W&X' ~~U>>tzzJ  
 	
r:   )	generatorc                D    [        U[        R                  " U /5      5      $ rD   )r!   rX   Size)nr  r   s      r8   meta_randpermr    s    S%**aS/22r:   r`   r   r   r   c                .    [         R                  " XX#US9$ Nr  rX   r   )r  r`   r   r   r   s        r8   meta_randperm_defaultr    s     ;;	v r:   c                t   ^ ^ Sm[         R                  " T T:  U U4S j5        [         R                  " XX4US9$ )Nr   c                     > ST ST  3$ Nz:random_ expects 'from' to be less than 'to', but got from=z >= to=r4   highlows   r8   rg   meta_randint.<locals>.<lambda>      LSEQXY]X^_r:   r  rX   rj   r   )r  r   r`   r   r   r   r  s   `     @r8   meta_randintr#    s:     C	LLs
_ ;;&J r:   c                p   ^ ^ [         R                  " TT :  UU 4S j5        [         R                  " X#XEUS9$ )Nc                     > ST ST  3$ r  r4   r  s   r8   rg   "meta_randint_low.<locals>.<lambda>  r!  r:   r  r"  )r  r  r   r`   r   r   r   s   ``     r8   meta_randint_lowr'    s5     
LLs
_ ;;&J r:   c                .    [         R                  " XX#US9$ r  r  )r   r`   r   r   r   s        r8   meta_rand_defaultr)    s     ;;&J r:   r   lastdimc                    [         R                  " U R                  R                  5        [	        U 5      S:X  a  [        U R                  5       5      nX4US   '   U R                  U[        U R                  5      S9n[        U5      (       a(  [        UU R                  [         R                  S9UUSS9$ [        U5      S:  a  [        XS S SU5      nOU R                  [         R                  S9n[        XVXAS   /SS9$ U n[        U5      S:  a  US S n[        XUSS9nUSS  n[        UR                  5       5      nX4US   '   U R                  U[        U R                  5      S9n	[        XXASS9$ )	Nr   r   r   r   Fr   r3   r   )rX   rj   r`   r   r  r   r   r   rb   r   r   r   r   r   r   )
r   r   r   r*  r   r
  tempr   c2c_dimsr   s
             r8   meta_fft_c2rr.    s^    
LL&&'4F"%	$#b'	1LM#C((

)@)@
A  3x!|#Dcr(Aw?zz0G0GzHV92wiOO s8a<3BxH NEbc(C&	$#b'nnYodjj.InJYUCCr:   c                 `   SSK Jn  U" U 5      (       d%  [        R                  " U 5      S:X  a  [	        S5      e[        U[        5      (       a`  UR                  X5      nU R                  5       UR                  5       :w  a-  [        R                  R                  X@R                  5       5        U $ )Nr   )free_unbacked_symbolsr3   zQmore than one element of the written-to tensor refers to a single memory location)%torch.fx.experimental.symbolic_shapesr0  rX   _debug_has_internal_overlapRuntimeErrorrt   r   tor   r-   expand_copydefault)r   srcnon_blockingr0  intermediates        r8   
meta_copy_r:    s     L "$''E,M,Md,SWX,X_
 	
 #vvvd199;,++--$$\99;?Kr:   c                     [        U R                  5       5      n[        U R                  5       5      nXR                  5       :  a  SOX!   X1   -  nUR	                  US5        UR	                  X5        X#4$ r2   )r   r   r   r   insert)tensorr   result_sizesresult_strides
new_strides        r8   inferUnsqueezeGeometryrA    sf    &L&--/*NZZ\)|/@>CV/VJQ#*''r:   c                 z    [        XR                  5       S-   5      n[        X5      u  p#U R                  X#5        U $ r2   )r   r   rA  r   )r   r   g_sizes	g_stridess       r8   meta_unsqueeze_rE    s6    
hhj1n
-C/:GW(Kr:   r   weight_metabias_activation_opt	out_dtypec                 N   [        U R                  5      nUbS  UR                  S5      UR                  S5      :w  a/  [        SUR                  S5       SUR                  S5       35      eUR                  S5      U R                  S5      S-  :w  a2  [        SUR                  S5       SU R                  S5      S-   35      eUR                  S5      US'   [	        U R                  5      S:w  a"  [        S	[	        U R                  5       S
35      eSU R                  S5      4nUbM  U R
                  [        R                  :X  a  U[        R                  :X  d  [        SU R
                   SU 35      eU R                  UUc  U R
                  OUS9R                  Xg5      nU$ )Nr   z%output size mismatch: weight.size(0)= != bias.size(0)=r3   r   r   zweight.size(1)=z != input.size(-1)/2=z0we can only handle the squashed input case, got D inputzKout_dtype is only supported for i8i8->i32 linear operator, got input.dtype=, out_dtype=r   )r   r   r   r   r   r`   rX   int8int32r   
as_strided)	r   rF  rG  rH  rI  rJ  output_sizestransposed_stridesr
  s	            r8   meta_sparse_structured_linearrT  %  s    $L;;q>TYYq\) 7A7GGXY]YbYbcdYeXfg  {{1~B!++fkk!n--B5::b>TUCUBVW
 	
 {{1~L 5;;1>s5;;?O>PPWX
 	
 UZZ]+uzz)i5;;.F ]^c^i^i]jjv  xA  wB  C  __&.ekkI   j2 
 Mr:   mat1	mat1_metamat2c                 <   [        U R                  5      S:w  a"  [        S[        U R                  5       S35      e[        UR                  5      S:w  a"  [        S[        UR                  5       S35      e[        UR                  5      S:w  a"  [        S[        UR                  5       S35      eU R                  S5      UR                  S5      S-  :w  a2  [        SU R                  S5       S	UR                  S5      S-   35      eU R                  S5      UR                  S5      /nUbM  UR                  [
        R                  :X  a  U[
        R                  :X  d  [        S
UR                   SU 35      eUR                  UUc  UR                  OUS9nU$ )Nr   mat1 must be 2D, got Dmat1_meta must be 2D, got mat2 must be 2D, got r3   r   mat1.size(1)= != mat2.size(0)/2=Jout_dtype is only supported for i8i8->i32 linear operator, got mat2.dtype=rN  r   	r   r   r   r   r`   rX   rO  rP  r   )rU  rV  rW  rJ  rR  r
  s         r8   meta_sparse_structured_mmra  Q  sv    4::!4S_4EQGHH
9??q 9#ioo:N9OqQRR
4::!4S_4EQGHHyy|tyy|a''DIIaL>)<TYYq\A=M<NO
 	
 IIaL$))A,/L

ejj(Y%++-E \]a]g]g\hhtu~t  A  ^^%-djj9  F
 Mr:   r3   )alphabetarJ  c                X   [        U R                  5      S:w  a"  [        S[        U R                  5       S35      e[        UR                  5      S:w  a"  [        S[        UR                  5       S35      e[        UR                  5      S:w  a"  [        S[        UR                  5       S35      e[        UR                  5      S:w  a"  [        S[        UR                  5       S35      eU R                  S	5      UR                  S	5      :w  a/  [        S
U R                  S	5       SUR                  S	5       35      eUR                  S5      UR                  S	5      S-  :w  a2  [        SUR                  S5       SUR                  S	5      S-   35      eUR                  S	5      UR                  S5      /nUbM  UR                  [
        R                  :X  a  U[
        R                  :X  d  [        SUR                   SU 35      eUR                  UUc  UR                  OUS9nU$ )Nr3   zKonly input broadcasted to columns of mat1 * mat2 product is supported, got rM  r   rY  rZ  r[  r\  r   zUonly input broadcasted to columns of mat1 * mat2 product is supported, input.size(0)=z != mat1.size(0)=r]  r^  r_  rN  r   r`  )	r   rU  rV  rW  rb  rc  rJ  rR  r
  s	            r8   meta_sparse_structured_addmmre  q  s    5;;1YZ]^c^i^iZjYkkrs
 	
 4::!4S_4EQGHH
9??q 9#ioo:N9OqQRR
4::!4S_4EQGHHzz!}		!$"ZZ]O+<TYYq\NL
 	
 yy|tyy|a''DIIaL>)<TYYq\A=M<NO
 	
 IIaL$))A,/L

ejj(Y%++-E \]a]g]g\hhtu~t  A  ^^%-djj9  F
 Mr:   compressed_Adense_Brb  transpose_resultalg_idsplit_ksplit_k_modec	           	      X   UR                   [        R                  [        R                  [        R                  [        R
                  [        R                  1;  a  [        SUR                    35      eU R                   UR                   :w  a%  [        SU R                    SUR                    35      e[        UR                  5      S:w  a"  [        S[        UR                  5       S35      eU R                   [        R
                  [        R                  4;   n	U	(       a   UR                  5       (       a  [        S5      eUR                  S5      n
U R                  S	5      nUb4  XR                  S	5      :w  a   [        S
U SUR                  S	5       35      eUbr  U	(       aB  U[        R                  [        R                  [        R                  [        R                  1;   d)  [        SU R                    SUR                    SU S35      eU(       a  X4OX4nUR                  XS9$ )NzA_cslt_sparse_mm only supports fp16, bf16, int8, and fp8e4m3, got z%inputs must have the same dtype, got r   r   z-_cslt_sparse_mm only supports 2d inputs, got rZ  z.dense input must be transposed for 8bit dtypesr3   r   zbias size mismatch: m=rL  zout_dtype is not supported for z x z -> z matmul!r   )r`   rX   float32float16bfloat16rO  float8_e4m3fnr   r   r   is_contiguousr   rP  r   )rf  rg  rH  rb  rJ  rh  ri  rj  rk  is_8bit_input_typer  moutput_shapes                r8   meta__cslt_sparse_mmru    s    }}

  OPWP]P]_
 	
 W]]*3L4F4F3GuW]]O\
 	
 7==Q;C<N;OqQ
 	
 &++

E<O<O/PP  "" !QRRQA!A		! (+<TYYq\NK  ##	 !1,2D2D1ESW[\e[ffno  .A6A6L\;;r:   T)include_selfr   sourcereducerv  c                H    [         R                  " U [         R                  S9$ r   rX   r   r   r   r   r   rw  rx  rv  s         r8   meta_index_reducer|    s     D0G0GHHr:   c                    U $ rD   r4   r{  s         r8   meta_index_reduce_r~    s	     Kr:   c                     [        U R                  5       5      nU R                  5       S:  a  UR                  5       X1'   U R	                  U5      $ Nr   )r   r   r   r   r   )r   r   r   result_sizes       r8   meta_index_selectr    s>     tyy{#KxxzA~ ;;=>>+&&r:   )lengthsr   offsetsaxisunsafeinitialdatar  r  r  r  c                   ^ ^ Ub  [        S5      eUU 4S jnUb  U" UR                  5      $ Ub+  UR                  S S UR                  S   S-
  4-   n	U" U	5      $ [        S5      e)Nz?segment_reduce(): indices based reduction is not supported yet.c                    > [         R                  " U TR                  TS-   S  -   TR                  S[         R                  S9$ )Nr3   r   r`   r   r   )rX   r   r   r`   r   )lengths_shaper  r  s    r8   segment_reduce_lengths_tensor:meta_segment_reduce.<locals>.segment_reduce_lengths_tensor  s>    {{DJJtaxz22**11	
 	
r:   r   r3   z<segment_reduce(): Either lengths or offsets must be defined.)NotImplementedErrorr   r3  )
r  rx  r  r   r  r  r  r  r  r  s
   `    `    r8   meta_segment_reducer    s|     !M
 	

 ,W]];; cr*gmmB.?!.C-EE,];;
U
VVr:   c                 $    U R                  S5      $ Nr4   r   r   s    r8   meta_maxr  %       >>"r:   c                     [         R                  " U R                  U45      n[        XU5      nU R	                  U5      U R	                  U[
        R                  S94$ Nr   rQ   reduction_dimsr   _compute_reduction_shaper   rX   r   r   r   keepdimrt  s       r8   meta_max_dimr  +  P    


tzzC6
2C+Dw?L|$|5::6 r:   c                 $    U R                  S5      $ r  r  r   s    r8   meta_minr  5  r  r:   c                     [         R                  " U R                  U45      n[        XU5      nU R	                  U5      U R	                  U[
        R                  S94$ r  r  r  s       r8   meta_min_dimr  ;  r  r:   c                     U R                  5       (       a  [        U R                  5      nO[        U [        R
                  S9u  p![        R                  " XS9$ NrP   r   )r   r   r`   r   r   INT_TO_FLOATrX   r   )r   rU   rT   s      r8   
meta_angler  E  sH    /

;, ? L L
 D55r:   c                     [         R                  " XR                  5       U R                  5        UR	                  [         R
                  " U 5      5      $ rD   )rX   _resize_output_r   r   copy_angle)r   r   s     r8   meta_angle_outr  Q  s4    	#yy{DKK899U[[&''r:   c                     g rD   r4   )vals    r8   assert_asyncr  W      
r:   c                     g rD   r4   )r  
assert_msgs     r8   assert_async_metar  \  r  r:   c                     g rD   r4   )ss    r8   
print_metar  a  r  r:   r`   r   r   r   r   c                 ,    [         R                  " SSS9$ )Nr   r   r   r  r  s        r8   make_dep_tokenr  f  s     ;;q((r:   c                 j    SSK Jn  [        U [        [        45      (       a  [        S5      eU" XUS9  g )Nr   )constrain_range'Constraining SymFloat or Symbool is nyir  max)r1  r  rt   r
   r	   
ValueError)r   r  r  r  s       r8   sym_constrain_ranger  r  s0     F$7+,,BCCDs+r:   c                 .    [         R                  XUS9  U$ Nr  )r-   r  r   r  r  	dep_tokens       r8   functional_sym_constrain_ranger  |  s    T4r:   c                 <   SSK Jn  Uc  Uc  [        R                  " U S:  5        g [	        U [
        [        45      (       a  [        S5      e[        U 5      [        L a7  Ub  [        R                  " X:  5        Ub  [        R                  " X:*  5        g U" XUS9  g )Nr   )_constrain_range_for_sizer  r  )
r1  r  rX   rj   rt   r
   r	   r  r   r   )r   r  r  r  s       r8   sym_constrain_range_for_sizer    s     P
{s{TQY$7+,,BCCDzS?LL%?LL%d5r:   c                 .    [         R                  XUS9  U$ r  )r-   r  r  s       r8   'functional_sym_constrain_range_for_sizer    s    %%d%=r:   c                     U$ rD   r4   )r  r  r  s      r8   functional_assert_async_metar    s    r:   f_namec                    ^ ^ T R                  5       S:  a  [        T ST R                  5        35      e[        R                  " T R	                  S5      T R	                  S5      :H  UU 4S j5        g )Nr   z8: The input tensor must have at least 2 dimensions, got r   c                  V   > T  STR                  S5       STR                  S5       S3$ )Nz5: A must be batches of square matrices, but they are r   by r   	 matricesr   )r  r   s   r8   rg   #squareCheckInputs.<locals>.<lambda>  s/    6( 		"d499R=/Dr:   )r   r   rX   rj   r   )r   r  s   ``r8   r   r     s^    xxzA~hNtxxzl[
 	
 
LL		"2&	Dr:   Anamec                   ^ ^^ [         R                  " T R                  TR                  :H  UU 4S j5        [         R                  " T R                  TR                  :H  UU 4S j5        [         R                  " TR	                  S5      TR	                  S5      :H  U4S j5        [         R                  " TR	                  S5      T R	                  S5      :H  UUU 4S j5        g )Nc                  >   > STR                    ST R                    S3$ )Nz:Expected b and A to be on the same device, but found b on z
 and A on 	 instead.r  r  r   s   r8   rg   (linearSolveCheckInputs.<locals>.<lambda>  s     H{{m:ahhZy:r:   c                  >   > STR                    ST R                    S3$ )Nz=Expected b and A to have the same dtype, but found b of type z and A of type r  r   r  s   r8   rg   r    s     Kzzl/!'')=r:   r   r  c                  R   > ST R                  S5       ST R                  S5       S3$ )Nz3A must be batches of square matrices, but they are r  r  r   r  r   r  s   r8   rg   r    s+    FF2J<tAFF2J<yBr:   c                     > ST ST R                  S5       ST R                  S5       STR                  S5       STR                  S5       3
$ )NzIncompatible matrix sizes for z: each A matrix is r   r  z but each b matrix is r  r   )r  r  r   s   r8   rg   r    sM    ,TF 3D$TYYr]O4		"Hr:   )rX   rj   r   r`   r   )r   r  r  s   ```r8   linearSolveCheckInputsr    s    	LLqxx	
 
LL

agg	
 
LL	r
affRj 	
 
LL	r
diim#	
r:   tallow_low_precision_dtypesc                 h  ^^ U R                   m[        R                  " U R                  5       =(       d    U R	                  5       UU4S j5        U(       d\  [        R                  " T[        R
                  [        R                  [        R                  [        R                  4;   UU4S j5        g g )Nc                     > T ST  3$ )Nz<: Expected a floating point or complex tensor as input. Got r4   r`   r  s   r8   rg   (checkFloatingOrComplex.<locals>.<lambda>  s    6(VW\V]^r:   c                     > T ST  3$ )Nz*: Low precision dtypes not supported. Got r4   r  s   r8   rg   r    s    vhHPr:   )	r`   rX   rj   r  r   r\   r^   r[   r]   )r  r  r  r`   s    ` @r8   r   r     sn    
 GGE	LL	/^ &ekk5<<u}}MMP	
 &r:   arg_namec                 b   ^^ [         R                  " U R                  5       S:  UU4S j5        g )Nr   c                     > T ST  S3$ )Nz: The input tensor z! must have at least 2 dimensions.r4   )r  r  s   r8   rg   checkIsMatrix.<locals>.<lambda>  s    6(-hZ7XYr:   )rX   rj   r   )r  r  r  s    ``r8   checkIsMatrixr    s    	LL	1Yr:   Br   c                   ^ ^^^ [        T T5        [        TT5        [        R                  " T(       a#  T R	                  S5      TR	                  S5      :H  O"T R	                  S5      TR	                  S5      :H  U UUU4S j5        g )Nr  r   c                     > T ST(       a  SOS ST R                  S5       ST R                  S5       STR                  S5       STR                  S5       S	3$ )
Nz2: Incompatible shapes of A and B for the equation zAX = BzXA = Bz (r  r=   r   r   r~   r   )r  r  r  r   s   r8   rg   #checkInputsSolver.<locals>.<lambda>  sV    hHxX.AaffRj\qvvbzl!AFF2J<qJr:   )r   r  rX   rj   r   )r  r  r   r  s   ````r8   checkInputsSolverr    sY    a !V	LL$(r
affRj affRjAFF2J.F	
r:   resultfn_nameresult_namec                 v   ^ ^^^ [         R                  " TR                  TR                  :H  U UUU4S j5        g )Nc            	      L   > T  ST ST STR                    STR                    3	$ )Nz: Expected z5 and input tensors to be on the same device, but got z on z and input on r  )r  r   r  r  s   r8   rg   !checkSameDevice.<locals>.<lambda>  s0    i{;-/dm4nU\\NLr:   )rX   rj   r   )r  r  r   r  s   ````r8   checkSameDevicer    s&     
LL%	
r:   UPLOc                    ^  T R                  5       n[        R                  " [        T 5      S:H  =(       a    US:H  =(       d    US:H  U 4S j5        g )Nr3   ULc                     > ST  3$ )Nz1Expected UPLO argument to be 'L' or 'U', but got r4   )r  s   r8   rg   checkUplo.<locals>.<lambda>  s    CD6Jr:   )upperrX   rj   r   )r  UPLO_uppercases   ` r8   	checkUplor    s<    ZZ\N	LLD	QKNc1J^s5JJr:   eigenvalueseigenvectorsr  	compute_vc                 P   [        U S5        [        U5        [        U R                  5      nU(       a,  U R	                  U5      nUR                  U[        USS95        OU R	                  S/5      nUR                  5         U R	                  U[        U R                  5      S9nXT4$ )Nzlinalg.eighF	row_majorr   r   )
r   r  r   r   r   r   r   poprb   r`   )r  r  r  r   vecsvalss         r8   meta__linalg_eighr    s     a'dOME{{5! ;EU ST{{A3	IIK;;uOAGG$<;=D:r:   c                     [        U S5        [        R                  " U R                  5      (       a  U R                  O[        R                  " U R                  5      nU R                  U R                  S S US9$ )Nzlinalg.eigvalsr   r   )r   rQ   r   r`   r   r   r   )r   complex_dtypes     r8   meta__linalg_eigvalsr  &  sf     e-. !!%++.. 	..u{{; 
 ??5;;s+=?AAr:   c                    [        U S5        [        R                  " U R                  5      (       a  U R                  O[        R                  " U R                  5      nU R                  U R                  S S US9nU R                  U R                  US9n[        U 5      S:H  nUR                  U R                  [        U R                  US95        X#4$ )Nz
linalg.eigr   r   r   r  )
r   rQ   r   r`   r   r   r   r  r   r   )r   r  r   vectorsis_cudas        r8   meta_linalg_eigr  2  s     e\* !!%++.. 	..u{{; 
 __U[["-]_CFooekko?G% F*G0P ?r:   r7  c                 p    U R                   R                  [        R                  S9R	                  SS5      $ )Nr   r  r   )mTr   rX   r   	transpose)r7  s    r8   cloneBatchedColumnMajorr  D  s*    66<<e&=&=<>HHRPPr:   r  c                     [        U 5      $ rD   )r  )r   r  r  s      r8   _cholesky_solve_helperr  H  s     #4((r:   c                    ^ ^ [         R                  " T R                  S:  U 4S j5        [         R                  " TR                  S:  U4S j5        [        T TS5      u  p4[	        X4U5      $ )Nr   c                  $   > ST R                    S3$ )Nz-b should have at least 2 dimensions, but has  dimensions insteadr   r   s   r8   rg    cholesky_solve.<locals>.<lambda>S  s    ?		{J]^r:   c                  $   > ST R                    S3$ )Nz-u should have at least 2 dimensions, but has r  r  r  s   r8   rg   r  W  s    ?xGZ[r:   cholesky_solve)rX   rj   r   !_linalg_broadcast_batch_dims_namer  )r   r  r  self_broadcastedA_broadcasteds   ``   r8   r  r  N  sd     
LL		Q^ 
LL	![ 'Ha!'# ""25IIr:   c                     U R                  5       S:X  a#  [        R                  " U [        R                  S9$ [	        U S5        [        U 5      $ )Nr   r   cholesky)r   rX   r   legacy_contiguous_formatr   r  r   r  s     r8   r$  r$  _  s@     zz|qE4R4RSSdJ'"4((r:   c                 0    [        U S5        [        U 5      $ )Ncholesky_inverse)r   r  r&  s     r8   r(  r(  h  s     d./"4((r:   check_errorsc                 
   [        U S5        [        U S5        U R                  n[        U5      n[	        US5      nU R                  U5      nUR                  X55        U R                  USUS-
   [        R                  S9nXg4$ )Nzlinalg.choleskyFr   r   r   )	r   r   r   r   r   r   r   rX   rP  )r  r  r)  A_shaper   	L_stridesr  infoss           r8   linalg_cholesky_exr.  p  s|    a*+1/0ggGw<D ,GU;I	GAMM'% KKD1H-U[[KAE8Or:   tauc                 J  ^ ^^ [         R                  " T R                  S:  S 5        [         R                  " T R                  S5      T R                  S5      :  S 5        [         R                  " T R                  S5      TR                  S5      :  S 5        [         R                  " T R                  TR                  -
  S:H  U U4S j5        T R                  S:  a<  T R                  S S nTR                  S S m[         R                  " TU:H  U4S	 j5        [         R                  " TR
                  T R
                  :H  U U4S
 j5        [        STT S5        [         R                  " T R                  [        T R                  SS9T R
                  T R                  S9$ )Nr   c                      g)NzHtorch.linalg.householder_product: input must have at least 2 dimensions.r4   r4   r:   r8   rg   ,linalg_householder_product.<locals>.<lambda>      Zr:   r  r   c                      g)Nzbtorch.linalg.householder_product: input.shape[-2] must be greater than or equal to input.shape[-1]r4   r4   r:   r8   rg   r2    s    tr:   c                      g)Nz`torch.linalg.householder_product: input.shape[-1] must be greater than or equal to tau.shape[-1]r4   r4   r:   r8   rg   r2    s    rr:   r3   c                  <   > STR                    ST R                    3$ )Nzptorch.linalg.householder_product: Expected tau to have one dimension less than input, but got tau.ndim equal to  and input.ndim is equal to r  r   r/  s   r8   rg   r2    "    )),
2Nuzzl\r:   c                     > ST  3$ )Nzltorch.linalg.householder_product: Expected batch dimensions of tau to be equal to input.shape[:-2], but got r4   actual_batch_tau_shapes   r8   rg   r2        66L5MOr:   c                  <   > STR                    ST R                    3$ )Nz,torch.linalg.householder_product: tau dtype z does not match input dtype r   r8  s   r8   rg   r2    s    :399+*5;;-9r:   z torch.linalg.householder_productr/  Fr  r   r   r`   r   )
rX   rj   r   r   r   r`   r  empty_stridedr   r   )r   r/  expected_batch_tau_shaper<  s   `` @r8   linalg_householder_productrB    sK   
 
LL

aZ 
LL

2%**R.(t 
LL

2#((2,&r
 
LL

SXX"	
 zzA~#(;;s#3 !$3B"&>>	
 
LL		U[[ 	
 6UEJ[[*5;;%Hkk||	 r:   c                    [        U S5        [        U SSS9  U R                  U R                  5      nUR	                  U R                  [        U R                  SS95        U R                  U R                  S S [        R                  S9nX#4$ )Nzlinalg.inv_exF)r  r  r  r   r   r   r   r   r   r   rX   rP  )r  r)  r  r-  s       r8   linalg_inv_ex_metarE    so    a)1o%P	AGGAMM!''6qww%PQKKEKKK8E8Or:   LDpivotsinfo)	hermitianr)  rI  c                t   [        U S5        [        U S5        [        R                  " U R                  [        U R                  SS9U R                  U R                  S9nU R                  U R                  S S [        R                  S9nU R                  U R                  S S [        R                  S9nX4U4$ )Nztorch.linalg.ldl_factor_exFr  r?  r   r   r  )
r   r   rX   r@  r   r   r`   r   r   r   )r   rI  r)  rF  rG  rH  s         r8   linalg_ldl_factor_ex_metarK    s     d894!=>			ZZ*4::Gjj{{	
B ^^DJJsO599^=F>>$**Sb/>;Dtr:   )rI  c                j  ^ ^^ [        T S5        [        T S5        [        TT S5        [        R                  " TR
                  S:  U4S j5        T R                  S S n[        R                  " UTR                  :H  U4S j5        [        R                  " [        R                  " TR                  5      U4S j5        [        R                  " T R                  TR                  :H  UU 4S j5        [        TT 5      u  pV[        R                  " U[        USS	9TR                  TR                  S
9$ )Nztorch.linalg.ldl_solver   c                  $   > ST R                    S3$ )NzMtorch.linalg.ldl_solve: Expected B to have at least 2 dimensions, but it has r  r  )r  s   r8   rg   'linalg_ldl_solve_meta.<locals>.<lambda>      &&!46r:   r   c                  $   > ST R                    S3$ )Nzjtorch.linalg.ldl_solve: Expected LD.shape[:-1] and pivots.shape to be the same, but got pivots with shape  insteadr   rG  s   r8   rg   rN        ))/h@r:   c                  "   > ST R                    3$ )Nz<torch.linalg.ldl_solve: Expected pivots to be integers. Got r   rS  s   r8   rg   rN    s    Nv||n]r:   c                  <   > STR                    ST R                    3$ )Nz!torch.linalg.ldl_solve: LD dtype z does not match b dtype r   )r  rF  s   r8   rg   rN    s     3BHH:=UVWV]V]U^_r:   Fr  r?  )r   r   r  rX   rj   r   r   rQ   is_integer_dtyper`   _linalg_broadcast_batch_dimsr@  r   r   )rF  rG  r  rI  expected_pivots_shapeB_broadcast_sizerT   s   ```    r8   linalg_ldl_solve_metar[    s     b232781b":;	LL	!	
 HHSbM	LL-	
 
LLv||,] 
LL
AGG_ 7q"=*+;uMggxx	 r:   Pr  )pivotr]  c                j  ^  [         R                  " T R                  S:  U 4S j5        [        T R                  5      nUS   nUS   n[        X45      nX2S'   U(       a  T R                  U5      nOT R                  S/5      nXRS'   T R                  U5      nXRS'   XBS'   T R                  U5      nXgU4$ )Nr   c                  $   > ST R                    S3$ )Nz@linalg.lu: Expected tensor with 2 or more dimensions. Got size: rQ  rR  r  s   r8   rg    linalg_lu_meta.<locals>.<lambda>
  s    RSTSZSZR[[cdr:   r  r   r   )rX   rj   r   r   r   r   r   )	r  r]  sizesrs  r  kr\  r  r  s	   `        r8   linalg_lu_metarc    s     
LL	!d
 MEb	Ab	AA"IKKKK"I	EA"I"I	EA7Nr:   LU)r]  r)  c                  ^  [         R                  " T R                  S:  U 4S j5        [        T R                  5      nUS   nUS   n[         R
                  " U[        USS9T R                  T R                  S9nUR                  5         [        XE5      US'   T R                  U[         R                  S9nUR                  5         T R                  U[         R                  S9nXgU4$ )	Nr   c                  $   > ST R                    S3$ )NzFtorch.lu_factor: Expected tensor with 2 or more dimensions. Got size: rQ  rR  r  s   r8   rg   *linalg_lu_factor_ex_meta.<locals>.<lambda>,  s    XYZY`Y`Xaaijr:   r  r   Fr  r?  r   )rX   rj   r   r   r   r@  r   r`   r   r  r   r   r   )	r  r]  r)  ra  rs  r  rd  rG  rH  s	   `        r8   linalg_lu_factor_ex_metarh  "  s     
LL	!j
 MEb	Ab	A			*5EBggxx	
B 
IIKE"I[[eii[0F 
IIK;;uEII;.Dtr:   )r   adjointri  c                
  ^ ^^ [        T S5        [        R                  " T R                  TR                  :H  UU 4S j5        [        R                  " TR                  [        R                  :H  S 5        [        T S5        [        T TUS5        [        R                  " T R                  S5      TR                  S5      :H  S 5        [        R                  " T R                  S S TR                  :H  U4S j5        [        TT 5      u  pV[        R                  " U[        XS(       + S9TR                  TR                  S	9nUR                  5       S
:w  a,  U(       d%  UR                  5       (       a  UR                  5       nU$ )Nztorch.linalg.lu_solvec                  >   > STR                    ST R                    S3$ )NzPlinalg.lu_solve: Expected LU and B to have the same dtype, but found LU of type  and B of type rQ  r   )r  rd  s   r8   rg   &linalg_lu_solve_meta.<locals>.<lambda>U  s#    $$&HH:_QWWIXOr:   c                      g)NzElinalg.lu_solve: pivots should be a Tensor of scalar type torch.int32r4   r4   r:   r8   rg   rm  \  s    Wr:   zlinalg.lu_solver   c                      g)NzYlinalg.lu_solve: Number of pivots per batch should be same as the dimension of the matrixr4   r4   r:   r8   rg   rm  d  s    kr:   c                  $   > ST R                    S3$ )Nzclinalg.lu_solve: Expected LU.shape[:-1] and pivots.shape to be the same, but got pivots with shape rQ  rR  rS  s   r8   rg   rm  j  rT  r:   r  r?  r   )r   rX   rj   r`   r   r   r  r   r   rX  r@  r   r   r   r   conj)rd  rG  r  r   ri  rZ  rT   r  s   ```     r8   linalg_lu_solve_metarr  G  s-    267	LL
AGG	
 
LL		!W b12b!T#45	LL
v{{2&k 
LL
"%	
 7q"=  *+;xPggxx	F ||~4[[]FMr:   unpack_dataunpack_pivotsc                 6  ^  [         R                  " T R                  S:  U 4S j5        U(       a3  [         R                  " UR                  [         R                  :H  S 5        [        T R                  5      nUS   nUS   n[        XV5      nXTS'   U(       a  T R                  U5      nOT R                  S/5      nU(       a/  XtS'   T R                  U5      n	XtS'   XdS'   T R                  U5      n
O$T R                  S/5      n	T R                  S/5      n
XU
4$ )Nr   c                  $   > ST R                    S3$ )NzFtorch.lu_unpack: Expected tensor with 2 or more dimensions. Got size: rQ  rR  )rd  s   r8   rg    lu_unpack_meta.<locals>.<lambda>  s    XY[YaYaXbbjkr:   c                      g)Nztorch.lu_unpack: LU_pivots is expected to be a contiguous tensor of torch.int32 dtype.
Note: this function is intended to be used with the output produced by torch.linalg.lu_factorr4   r4   r:   r8   rg   rw    s    pr:   r  r   r   )	rX   rj   r   r`   rP  r   r   r  r   )rd  rG  rs  rt  ra  rs  r  rb  r\  r  r  s   `          r8   lu_unpack_metary    s     
LL
1k LLEKK'	
 NEb	Ab	AA	A"ILLLL!b	LLb	b	LLLL!LL!7Nr:   modec                    ^  T S:X  a  SnSnX4$ T S:X  a  SnSnX4$ T S:X  a  SnSnX4$ [         R                  " SU 4S j5        WW4$ )NreducedTcompleteFrc                     > ST  S3$ )Nzqr received unrecognized mode 'z=' but expected one of 'reduced' (default), 'r', or 'complete'r4   )rz  s   r8   rg    _parse_qr_mode.<locals>.<lambda>  s    1$ 8N Or:   rX   rj   )rz  	compute_qr|  s   `  r8   _parse_qr_moder    s    y	  
		  
	  		
 gr:   QRc                    [        U S5        [        U S5        [        U5      u  p#U R                  S   nU R                  S   n[	        XE5      nU(       aO  [        U R                  5      nU(       a  UOUUS'   U R                  U5      nUR                  U[        USS95        OU R                  S/5      n[        U R                  5      n	U(       d  U(       d  UOUU	S'   U R                  U	5      n
U
R                  U	[        U	SS95        X4$ )Nz	linalg.qrr  r   Fr  r   )	r  r   r  r   r  r   r   r   r   )r  rz  r  reduced_moders  r  rb  Q_shaper  R_shaper  s              r8   linalg_qr_metar    s     ![!1k*,T2I	A	AA	Aqww-'aQKK 	g:7eTUKK 177mG#9!!GBK	GAMM'6w%PQ4Kr:   sign	logabsdetc                 t   [        U S5        [        U SS5        U R                  nU R                  US S 5      nU R                  US S [	        U R
                  5      S9n[        R                  " U[        US5      U R
                  U R                  S9nU R                  US S [        R                  S9nX#XE4$ )Nzlinalg.slogdetFr  r   r?  r   )r   r   r   r   rb   r`   rX   r@  r   r   rP  )r  r   r  r  rd  rG  s         r8   _linalg_slogdetr    s     a)*1.6GGE;;uSbz"DE#2Joagg.FGI			*5%8ggxx	
B [[s5;;[7FB&&r:   full_matrices
compute_uvdriverc                 b   [        U S5        [        U S5        [        U R                  S S 5      nU R                  S   nU R                  S   n[	        XV5      nU(       a  XEU(       a  UOU/-   nU R                  U5      n	U	R                  U[        USS95        XA(       a  UOUU/-   n
U R                  U
5      n[        U 5      S:H  nUR                  U
[        XS95        O$U R                  S/5      n	U R                  S/5      nU R                  XG/-   [        U R                  5      S9nXU4$ )	Nz
linalg.svdr  r   Fr  r   r   r   )r  r   r   r   r  r   r   r   r  rb   r`   )r  r  r  r  r   rs  r  rb  U_shaper  V_shapeVr  Ss                 r8   _linalg_svd_metar    s    !\"1l+aggcrl#J	A	AA	A11==KK 	g:7eTU]1==KK 
 a.F*	g:7VW KKKK 	
J$OAGG,DEA7Nr:   arg1arg2c                    U R                   S S nUR                   S S n[        X#5      n[        U5      nXPR                  S5      U R                  S5      /-  n[        U5      nXaR                  S5      UR                  S5      /-  nXV4$ )Nr  r   )r   r%   r   r   )r  r  arg1_batch_sizesarg2_batch_sizesexpand_batch_portionarg1_expand_sizearg2_expand_sizes          r8   rX  rX    s    
 zz#2zz#2,-=P012		"66012		"66--r:   c                     U(       a  [        XU5        [        X5      u  p4X0R                  :X  a  U OU R                  U5      nXAR                  :X  a  UOUR                  U5      nXV4$ rD   )r  rX  r   expand)r  r  r  r  r  arg1_broadcastedarg2_broadcasteds          r8   r   r   &  sh     t40)Ed)Q& !JJ.DKK@P4Q  !JJ.DKK@P4Q  --r:   r   c                     U R                   S S nUR                  S:H  =(       d2    U R                  S-
  UR                  :H  =(       a    UR                   U:H  nU$ )Nr   r3   )r   r   )r   r   expected_batched_rhs_shapevector_cases       r8   linalg_solve_is_vector_rhsr  :  sS    !&Sb!1**/ 

Q%**$R8R)R  r:   )r   r)  r  rd  rG  rH  c                  ^ ^ [        T S5        [        R                  " T R                  TR                  :H  U U4S j5        [	        T T5      nU(       a  TR                  S5      OTn	[        T XS5        [        U	T 5      u  p[        R                  " U=(       d    U(       + S 5        U(       a  U
S S OU
n[        R                  " U[        X(       + 5      TR                  TR                  S9nT R                  n[        R                  " U[        US5      T R                  T R                  S9nT R                  US S [        R                  S9nT R                  US S [        R                  S9nXEXg4nXUU4n[        S	 U 5       5      (       aa  [        UU5       HQ  u  nn[!        UUR                  5        UR#                  UR                  UR%                  5       5        ['        UUSS
9  MS     U$ )Nzlinalg.solvec                  >   > ST R                    STR                    S3$ )NzKlinalg.solve: Expected A and B to have the same dtype, but found A of type rl  rQ  r   )r  r  s   r8   rg   "_linalg_solve_ex.<locals>.<lambda>Q  s     Ywwiqwwix9r:   r   c                      g)Nzlinalg.solve: Vector broadcasting of the left hand side is not supported for left=False. In this case linalg.solve is equivalent to B / A.squeeze(-1)r4   r4   r:   r8   rg   r  \  s    Kr:   r?  Fr   r  c              3   (   #    U  H  oS Lv   M
     g 7frD   r4   )rv   r=   s     r8   rx   #_linalg_solve_ex.<locals>.<genexpr>s  s     
&#QD=#s   )	copy_fromcopy_toexact_dtype)r   rX   rj   r`   r  	unsqueezer  rX  r@  r   r   r   r   rP  allzipr!   r   r   r#   )r  r  r   r)  r  rd  rG  rH  r  B_B_broad_shaperT   result_shaperesult_r   LU_pivots_info_r   resr~  os   ``                    r8   _linalg_solve_exr  B  s    1n-	LL	177	
 -Q2K'RQBa>23B:M	LLK	
 *5="%-L!!*<Bggxx	G GGE


*5%8ggxx	C kk%*EKKk8GKKcr
%++K6Ev
$C%
(C

&#
&&&SMDAqa)MM!''188:.QuE " Jr:   )r   unitriangularr   r  r   c                   Uc  U R                  S/5      n[        U[        5      (       d  [        S[	        U5       35      e[        XUS5        [        XS 5      u  pgUR                  SS5      R                  5       =(       a    UR                  5       nU(       a  [        XVR                  5      nU$ [        XVR                  5      (       a=  UR                  UR                  SS5      R                  5        UR                  SS5        U$ )Nr   zout must be TensorLike, got zlinalg.solve_triangularr  r   )r   rt   r   r   r   r  r   r  rq  is_conjr!   r   r"   r   
transpose_)	r  r  r  r   r  r   r  A_avoid_copy_As	            r8   linalg_solve_triangular_metar  }  s     {kk1#c:&&;DI;GHHaD";<.qT:FB<<B'557HBJJLLXX. J  XX..KKR,223NN2r"Jr:   XM)r  r  c                   ^ ^ [         R                  " T R                  S:  U 4S j5        [         R                  " TR                  S:  U4S j5        [        T TS5        TR                  [         R
                  :X  aw  [        T T5      u  pV[         R                  " U[        USS9T R                  T R                  S9n[         R                  " U[        USS9TR                  TR                  S9nXx4$ TR                  [         R                  :X  d  TR                  [         R                  :X  a+  [         R                  " T 5      nT R                  S/5      nXx4$ [         R                  " SS	 5        WW4$ )
Nr   c                  $   > ST R                    S3$ )NzMtorch.triangular_solve: Expected b to have at least 2 dimensions, but it has r  r  r   s   r8   rg   'triangular_solve_meta.<locals>.<lambda>  s    ))$79r:   c                  $   > ST R                    S3$ )NzMtorch.triangular_solve: Expected A to have at least 2 dimensions, but it has r  r  r  s   r8   rg   r    rO  r:   triangular_solveFr  r?  r   c                      g)Nz+triangular_solve: Got an unexpected layout.r4   r4   r:   r8   rg   r    s    $Qr:   )rX   rj   r   r  r   stridedrX  r@  r   r`   r   
sparse_csr
sparse_bsrr   r   )	r   r  r  r  r  self_broadcast_sizeA_broadcast_sizesolutioncloned_coefficients	   ``       r8   triangular_solve_metar    sC    
LL		Q	
 
LL	!	
 4$67xx5== 0LTST0U-&&$./BeT**;;	
 #00!./?5Q''88	
 '' 
U%%	%U5E5E)E##D)!^^QC0 '' 	UQR'''r:   c                 ^   [        U S5        [        U S5        U R                  U R                  S S 5      nU R                  U R                  5      nUR	                  U R                  [        U R                  SS95        U R                  U R                  S S [        R                  S9nXU4$ )Nz
linalg.detr  Fr  r   r   rD  )r  detrd  rG  s       r8   _linalg_det_metar    s    a&1l+
++aggcrl
#C	
QWW	BNN17775QR[["U[[[9FF?r:   c                 X  ^ ^^^^^ [         R                  " T R                  S:  S 5        [         R                  " TR                  S:  S 5        U(       a  SOSm[         R                  " TR                  T   TR                  S   :  U4S j5        [         R                  " TR                  T   T R                  S   :H  U4S j5        [         R                  " TR                  S   T R                  S   :*  S 5        [         R                  " T R                  TR                  -
  S	:H  U U4S
 j5        [         R                  " T R                  TR                  :H  U U4S j5        T R                  S:  ai  T R                  S S nTR                  S S m[         R                  " TU:H  U4S j5        TR                  S S m[         R                  " TU:H  U4S j5        [         R                  " TR                  T R                  :H  U U4S j5        [         R                  " TR                  T R                  :H  U U4S j5        [        STT S5        [        STT S5        [         R                  " TR                  [        TR                  SS9TR                  TR                  S9$ )Nr   c                      g)Nz3torch.ormqr: input must have at least 2 dimensions.r4   r4   r:   r8   rg   ormqr.<locals>.<lambda>      !Vr:   c                      g)Nz3torch.ormqr: other must have at least 2 dimensions.r4   r4   r:   r8   rg   r    r  r:   r  r   c                     > ST  S3$ )Ntorch.ormqr: other.shape[z0] must be greater than or equal to tau.shape[-1]r4   left_size_conditions   r8   rg   r    s    +,?+@@pqr:   c                     > ST  S3$ )Nr  z"] must be equal to input.shape[-2]r4   r  s   r8   rg   r    s    +,?+@@bcr:   c                      g)NzHtorch.ormqr: tau.shape[-1] must be less than or equal to input.shape[-1]r4   r4   r:   r8   rg   r    r3  r:   r3   c                  <   > STR                    ST R                    3$ )Nz[torch.ormqr: Expected tau to have one dimension less than input, but got tau.ndim equal to r7  r  r8  s   r8   rg   r    r9  r:   c                  <   > STR                    ST R                    3$ )Nzhtorch.ormqr: Expected other to have the same number of dimensions as input, but got other.ndim equal to r7  r  r   r   s   r8   rg   r    s&    ++0::,6RSXS]S]R^`r:   c                     > ST  3$ )NzWtorch.ormqr: Expected batch dimensions of tau to be equal to input.shape[:-2], but got r4   r;  s   r8   rg   r  
  r=  r:   c                     > ST  3$ )NzYtorch.ormqr: Expected batch dimensions of other to be equal to input.shape[:-2], but got r4   )actual_batch_other_shapes   r8   rg   r    s    66N5OQr:   c                  <   > ST R                    STR                    3$ )NzPtorch.ormqr: Expected input and tau to have the same dtype, but input has dtype z and tau has dtype r   r8  s   r8   rg   r    s"    ##(;;-/B399+Or:   c                  <   > ST R                    STR                    3$ )NzRtorch.ormqr: Expected input and other to have the same dtype, but input has dtype z and other has dtype r   r  s   r8   rg   r  "  s"    ##(;;-/DU[[MSr:   ztorch.ormqrr/  r   Fr  r?  )	rX   rj   r   r   r`   r  r@  r   r   )	r   r/  r   r   r  expected_batch_shaper  r<  r  s	   ```   @@@r8   ormqrr    s    
LL

aV 
LL

aV !%""	LL'(CIIbM9q 
LL'(EKKO;c
 
LL		"R(Z
 
LL

SXX"	
 
LL

ejj 	
 zzA~${{3B/!$3B"&::	
 $);;s#3 $(<<	
 
LL		U[[ 	
 
LLu{{"	
 M3u5M5%9[[*5;;%Hkk||	 r:   c                  ^ ^^ [         R                  " [        T5      ST-  :H  UU4S j5        T R                  nUTS-   :H  nUnU(       + nU(       a1  [	        SU5       H   nU=(       a    T R                  U5      S:g  nM"     O/[	        U5       H   nU=(       a    T R                  U5      S:g  nM"     [         R                  " U=(       d    UUU 4S j5        g )Nr   c                  ,   > SST -   S[        T5       3$ )Nzpadding size is expected to be r   z, but got: r   )r   paddings   r8   rg   ,_padding_check_valid_input.<locals>.<lambda>6  s    1!c'+c'l^Tr:   r3   r   c                  :   > ST S-    ST S-    STR                    3$ )N	Expected r3   zD or r   zcD (batch mode) tensor with possibly 0 batch size and other non-zero dimensions for input, but got: rR  )r   r   s   r8   rg   r  K  s-    aycAgY /AAFOr:   )rX   rj   r   r   r   r   )r   r  r   	input_dimis_batch_modevalid_batch_modevalid_non_batch_moder   s   ```     r8   _padding_check_valid_inputr  3  s    	LLGCT
 

I#'*M$,,q)$A/FEJJqMQ4F % y!A#7#NEJJqMQ<N  " 
LL00	
r:   c                  ^ ^^^^	^
 SnSmSnT R                   S:X  a  T R                  S5      nTS-  mUS-  n[        T USS9  Uu  m	m
T R                  U5      nT R                  T5      mTT	-   T
-   mU(       a-  [        R                  " T	T:  =(       a    T
T:  UU U	U
4S j5        [        R                  " TS:  UU4S j5        T R                   S:X  a  T R                  UT45      $ T R                  XET45      $ )Nr   r3   r0   r   c                  4   > ST ST ST  STR                    3$ NzcArgument #4: Padding size should be less than the corresponding input dimension, but got: padding (r}   ) at dimension 
 of input rR  dim_wr   pad_lpad_rs   r8   rg   _pad1d_common.<locals>.<lambda>g  -    %%*G2eWOE7*UZU`U`Tacr:   c                     > ST  ST 3$ )Nz
input (W: z%) is too small. Calculated output W: r4   )input_woutput_ws   r8   rg   r  o  s    *WI%J8*Ur:   r   )r   r   r  rX   rj   r   )r   r  is_reflection	dim_planenbatchnplaner  r  r  r   r  s   `     @@@@@r8   _pad1d_commonr  R  s    IEFzzQA
Q	ug15LE5ZZ	"FjjG&HGO/	
 
LLAU
 zzQ1229::r:   c                     [        XSS9$ NTr  )r  r   r  s     r8   meta_reflection_pad1dr  x       t<<r:   c                    ^  [         R                  " T R                  [         R                  :g  U 4S j5        [	        T USS9$ )Nc                  @   > ST R                   R                  5        S3$ )Nz)"replication_pad1d" not implemented for ''r`   __str__r   s   r8   rg   (meta_replication_pad1d.<locals>.<lambda>      =ekk>Q>Q>S=TTUXr:   Fr  )rX   rj   r`   boolr  r  s   ` r8   meta_replication_pad1dr  ~  5     
LLuzz!X u==r:   c                  ^ ^^^^^ SmU(       d$  [         R                  " [        U5      S:H  S 5        TR                  S:X  a  TS-  mUu  mmTR	                  T5      nUT-   T-   mU(       a-  [         R                  " TU:  =(       a    TU:  UUUU4S j5        [         R                  " TT R	                  T5      :H  UU U4S j5        TR                  TR                  5      $ )Nr3   r   c                      g)Nz padding size is expected to be 2r4   r4   r:   r8   rg   (_pad1d_backward_common.<locals>.<lambda>  s    0Rr:   r0   c                  4   > ST ST ST  STR                    3$ r  rR  r  s   r8   rg   r    r  r:   c                  2   > ST STR                  T 5       3$ Nz(grad_output width unexpected. Expected: , Got: r   r  grad_outputr  s   r8   rg   r         :8*GKL\L\]bLcKder:   )rX   rj   r   r   r   r   r   )	r%  r   r  r  r  r  r  r   r  s	   ``   @@@@r8   _pad1d_backward_commonr'    s    ES\Q&(RSzzQ
LE5jjG&HGO/	
 
LLK$$U++e
 ??5;;''r:   
grad_inputc                     [        XUSS9$ r  r'  r%  r   r  s      r8   meta_reflection_pad1d_backwardr,    s     "+gTRRr:   c                     [        XUSS9$ )NFr  r*  r+  s      r8   meta_replication_pad1d_backwardr.    s     "+gUSSr:   c                  ^ ^^^	^
^^^^^^ SmSmSnSn[        T USS9  T R                  nUS:X  a   T R                  S5      nTS-  mTS-  mUS-  nUu  mmmmT R                  U5      nT R                  T5      m	T R                  T5      m
T	T-   T-   mT
T-   T-   mU(       aZ  [        R                  " TT
:  =(       a    TT
:  UU UU4S j5        [        R                  " TT	:  =(       a    TT	:  UU UU4S j5        [        R                  " TS:  =(       d    TS:  U	U
UU4S j5        T R                  S	:X  a  T R                  UTT45      $ T R                  XFTT45      $ )
Nr   r3   r   r      c                  4   > ST ST ST  STR                    3$ r  rR  r  s   r8   rg   _pad2d_common.<locals>.<lambda>  r  r:   c                  4   > ST ST ST  STR                    3$ NzcArgument #6: Padding size should be less than the corresponding input dimension, but got: padding (r}   r  r  rR  dim_hr   pad_bpad_ts   r8   rg   r2    r  r:   c                      > ST  ST ST ST 3$ )Nz
input (H:  W: z%) is too small. Calculated output H: r4   )input_hr  output_hr  s   r8   rg   r2    s%    	gY /$$,:T(=r:   r0   r  r   r   rX   rj   r   )r   r  r  
dim_slicesr	  r   r
  r6  r  r;  r  r<  r  r7  r   r  r8  s   `      @@@@@@@@@@r8   _pad2d_commonr?    sS   EEJFug15::DqyA

a
!(E5%ZZ
#FjjGjjG&H&HGO/	
 	GO/	
 
LLA&Q	
 zzQ(;<<(CDDr:   c                     [        XSS9$ r  )r?  r  s     r8   meta_reflection_pad2drA    r  r:   c                    ^  [         R                  " T R                  [         R                  :g  U 4S j5        [	        T USS9$ )Nc                  @   > ST R                   R                  5        S3$ )Nz)"replication_pad2d" not implemented for 'r  r  r  s   r8   rg   (meta_replication_pad2d.<locals>.<lambda>  r  r:   Fr  )rX   rj   r`   r  r?  r  s   ` r8   meta_replication_pad2drE    r  r:   c                 `    [         R                  " U5      n[         R                  " U5      nXV4$ rD   rX   r   )grad_wsaved_vsaved_gsaved_normsr   grad_vgrad_gs          r8   meta_weight_norm_backwardrN    s,     g&Fg&F>r:   c                   ^ ^^^^ SmSmSnUR                   nUR                  5       S:X  a  TS-  mTS-  mUS-  nUu  pVpxUT   n	UT   n
X-   U-   mX-   U-   m[        R                  " TT R	                  T5      :H  UU U4S j5        [        R                  " TT R	                  T5      :H  UU U4S j5        UR                  UR                   5      $ )Nr   r3   r   r0  c                  2   > ST STR                  T 5       3$ r"  r   r$  s   r8   rg   %meta_pad2d_backward.<locals>.<lambda>  r&  r:   c                  2   > ST STR                  T 5       3$ Nz)grad_output height unexpected. Expected: r#  r   r6  r%  r<  s   r8   rg   rQ  !       ;H:W[M]M]^cMdLefr:   )r   r   rX   rj   r   r   )r%  r   r  r  rf   r   r  r8  r7  r;  r  r6  r  r<  r  s   `          @@@@r8   meta_pad2d_backwardrV     s     EEIJxxzQ

Q	!(E%GG&H&H	LLK$$U++e 
LLK$$U++f >>$**%%r:   c          	      ~  ^ ^^^	^
^^^^^^^^^^^ Sm	SmSmSn[        T USS9  T R                  S:H  nU(       a%  T R                  S5      nT	S-  m	TS-  mTS-  mUS-  nUu  mmmmmmT R                  U5      nT R                  T5      m
T R                  T5      mT R                  T	5      mT
T-   T-   mTT-   T-   mTT-   T-   mU(       a  [        R                  " TT:  =(       a    TT:  U	U UU4S j5        [        R                  " TT:  =(       a    TT:  UU UU4S j5        [        R                  " TT
:  =(       a    TT
:  UU UU4S	 j5        [        R                  " TS:  =(       d    TS:  =(       d    TS:  U
UUUUU4S
 j5        U(       a  T R                  WUTTT45      $ T R                  UTTT45      $ )Nr0   r   r3   r   r      c                  4   > ST ST ST  STR                    3$ r  rR  r  s   r8   rg   _pad3d_common.<locals>.<lambda>C  r  r:   c                  4   > ST ST ST  STR                    3$ r4  rR  r5  s   r8   rg   rZ  J  r  r:   c                  4   > ST ST ST  STR                    3$ )NzcArgument #8: Padding size should be less than the corresponding input dimension, but got: padding (r}   r  r  rR  )dim_dr   pad_bkpad_fs   r8   rg   rZ  Q  s-    %%*G2fX_UG:V[VaVaUbdr:   c                  ,   > ST  ST ST ST ST ST 3$ )Nz
input (D:  H: r:  z%) is too small. Calculated output D: r4   )input_dr;  r  output_dr<  r  s   r8   rg   rZ  Y  s2    	gYd7) <$$,:T(4zKr:   r=  )r   r  r  r  
batch_moder	  r
  r]  r6  r  rb  r;  r  rc  r<  r  r7  r^  r_  r   r  r8  s   `      @@@@@@@@@@@@@@@r8   _pad3d_commonre  &  s   EEEIug15qJA


Q	07-E5%vZZ	"FjjGjjGjjG'H&H&HGO/	
 	GO/	
 	GO0 0	
 
LLA7Q7(a-	
 	
 (HMNN(HEFFr:   c                     [        XSS9$ r  )re  r  s     r8   meta_reflection_pad3drg  e  r  r:   c                    ^  [         R                  " T R                  [         R                  :g  U 4S j5        [	        T USS9$ )Nc                  @   > ST R                   R                  5        S3$ )Nz)"replication_pad3d" not implemented for 'r  r  r  s   r8   rg   (meta_replication_pad3d.<locals>.<lambda>p  r  r:   Fr  )rX   rj   r`   r  re  r  s   ` r8   meta_replication_pad3drk  k  r  r:   c                 \  ^ ^^^^^^ [         R                  " [        U5      S:H  S 5        UR                  S::  a  [	        SUR                   35      eT R                  UR                  :w  a%  [	        ST R                   SUR                   35      eSmSmSmUR                  S	:X  a  TS-  mTS-  mTS-  mUu  p4pVpxUR                  T5      n	UR                  T5      n
UR                  T5      nX-   U-   mX-   U-   mX-   U-   m[         R                  " TT R                  T5      :H  UU U4S
 j5        [         R                  " TT R                  T5      :H  UU U4S j5        [         R                  " TT R                  T5      :H  UU U4S j5        UR                  UR                  5      $ )N   c                      g)Nz padding size is expected to be 6r4   r4   r:   r8   rg   %meta_pad3d_backward.<locals>.<lambda>  s    ,Nr:   r0   zinput.ndim must be > 3, got z,grad_output.ndim must equal input.ndim, got  != r   r3   rX  c                  2   > ST STR                  T 5       3$ r"  r   r$  s   r8   rg   ro    r&  r:   c                  2   > ST STR                  T 5       3$ rS  r   rT  s   r8   rg   ro    rU  r:   c                  2   > ST STR                  T 5       3$ )Nz(grad_output depth unexpected. Expected: r#  r   )r]  r%  rc  s   r8   rg   ro    r&  r:   )rX   rj   r   r   r   r   r   r   )r%  r   r  r   r  r8  r7  r_  r^  rb  r;  r  r]  r6  r  rc  r<  r  s   `           @@@@@@r8   meta_pad3d_backwardrt  u  s    
LLW"$NOzzQ;EJJ<HII5::%:;;K;K:LDQVQ[Q[P\]
 	
 EEEzzQ


07-E%jjGjjGjjG'H&H&H	LLK$$U++e 
LLK$$U++f 
LLK$$U++e
 ??5;;''r:   pc                 @   [         R                  " U R                  5       S 5        U R                  S5      nUS::  a-  U R	                  S/5      R                  [         R                  S9$ U R	                  X"S-
  -  S-  45      R                  [         R                  S9$ )Nc                      g)Nz(_pdist_forward requires contiguous inputr4   r4   r:   r8   rg   %meta__pdist_forward.<locals>.<lambda>  s    &Pr:   r   r3   r   r   )rX   rj   rq  r   r   r4  r%  )r   ru  r  s      r8   meta__pdist_forwardry    s     
LLP 			!AAv~~qc"%%E4R4R%SS~~qE{a/125588 6 
 	
r:   gradpdistc                     [         R                  " UR                  5       S 5        [         R                  " UR                  5       S 5        [         R                  " U[         R                  S9$ )Nc                      g)Nz._pdist_backward requires self to be contiguousr4   r4   r:   r8   rg   &meta__pdist_backward.<locals>.<lambda>  s    &Vr:   c                      g)Nz/_pdist_backward requires pdist to be contiguousr4   r4   r:   r8   rg   r~    s    'Xr:   r   )rX   rj   rq  r   r%  )rz  r   ru  r{  s       r8   meta__pdist_backwardr    sW     
LLV 
LLX D0N0NOOr:   )rc  rb  c          
      b  ^ ^^^^^ SSK JnJn  TR                  S5      nTR                  S5      nTR                  S5      n	U" [        R
                  " U" T R                  XxU	45      5      5      (       a  T R                  XxU	45      m [        R                  " TR                  5       S:H  S 5        [        R                  " TR                  5       S:H  S 5        [        R                  (       dN  [        R                  " T R                  TR                  s=:H  =(       a    TR                  :H  Os  UUU 4S j5        TR                  n
TR                  mU
S   mU
S   m[        R                  " TS   T:H  =(       a    TS   T:H  UUU4S	 j5        T R                  T R                  5       5      $ )
Nr   )guard_or_truesym_eqr3   r   r0   c                      gNzbatch1 must be a 3D tensorr4   r4   r:   r8   rg   meta_baddbmm.<locals>.<lambda>      ,Hr:   c                      gNzbatch2 must be a 3D tensorr4   r4   r:   r8   rg   r    r  r:   c                  V   > STR                    ST R                    STR                    3$ )Nz+Input dtypes must be the same, got: input: z
, batch1: z
, batch2: r   )batch1batch2r   s   r8   rg   r    s.    A$**ZX^XdXdWeeopvp|p|o}~r:   c            	      .   > ST ST ST S    ST S    S3	$ Nz@Expected size for first two dimensions of batch2 tensor to be: [r}   z] but got: [r   r3   ].r4   batch2_sizesbscontraction_sizes   r8   rg   r    s5    t2&'|LO3DB|TUFWWY[r:   )r1  r  r  r   rX   sym_notr   r  rj   r   
exp_config&skip_dtype_check_in_meta_registrationsr`   r   )r   r  r  rc  rb  r  r  dim1dim2dim3batch1_sizesr  r  r  s   ```        @@@r8   meta_baddbmmr    s9    L;;q>D;;q>D;;q>DU]]6$**t46H#IJKK{{D-.	LL"$HI	LL"$HI<<JJ&,,66&,,6~	
 <<L<<L	aB#A	LLQ2E,q/5E"E	
 >>$))+&&r:   c                H    [         R                  " U [         R                  S9$ r   rz  r   r  s     r8   meta_bernoullir    s     D0G0GHHr:   c                     U $ rD   r4   r   ru  r  s      r8   meta_bernoulli_r        Kr:   c                 H    [         R                  " U [         R                  S9$ r   rz  r  s      r8   meta_bernoulli_pr    s     D0G0GHHr:   c                 .    [         R                  " U 5      $ rD   rG  r  s     r8   meta_poissonr         D!!r:   c                     [         R                  " XR                  5       :  S 5        [         R                  " U [         R                  S9n[         R                  " U 5      U4$ )Nc                      g)NzJError in fused_moving_avg_obs_fake_quant_cpu: ch_axis must be < self.dim()r4   r4   r:   r8   rg   6meta__fused_moving_avg_obs_fq_helper.<locals>.<lambda>	      \r:   r   )rX   rj   r   r   r  )r   observer_onfake_quant_onrunning_minrunning_maxscale
zero_pointaveraging_const	quant_min	quant_maxch_axisper_row_fake_quantsymmetric_quantmasks                 r8   $meta__fused_moving_avg_obs_fq_helperr    sM      
LL((*\ D

3DT"D))r:   c                 \  ^^^^ [         R                  " U R                  5       S:H  S 5        [         R                  " UR                  5       S:H  S 5        U R                  u  mmUR                  u  mm[         R                  " TT:H  UUUU4S j5        Ubr  [         R                  " X R                  :H  =(       dG    U[         R
                  :H  =(       a-    U R                  [         R                  [         R                  4;   S 5        Uc  U R                  OUnU R                  TT4US9$ )Nr   c                      g)Nza must be 2Dr4   r4   r:   r8   rg   meta_mm.<locals>.<lambda>	      ~r:   c                      g)Nzb must be 2Dr4   r4   r:   r8   rg   r  	  r  r:   c            	      "   > ST ST  ST ST S3	$ )Nz/a and b must have same reduction dim, but got [r}   z] X [r  r4   )M1M2Nr\  s   r8   rg   r  	  s&    A!Brd%PRtSUVWUXXZ[r:   c                      g)NzFout_dtype must be the same as input dtype or fp32 for fp16/bf16 inputsr4   r4   r:   r8   rg   r  %	  s    \r:   r   )	rX   rj   r   r   r`   rm  rn  ro  r   )r6   r7   rJ  rU   r  r  r  r\  s       @@@@r8   meta_mmr  	  s     
LLA56	LLA56GGEArGGEB	LL
b[   U]]* ?GGu~~>>\	
 (/177YL;;1v\;22r:   c                    ^ ^ U(       a)  [        UU 4S j[        T R                  5       5       5      $ [        R                  " T R
                  T5      $ )Nc              3   P   >#    U  H  oT;  a  TR                   U   OS v   M     g7f)r3   NrR  )rv   r   dimsr   s     r8   rx   +_compute_reduction_shape.<locals>.<genexpr>-	  s$     UDTqtmTZZ]:DTs   #&)ri   r   r   rQ   compute_reduction_output_shaper   )r   r  r  s   `` r8   r  r  +	  s7    UE$))DTUUU//

DAAr:   c                 :   [        U [        R                  R                  5      (       a  U R                  R
                  $ [        U S5      (       aK  [        U R                  S5      (       a0  U R                  R
                  S:w  a  U R                  R
                  $ g)Nr   r   r   r   )rt   rX   _subclasses
FakeTensorfake_devicer   hasattrr   )r=  s    r8   r  r  6	  sp    &%++6677!!&&&!!FMM6**MM&(}}!!!r:   input_tensorr   r  dilationis_transposedgroupsoutput_paddingc                 B  ^^ S[         S[         S[         S[         S[         S[         4S jnS[         S[         S[         S[         S[         S[         S[         4S	 jn	UR                  S
S  n
U R                  S
S  mU(       a  XaR                  S   -  nO=UR                  S   nUR                  S   U-  U R                  S   :w  a  [        S5      eU R                  S   U/m[        U[        5      (       a  U/[        T5      -  nO![        U5      S:X  a  US   /[        T5      -  n[        U[        5      (       a  U/[        T5      -  nO![        U5      S:X  a  US   /[        T5      -  n[        U[        5      (       a  U/[        T5      -  nO![        U5      S:X  a  US   /[        T5      -  nS nU(       aI  [        U[        5      (       a  U/[        T5      -  nO$[        U5      S:X  a  US   /[        T5      -  nOUn[        [        T5      5       H[  nU(       a+  TR                  U	" TU   X=   XM   X   X-   X   5      5        M5  TR                  U" TU   X=   XM   X   X-   5      5        M]     SSKJ	n  SSK
Jn  [        X5      (       a  U R                  OU R                  nUR                  S:H  =(       a    [        R                   R"                  S L nU(       d6  [        R$                  " U" TS
S   Vs/ s H  nUS:  PM
     sn6 UU4S j5        T$ s  snf )Nlnru  r   rb  r  r@   c                 4    U SU-  -   X#S-
  -  -
  S-
  U-  S-   $ )aE  
Formula to apply to calculate the length of some dimension of the output

See: https://pytorch.org/docs/stable/generated/torch.nn.Conv2d.html

Args:
    ln: length of the dimension
    p: padding in that dim
    d: dilation in that dim
    k: kernel size in that dim
    s: stride in that dim
Returns:
    The output length
r   r3   r4   )r  ru  r   rb  r  s        r8   _formula+calc_conv_nd_return_shape.<locals>._formulaM	  s,     QU
Qa%[(1,2Q66r:   rE   c                 :    U S-
  U-  SU-  -
  X#S-
  -  -   U-   S-   $ )a  
Formula to apply to calculate the length of some dimension of the output
if transposed convolution is used.
See: https://pytorch.org/docs/stable/generated/torch.nn.ConvTranspose2d.html

Args:
    ln: length of the dimension
    p: padding in that dim
    d: dilation in that dim
    k: kernel size in that dim
    s: stride in that dim
    op: output padding in that dim

Returns:
    The output length
r3   r   r4   )r  ru  r   rb  r  rE   s         r8   _formula_transposed6calc_conv_nd_return_shape.<locals>._formula_transposed^	  s0    " Q!|a!e#aq5k1B6::r:   r   r3   r   zInvalid channel dimensions)r  )sym_orr   c                  .   > S[        T 5       STSS   S3$ )NzGiven input size per channel: z&. Calculated output size per channel: r   z. Output size is too small)r   )r  	ret_shapes   r8   rg   +calc_conv_nd_return_shape.<locals>.<lambda>	  s*    4T$ZL A33<QR=/ B'(r:   )r   r   r3  rt   r   r   r   r   torch._subclasses.fake_tensorr  r1  r  r  r   r   rX   versionhiprj   )r  rF  r   r  r  r  r  r  r  r  kernel_sizeout_channelsoutput_padding_listr   r  r  r   is_cudnnr=   r  r  s                      @@r8   calc_conv_nd_return_shaper  C	  s   7S 7S 7S 7S 7S 7S 7"; ; ; ; ; ; ;QT ;& ,,qr"Kab!DQ/||A<<?V#|'9'9!'<<;<<##A&5I&'""CI%	V	)s4y('7##)c$i'	W	1:,T*(G$$:D	)	X!	QK=3t9,,0ng..#1"2SY"> A%#1!#4"5D	"A"03t9#GJ KNI'* a'*hk;>69U% 6 9< l// 	      {{f$B):):d)BHIabM2MqQUM23(	
  3s   =Lc                 b    [         R                  R                  U 5      [         R                  :H  $ rD   rX   _prims_commonr   channels_lasttens    r8   is_channels_lastr  	  s$    44S9U=P=PPPr:   running_meanrunning_vartrainingexponential_average_factorepsilonc                 z  ^  T R                   nUb  UR                   OUR                   n	Ub  UR                   OUR                   n
U 4S jnT R                  U5      R                  U" 5       S9nU(       a#  T R                  U	5      nT R                  U
5      nO"T R                  S5      nT R                  S5      nXU4$ )Nc                     > [        T 5      (       a  [        R                  $ T R                  [        R                  S9(       a  [        R                  $ [        R                  $ r   )r  rX   r  rq  r   )r  s   r8   pick_memory_format2meta_miopen_batch_norm.<locals>.pick_memory_format	  sI    L))&&&%%E4K4K%L***&&&r:   r   r   )r   r   r4  )r  rF  rH  r  r  r  r  r  r   save_mean_shapesave_var_shaper  r   	save_meansave_vars   `              r8   meta_miopen_batch_normr  	  s     ""I -9,Dl((&,,O*5*A[&&v||N' 
 
 
+
.
.=O=Q
.
RC **?;	)).9 **40	))$/8##r:   c	           
          [        U UUUUUUU(       a  UOS 5      n	Sn
SnU R                  U
5      S:X  a  SX'   U R                  U	5      nU$ Nr3   r   )r  r   r   )r  rF  rH  r   r  r  r  r  r  	shape_outinput_channels_dimoutput_channels_dimr   s                r8   	meta_convr  	  si     *'T	I +,1)*	&

 
 
+CJr:   mkldnnc
           
          [        XXCUSU/ 5      n
U R                  U
5      n[        R                  nU R	                  5       S:X  a  [        R
                  nUR                  US9nU$ )NFrX  r   )r  r   rX   r  r   channels_last_3dr4  )r  rF  rH  r  r   r  r  attrscalars	algorithmr   r   out_memory_formats                r8   meta_mkldnn_convolution_defaultr  
  sl     .&8UFB
	 $$Y/!//" % 6 6ff#4f5
r:   c                 b    U R                  / U R                  S S QUR                  S   P75      $ Nr   r   r   r   )r  rF  rH  r  r  r	  s         r8   meta_linear_pointwise_defaultr  .
  s5     %%&Q(:(:3B(?&Qa&QRRr:   mklc                 b    U R                  / U R                  S S QUR                  S   P75      $ r  r  )r  packed_weightorig_weightrH  r   s        r8   meta_mkl_linearr  9
  s:    ))@,$$Sb)@;+<+<Q+?@ r:   onednnc           
      
   [        U UUUU	SU
S 5      nUc  U R                  nU[        R                  [        R                  [        R
                  [        R                  [        R                  4;  a  [        SU 35      eU R                  UUS9n[        U5      S;  a  [        S[        U5       S35      e[        R                  [        R                  [        R                  S.[        U5         nUR                  US9nU$ )NFOoutput_dtype must be one of float32, bfloat16, uint8, int8, float8_e4m3fn, got r   )r0   r0  rX  z3Expect output to be 3d/4d/5d for conv1d/2d/3d, got r   r   )r  r`   rX   rm  ro  uint8rO  rp  r   r   r   r   r  r  r4  )r=   x_scalex_zpww_scalew_zprH  r   r  r  r  output_scaleoutput_zero_pointoutput_dtyper  r  r	  r   r   formats                       r8   meta_qconv_pointwiser"  C
  s   , .	
	 77LMMNNKKJJ 
 
 !abnaop  kk)<k8y>* Ec)nEUUVW  &&""%%
 i.	
 ff6f*
r:   c                 0    US:w  a  [        SU S35      eU$ )Nsumz#binary_op_name must be 'sum', got 'r  r   )r=   r  r  r  r  r  accumrH  r   r  r  r  r  r  r   accum_scaleaccum_zero_pointbinary_op_namerb  unary_op_nameunary_op_argsunary_op_algorithms                         r8   meta_qconv2d_pointwise_binaryr-  |
  s-    4 U" 5n5EQG  r:   c                 0   [        U R                  5      nUR                  S   US'   U	[        R                  [        R                  [        R
                  [        R                  [        R                  4;  a  [        SU	 35      eU R                  XS9nU$ )Nr3   r   zOoutput_dtype must be one of float32, bfloat16, int8, uint8, float8_e4m3fn, got r   )
r   r   rX   rm  ro  rO  r  rp  r   r   )r=   r  r  r  r  r  rH  r  r  r   post_op_namepost_op_argspost_op_algorithmrt  r   s                  r8   meta_qlinear_pointwiser2  
  s    " AGG}771:RMMNNJJKK 
 
 !abnaop  kk,k;
r:   c                 B   US:X  a  U$ [        U R                  5      nUR                  S   US'   U
[        R                  [        R                  [        R
                  [        R                  [        R                  4;  a  [        SU
 35      eU R                  UU
S9nU$ )Nr$  r3   r   r  r   )
r   r   rX   rm  ro  r  rO  rp  r   r   )r=   r  r  r  r  r  x_2rH  r  r  r   x2_scalex2_zpr)  rb  r*  r+  r,  rt  r   s                       r8   meta_qlinear_pointwise_binaryr7  
  s    , U"JAGG}771:RMMNNKKJJ 
 
 !abnaop  kk,lk;
r:   c                 v    [        U R                  5      nUR                  S   US'   U R                  U5      nU$ )Nr3   r   )r   r   r   )r=   r  rH  rt  r   s        r8   meta_linear_dynamic_fp16r9  
  s6     AGG}771:Rkk,'
r:   	quantizedr  r3   c                 "   [        XX#XE5      u  nnnU R                  5       S:X  a  U R                  S5      OSn	[        R                  n
U R                  5       S:X  a  XgU/nOXXx/n[        R
                  " UU R                  U R                  U
S9$ Nr0  r3   r0   r  )#max_pool2d_checks_and_compute_shaper   r   rX   r  r   r`   r   r   r  r   r  r  	ceil_modenInputPlaneoutputHeightoutputWidthr	  r   r   s               r8   meta_quantized_max_pool2drE  
  s     0
		
 $)99;!#3B++99;!{;DCD{{++<<'	
 	
r:   c                   ^ ^^^ [         R                  " T R                  5       S:H  U 4S j5        [         R                  " TR                  5       S:H  U4S j5        [         R                  " T R                  [         R                  [         R
                  [         R                  4;   U 4S j5        [         R                  " TR                  [         R                  :H  U4S j5        [         R                  " TR                  [         R                  :H  U4S j5        [         R                  " TR                  T R                  :H  U4S j5        T R                  T R                  S5      TR                  S5      T R                  S	9$ )
Nr   c                  ,   > ST R                  5        S3$ )Nzx must be a 2D tensor, got rZ  r   r=   s   r8   rg   /meta_int4mm_packed_weight_cpu.<locals>.<lambda>      -H	QR+Sr:   c                  ,   > ST R                  5        S3$ )Nzw must be a 2D tensor, got rZ  r   r  s   r8   rg   rI    rJ  r:   c                  "   > ST R                    3$ Nz#expected x to be f32/f16/bf16, got r   rH  s   r8   rg   rI    s    9!''Cr:   c                  "   > ST R                    3$ Nzexpected w to be uint8, got r   rL  s   r8   rg   rI    s    .J177),Tr:   c                  "   > ST R                    3$ )Nz q_group_size must be int64, got r   )q_group_sizes   r8   rg   rI     s    6|7I7I6JKr:   c                  "   > ST R                    3$ )Nz5q_scale_and_zeros must have the same dtype as x, got r   )q_scale_and_zeross   r8   rg   rI  $  s    KL]LcLcKder:   r   r   )rX   rj   r   r`   rm  rn  ro  r  r   r   r   r=   r  rR  rT  s   ````r8   meta_int4mm_packed_weight_cpurV    s    QUUW\#STQUUW\#STGGu}}ennEEC	
 	GGu{{"$T	
 	%++-K	
 	##qww.e	
 {{166!9affQiqww{??r:   c                    ^ ^^^ [         R                  " T R                  5       T:H  =(       a    T R                  T   T:H  UUUU 4S j5        g )Nc                  j   > ST  ST ST S3STR                  5        ST STR                  T    3-   $ )NzExpected a tensor of dimension z and tensor.size[z] == r}   zbut got : dimension z] = r   r   )r   dim_sizer   r=  s   r8   rg    check_dim_size.<locals>.<lambda>-  sN    1#6GzQVW[V\\^_ .?zfll[cNdMe
fgr:   )rX   rj   r   r   )r=  r   rZ  r   s   ````r8   check_dim_sizer\  *  s6    	LL

>X 6$ >	gr:   c                   ^  S nU" SU5      u  p[         R                  " [        U5      S;   S 5        [         R                  " T R                  [         R                  [         R
                  [         R                  [         R                  4;  U 4S j5        [        U5      S:X  a  XpO$[        U5      S:X  a
  US   US   pOU" SU5      u  pU" S	U5      u  p[         R                  " US L =(       d    US:g  S
 5        T R                  5       S:X  a  T R                  S5      OSnT R                  S5      nT R                  S5      nT R                  S5      n[        UXU
SU5      n[        UXUSU5      n[        R                  " T 5      n[        T UU	U
UUUSSUUUUUU5        T R                  5       S:X  a  UUU/nOXUU/n[         R                  " UT R                  T R                  US9$ )Nc                    ^  [         R                  " [        U5      S;   U 4S j5        US   n[        U5      S:X  a  UOUS   nX#4$ )Nr3   r   c                     > ST  S3$ )Nzavg_pool2d: 4 must either be a single int, or a tuple of two intsr4   r  s   r8   rg   1meta_avg_pool2d.<locals>.unpack.<locals>.<lambda>?      l4&(\]r:   r   r3   rX   rj   r   r  r  HWs   `   r8   unpackmeta_avg_pool2d.<locals>.unpack<  E    H]	
 FSQACFtr:   r  r   r3   r   c                      gNzOavg_pool2d: stride must either be omitted, a single int, or a tuple of two intsr4   r4   r:   r8   rg   !meta_avg_pool2d.<locals>.<lambda>H      ar:   c                  @   > ST R                   R                  5        S3$ )Nz""avg_pool2d" not implemented for 'r  r  r  s   r8   rg   ro  L      6u{{7J7J7L6MQQr:   r   r3   r   r  c                      gNzdivisor must be not zeror4   r4   r:   r8   rg   ro  Y      *r:   r0  r>  r  r   r0   r  )rX   rj   r   r`   r  uint16uint32uint64r   r   pooling_output_shaperQ   r   pool2d_shape_checkr   r   )r   r  r   r  rA  count_include_paddivisor_overrideri  kHkWdHdWpadHpadWr	  rB  inputHeight
inputWidthrC  rD  r   r   s   `                     r8   meta_avg_pool2dr  2  s    M;/FB	LLFy a 
LLEKKu||U\\RRQ 6{aB	V	F1IB&)	7+JD	LLD 9$4$9*
  %yy{a/UZZ^QF**R.K**R.KBJ'Rr1iPL&z2RINK//6M



		$ yy{a\;7\;?;;kk||#	 r:   c                     [        U UUUUUUSSU	U
UUUU5        U R                  5       nU	n[        XUS-
  U5        [        XUS-
  U5        [        XUS-
  U5        g )Nr3   r0   r   )r{  r   r\  )r   
gradOutputr	  r~  r  r  r  r  r  rB  r  r  rC  rD  
mem_formatr   nOutputPlanes                    r8   avg_pool2d_backward_shape_checkr    s{    " 



		$ 99;DL:TAX|<:TAX|<:TAX{;r:   c                    [         R                  " [        U5      S:H  =(       d    [        U5      S:H  S 5        US   n[        U5      S:X  a  UOUS   n	[         R                  " [        U5      S:H  =(       d#    [        U5      S:H  =(       d    [        U5      S:H  S 5        [        U5      S:X  a  UOUS   n
[        U5      S:X  a  U	O[        U5      S:X  a  U
OUS   n[         R                  " [        U5      S:H  =(       d    [        U5      S:H  S 5        US   n[        U5      S:X  a  UOUS   n[         R                  " US L =(       d    US:g  S 5        UR                  nUR	                  5       S:X  a  US	   OSnUS
   nUS   nUS   n[        UXU
SU5      n[        UXUSU5      n[        R                  " U5      n[        UU UUU	U
UUUUUUUUU5        [         R                  " UUR                  UR                  US9$ )Nr3   r   c                      g)NzKavg_pool2d: kernel_size must either be a single int, or a tuple of two intsr4   r4   r:   r8   rg   *meta_avg_pool2d_backward.<locals>.<lambda>  s    ]r:   r   c                      grn  r4   r4   r:   r8   rg   r    rp  r:   c                      g)NzGavg_pool2d: padding must either be a single int, or a tuple of two intsr4   r4   r:   r8   rg   r    s    Yr:   c                      grt  r4   r4   r:   r8   rg   r    ru  r:   r0  r>  rv  r  r   r  )rX   rj   r   r   r   rz  rQ   r   r  r   r`   r   )gradOutput_r   r  r   r  rA  r|  r}  r~  r  r  r  r  r  
input_sizer	  rB  r  r  rC  rD  r  s                         r8   meta_avg_pool2d_backwardr    s    
LLKA6[!1Q!6] 
QB;1$+a.B	LLFq@CK1,@Fq0@a 6{aVAYB6{a3v;!+;RB	LLG.S\Q.Y 1:Dw<1$4'!*D	LLD 9$4$9*
 J$yy{a/Z^QFR.KR.KBJ'Rr1iPL&z2RINK,,U3J#



$ ;;kk|| 	 r:   c                 n  ^  [         R                  " [        U5      S;   S 5        US   n[        U5      S:X  a  UOUS   n[        U5      S:X  a  UOUS   n	[         R                  " U(       + =(       d    [        U5      S;   S 5        [         R                  " T R                  [         R                  [         R
                  [         R                  [         R                  4;  U 4S j5        U(       d  UOUS   n
U(       d  UO[        U5      S:X  a  U
OUS   nU(       d  U	O[        U5      S:X  a  U
OUS   n[         R                  " [        U5      S;   S 5        US   n[        U5      S:X  a  UOUS   n[        U5      S:X  a  UOUS   n[         R                  " T R                  S	;   S
 5        [         R                  " U(       + =(       d    US:g  S 5        T R                  S5      nT R                  S5      nT R                  S5      nT R                  S5      nT R                  S5      n[        UX}U
SU5      n[        UXUSU5      n[        UXUSU5      n[        T UUUU	U
UUUUUSSSUUUUUUSSS9  T R                  S:X  a  T R                  UUUU45      $ T R                  UUUUU45      $ )Nr3   r0   c                      gNzFavg_pool3d: kernel_size must be a single int, or a tuple of three intsr4   r4   r:   r8   rg   !meta_avg_pool3d.<locals>.<lambda>      Xr:   r   r3   r   c                      gNzJavg_pool3d: stride must be omitted, a single int, or a tuple of three intsr4   r4   r:   r8   rg   r    r  r:   c                  @   > ST R                   R                  5        S3$ )Nz""avg_pool3d" not implemented for 'r  r  r  s   r8   rg   r    rr  r:   c                      gNzBavg_pool3d: padding must be a single int, or a tuple of three intsr4   r4   r:   r8   rg   r        Tr:   r0  rX  c                      gNz9non-empty 4D or 5D (batch mode) tensor expected for inputr4   r4   r:   r8   rg   r  "      Kr:   c                      grt  r4   r4   r:   r8   rg   r  '  ru  r:   r>  rv  r  r   zavg_pool3d()T)check_input_sizer0  )rX   rj   r   r`   r  rw  rx  ry  r   r   rz  pool3d_shape_checkr   )r   r  r   r  rA  r|  r}  kTr~  r  dTr  r  padTr  r  r	  nslicesitimeiheightiwidthotimeoheightowidths   `                       r8   meta_avg_pool3dr    s    
LLKF"X 
QB;1$+a.B;1$+a.B	LL
+c&kV+\ 
LLEKKu||U\\RRQ vayBc&kQ&6F1IBc&kQ&6F1IB	LLGT 1:Dw<1$4'!*Dw<1$4'!*D	LL

fK
 
LL5 0A 5*
 ZZ]FjjnGJJrNEjjnGZZ^F "aCE"7Bb!YGG!&"B9EF





			-2 zzQ@AAHIIr:   c                 B   [         R                  " [        U5      S;   S 5        US   n[        U5      S:X  a  UOUS   n	[        U5      S:X  a  UOUS   n
[         R                  " U(       + =(       d    [        U5      S;   S 5        U(       d  UOUS   nU(       d  U	O[        U5      S:X  a  UOUS   nU(       d  U
O[        U5      S:X  a  UOUS   n[         R                  " [        U5      S;   S 5        US   n[        U5      S:X  a  UOUS   n[        U5      S:X  a  UOUS   n[         R                  " UR                  S;   S	 5        [         R                  " U(       + =(       d    US:g  S
 5        UR	                  S5      nUR	                  S5      nUR	                  S5      nUR	                  S5      n[        UXUSU5      n[        UXUSU5      n[        UU
UUSU5      n[        UU UUU	U
UUUUUUUUUUUUS5        UR                  UR                  5      $ )Nr  c                      gr  r4   r4   r:   r8   rg   *meta_avg_pool3d_backward.<locals>.<lambda>a  r  r:   r   r3   r   c                      gr  r4   r4   r:   r8   rg   r  i  r  r:   c                      gr  r4   r4   r:   r8   rg   r  q  r  r:   r  c                      gr  r4   r4   r:   r8   rg   r  y  r  r:   c                      grt  r4   r4   r:   r8   rg   r  ~  ru  r:   r>  rv  r  r   zavg_pool3d_backward())	rX   rj   r   r   r   rz  avg_pool3d_backward_shape_checkr   r   )r%  r   r  r   r  rA  r|  r}  r  r~  r  r  r  r  r  r  r  r  r  r  r  otime_for_shape_checkoheight_for_shape_checkowidth_for_shape_checks                           r8   meta_avg_pool3d_backwardr  S  s    
LLKF"X 
QB;1$+a.B;1$+a.B	LL
+c&kV+\ vayBc&kQ&6F1IBc&kQ&6F1IB	LLGT 1:Dw<1$4'!*Dw<1$4'!*D	LL

fK
 
LL5 0A 5*
 jjnGJJrNEjjnGZZ^F0"aS27Bb!YW1&"dB9U#





', ??5;;''r:   c                 8  ^  [         R                  " T R                  S:H  =(       d    T R                  S:H  U 4S j5        T R                  S S [	        U5      -   n[
        R                  " T 5      n[         R                  " UT R                  T R                  US9$ )Nr0   r0  c                  "   > ST R                    3$ )Nz"Expected 3D or 4D tensor, but got rR  r   s   r8   rg   *meta_adaptive_avg_pool2d.<locals>.<lambda>      4TZZLAr:   r  r  )
rX   rj   r   r   ri   rQ   r   r   r`   r   )r   output_sizert  r   s   `   r8   meta_adaptive_avg_pool2dr    s|    	LL		Q($))q.A ::cr?U;%77L//5M ;;jj{{#	 r:   c                    ^  [         R                  " T R                  S:H  =(       d    T R                  S:H  U 4S j5        T R                  T R                  S S [        U5      -   5      $ )Nr0  rX  c                  "   > ST R                    3$ )Nz"Expected 4D or 5D tensor, but got rR  r   s   r8   rg   *meta_adaptive_avg_pool3d.<locals>.<lambda>  r  r:   rv  )rX   rj   r   r   r   ri   )r   r  s   ` r8   meta_adaptive_avg_pool3dr    sO    	LL		Q($))q.A >>$**Sb/E+,>>??r:   c                   ^ ^^ T R                   n[        SU5       H1  m[        R                  " T R	                  T5      S:  U U4S j5        M3     [        R                  " US:H  =(       d    US:H  U4S j5        [        R                  " TR
                  T R
                  :H  U U4S j5        [        R                  n[        T5      (       a  [        R                  nTR                  TR                  5      R                  US9$ )	Nr3   r   c                  *   > ST R                    ST S3$ )Nz{adaptive_avg_pool2d_backward(): Expected grad_output to have non-zero                       size for non-batch dimensions,  with dimension  being emptyrR  )grad_outr   s   r8   rg   4meta__adaptive_avg_pool2d_backward.<locals>.<lambda>  s$     66>nn5EEUVWUXXdfr:   r0   r0  c                  "   > ST R                    3$ )NzBadaptive_avg_pool2d_backward(): Expected 3D or 4D tensor, but got rR  r   s   r8   rg   r    s    TUYU_U_T`ar:   c                  <   > STR                    ST R                    3$ Nexpected dtype z! for `grad_output` but got dtype r   )r  r   s   r8   rg   r    s    /$**-Nx~~N^_r:   r   )r   r   rX   rj   r   r`   r   r  r  r   r   r4  )r  r   r   r   r   s   ``  @r8   "meta__adaptive_avg_pool2d_backwardr    s    ==D1d^MM!q f	
  
LL	TQYa 
LL

hnn$_ ++M++>>$**%((}(EEr:   c                 `    [        U S5        [        R                  " U[        R                  S9$ )Nadaptive_avg_pool3d_backwardr   )!_adaptive_pool_empty_output_checkrX   r   r%  r%  r   s     r8   "meta__adaptive_avg_pool3d_backwardr    s(     &k3QRD0N0NOOr:   r%  c                    ^ ^^ T R                   n[        SU5       H2  m[        R                  " T R	                  T5      S:  UU U4S j5        M4     g )Nr3   r   c                  .   > T  STR                    ST S3$ )Nzc(): Expected grad_output to have non-zero size for non-batch dimensions, but grad_output has sizes r  r  rR  )r  r%  r   s   r8   rg   3_adaptive_pool_empty_output_check.<locals>.<lambda>  s*    * --8->->,??OPQsR^`r:   )r   r   rX   rj   r   )r%  r  r   r   s   `` @r8   r  r    sB    D1d^Q!#	
 r:   c                   ^ ^ T R                   n[        R                  " US;   U 4S j5        [        SU5       H1  m[        R                  " T R	                  T5      S:  UU 4S j5        M3     [        R                  " [        U5      S:H  S 5        SnSnSnT R                   S:X  a  T R	                  S5      nUS-  nT R	                  US-
  5      nUu  pgT R                   S	:X  a6  XVU4nT R                  U5      n	T R                  U[        R                  S
9n
X4$ XEXg4n[        R                  " T 5      nT R                  U5      R                  US9n	T R                  U[        R                  S
9R                  US9n
X4$ )Nr0   r0  c                  "   > ST R                    3$ )Nz:adaptive_max_pool2d(): Expected 3D or 4D tensor, but got: rR  r  s   r8   rg   *meta_adaptive_max_pool2d.<locals>.<lambda>      LU[[MZr:   r3   r   c                  *   > STR                    ST  S3$ )Nzjadaptive_max_pool2d(): Expected input to have non-zero size for non-batch dimensions, but input has sizes r  r  rR  r   r   s   r8   rg   r         '',{{m3CA3lTr:   r   c                      g)NzCadaptive_max_pool2d(): internal error: output_size.size() must be 2r4   r4   r:   r8   rg   r        Ur:   r0  r0   r   r   )r   rX   rj   r   r   r   r   r   rQ   r   r4  )r   r  r   dimHsizeBsizeDosizeHosizeWr   r   r   r   r   s   `           @r8   meta_adaptive_max_pool2dr    sk    ::D	LLZ 1d^JJqMA	
  
LLKAU
 DEEzzQ

1	JJtax E NFzzQF+	ooi(//)5;;/?|62	33E:ooi(++-+H//)5;;/?BB' C 
 |r:   c                 T  ^ ^ T R                   n[        R                  " US;   U 4S j5        [        T S5        [        R                  " TR                  T R                  :H  U U4S j5        [
        R                  " T5      nTR                  TR                  5      R                  US9$ )Nr  c                  "   > ST R                    3$ )NzKadaptive_max_pooling2d_backward(): Expected 3D or 4D grad_output, but got: rR  r%  s   r8   rg   3meta_adaptive_max_pool2d_backward.<locals>.<lambda>  s    ]^i^o^o]pqr:   adaptive_max_pool2d_backwardc                  <   > STR                    ST R                    3$ r  r   )r%  r   s   r8   rg   r  %  s    /%++.OP[PaPaObcr:   r   )
r   rX   rj   r  r`   rQ   r   r   r   r4  )r%  r   r   r   r   s   ``   r8   !meta_adaptive_max_pool2d_backwardr    s     D	LLq
 &k3QR	LL{(((c
 //6M??5;;'***GGr:   c                   ^ ^ T R                   n[        R                  " US;   U 4S j5        [        SU5       H1  m[        R                  " T R	                  T5      S:  UU 4S j5        M3     [        R                  " [        U5      S:H  S 5        SnSnSnUS:X  a  T R	                  S5      nUS-  nT R	                  U5      nUu  pgnUS	:X  a  XVXx4n	OXEXgU4n	T R                  U	5      n
T R                  U	[        R                  S
9nX4$ )Nr  c                  "   > ST R                    3$ )Nz:adaptive_max_pool3d(): Expected 4D or 5D tensor, but got: rR  r  s   r8   rg   *meta_adaptive_max_pool3d.<locals>.<lambda>2  r  r:   r3   r   c                  *   > STR                    ST  S3$ )Nzjadaptive_max_pool3d(): Expected input to have non-zero size for non-batch dimensions, but input has sizes r  r  rR  r  s   r8   rg   r  7  r  r:   r0   c                      g)NzCadaptive_max_pool3d(): internal error: output_size.size() must be 3r4   r4   r:   r8   rg   r  ?  r  r:   rX  r0  r   )r   rX   rj   r   r   r   r   r   )r   r  r   dimDr  r  osizeTr  r  r   r   r   r   s   `           @r8   meta_adaptive_max_pool3dr  ,  s    ::D	LLZ 1d^JJqMA	
  
LLKAU
 DEEqy

1	JJtE(FFqyF3	66:	
//)
$Cooiu{{o;G<r:   c                 P    [        U S5        UR                  UR                  5      $ )Nadaptive_max_pool3d_backward)r  r   r   )r%  r   r   s      r8   !meta_adaptive_max_pool3d_backwardr  X  s"     &k3QR??5;;''r:   c                 @    Uc  [        S5      eU R                  U5      $ )Nz:cannot repeat_interleave a meta tensor without output_size)r3  r   )repeatsr  s     r8   meta_repeat_interleave_Tensorr   _  s%    WXX[))r:   c                    U R                   R                  (       d  [        SU R                    35      eUR                   R                  (       d  [        SUR                    35      e[        U R	                  [        U R                   5      5      UR	                  [        UR                   5      5      [        R                  S9nU$ )Nz!real must be floating point, got z!imag must be floating point, got rN   )r`   r  r   rV   r4  r   r   rR   )realimagr  s      r8   meta_complexr  f  s     ::''@MNN::''@MNN+DJJ78+DJJ786>>F
 Mr:   )
fill_valuer  c                    [        U 5      S;   a-  U R                  XR                  5       4[        R                  S9$ [        R
                  " XR                  5       4SU4[        R                  U R                  S9$ )N)cpur   r   r3   r`   r   )r  r   r   rX   r   r@  r   )r   r   r  s      r8   nonzero_staticr
  u  se     4N*~~tXXZ0

~CC""88:I**;;	
 	
r:   c                    [         R                  " [        R                  S 5        [         R                  " U R                  5       U R                  5       4SU R                  5       4[         R                  U R                  S9$ )Nc                      g)NaY  The register_meta function for torch.nonzero() raises unimplemented by default, as a correct data-independent implementation does not exist. This implementation returns a fake value, assuming all elements of the tensor are non-zero. To enable this registration, please set 'torch.fx.experimental._config.meta_nonzero_assume_all_nonzero' to True.r4   r4   r:   r8   rg   nonzero.<locals>.<lambda>  s     Sr:   r3   r	  )	rX   _check_not_implementedr  meta_nonzero_assume_all_nonzeror@  r   r   r   r   r   s    r8   nonzeror    sf     
  22	S 	txxz"	
DJJLjj{{	 r:   c           
      n	  ^ ^^^^^^^^ [         R                  " [        T5      S 5        / n[        T5       GH  u  mmTGbr  [         R                  " TR                  [         R
                  [         R                  [         R                  [         R                  4;   S 5        TR                  [         R                  [         R                  4;   a  TR                  5       n[        U5      m[         R                  " TTR                  -   T R                  :*  U 4S j5        [        TR                  5       Hc  m[         R                  " TR                  T   T R                  TT-      :H  UUUUU 4S j5        UR                  UR                  ST5      5        Me     GMi  UR                  T5        GM}  UR                  T5        GM     Um[         R                  " [        T5      T R                  :*  UU 4S j5        SS KJn  [%        UR&                  " T6 5      m[        T5      T R                  :  a,  TR                  S 5        [        T5      T R                  :  a  M,  SnSnT H&  mUS:X  a	  Tb  SnM  M  US:X  a	  Tc  S	nM  M!  Tc  M&    O   S
nU(       d  / n/ n[        T5       H-  u  mmTc  M  UR                  T5        UR                  T5        M/     [        T5       H-  u  mmTb  M  UR                  T5        UR                  T5        M/     T R)                  U5      m Um/ m/ m/ m[        T5       He  u  n	mTcG  T(       a   TR                  T R                  U	   5        M0  TR                  T R                  U	   5        MP  [%        TR                  5      mMg     UUU4S jn
T R+                  TT-   T-   5      nSSKJn  U" T R1                  5       S:H  5      (       a  U$ U
" T 5      n[2        R4                  " U5      u  p[%        U5      [%        [        [        U5      5      5      :w  a  [2        R6                  " UR                  U5      n[2        R8                  " U5      n[2        R6                  " U[2        R:                  " U5      5      nUR=                  UR?                  5       U5      nU$ )Nc                      g)Nz#at least one index must be providedr4   r4   r:   r8   rg   #meta_index_Tensor.<locals>.<lambda>  s    (Mr:   c                      g)Nz?tensors used as indices must be long, int, byte or bool tensorsr4   r4   r:   r8   rg   r    s    Yr:   c                  "   > ST R                    3$ )N)too many indices for tensor of dimension r  r   s   r8   rg   r    s    G		{Sr:   c            	      N   > STR                    ST  STR                    STT-    3$ )NzThe shape of the mask 
 at index z0 does not match the shape of the indexed tensor rR  )r   r   jrb  r   s   r8   rg   r    s:    "8ZPQs SJJN**U_`ade`e_f!hr:   r3   c                  <   > STR                    S[        T 5       S3$ )Nr  z (got r~   )r   r   )r   r   s   r8   rg   r    s    ;DII;fSQX\NZ[\r:   r   Fr   Tc                    > TT-   T-   n[        U R                  5       5      nS/[        T5      -  U[        T5      [        U R                  5      [        T5      -
  & U R	                  X5      $ )z9
This follows restride_src in TensorAdvancedIndexing.cpp
r   )r   r   r   r   rQ  )r   r   r   after_shapebefore_shapereplacement_shapes      r8   _restride_src(meta_index_Tensor.<locals>._restride_src  sm     00;>t{{}%KL#PSQ
 K
L!C

Oc+6F$FG u..r:   guard_or_false) rX   rj   r  	enumerater`   r   r   rO  r  r   r   r   r   r   r   selecttorch._refs_refsr   r&   r   r   r1  r"  r   rQ   3compute_elementwise_output_logical_to_physical_perm
apply_permr   invert_permrQ  r   )r   r   r  r  refsstatehas_contiguous_subspacer  transposed_indicesr   r  r   r"  restrided_selfpermrT   
perm_shaper@  r  r  r   r   r  rb  r  s   ``                @@@@@@@r8   meta_index_Tensorr1    s   	LLg MN #%Fg&5LL

EIIuzz5::NNY {{uzz5::66--/K""

Ndii/S uzz*A&&A$**QU*;;h h
 MM'..A"67 + e$MM% / '0 G	LLG		!\
 4(('23G
g,
"t g,
" E#A:  !aZ}     #'
 #!'*HAu A"))%0 + "'*HAu}A"))%0 + ||D!$ !LK#%(
U= ""4::c?3##DJJsO4 $U[[ 1 )
/ ..(99KG
HCDdjjla'((

 #4(NGGWGD DzT%D	*++%%cii6
66zB
%%j%2C2CD2IJ
nnSXXZ4Jr:   c                 J   S nS nS nS nU
S   (       a4  U" X5      nU R                  UR                  5       5      R                  US9nU
S   (       a4  U" X5      nU R                  UR                  5       5      R                  US9nU
S   (       a  U R                  U5      nXU4$ )Nc                 .   [        U 5      n[        U5      nU[        R                  :X  d  U[        R                  :X  a  [        R                  $ U[        R                  :X  d  U[        R                  :X  a  [        R                  $ [        R                  $ rD   )r   rX   r  r  r   )t1t2fmt1fmt2s       r8   _conv_memory_format6meta_convolution_backward.<locals>._conv_memory_format9  sr     %R($R(5&&&$%2E2E*E&&&5)))TU5K5K-K)))&&&r:   r   r   r3   r   )r   r   r4  )grad_output_input_weight_bias_sizes_optr   r  r  
transposedr  r  output_maskbackend_grad_inputbackend_grad_weightbackend_grad_biasr8  r   s                   r8   meta_convolution_backwardrC    s      	' 1~+LB)33FKKMBEE' F 
 1~+FA*44W\\^DGG' H 
 1~(22>B5FGGr:   c                  ^^ TR                  S5      nTR                  S5      nU R                  XV45      n [        R                  " TR	                  5       S:H  S 5        [        R                  " TR	                  5       S:H  S 5        [        R                  " TR                  S5      TR                  S5      :H  UU4S j5        [        R                  " TR                  S5      TR                  S5      :H  UU4S j5        [        R                  " U R                  S5      U:H  =(       a    U R                  S5      U:H  S	 5        U R                  U R                  5       5      $ )
Nr3   r   r0   c                      gr  r4   r4   r:   r8   rg   meta_addbmm.<locals>.<lambda>Z  r  r:   c                      gr  r4   r4   r:   r8   rg   rF  [  r  r:   r   c                  P   > ST R                  S5       STR                  S5       3$ )Nz8batch1 and batch2 must have same number of batches, got r   r   r   r  r  s   r8   rg   rF  ^  s,    J6;;WX>JZZ_`f`k`klm`n_opr:   c            
         > ST R                  S5       ST R                  S5       STR                  S5       STR                  S5       S3	$ )Nz#Incompatible matrix sizes for bmm (r3   r=   r   r   r~   r   rI  s   r8   rg   rF  b  sL    1&++a.1A6;;q>BR S;;q>"!FKKN#316r:   c                      g)Nz.self tensor does not match matmul output shaper4   r4   r:   r8   rg   rF  i  s    @r:   )r   r  rX   rj   r   r   )r   r  r  rc  rb  r  r  s    ``    r8   meta_addbmmrL  T  s     ;;q>D;;q>D;;|$D	LL"$HI	LL"$HI	LLA&++a.(p 
LLA&++a.(	
 
LL		!51!5@ >>$))+&&r:   c                 @    U R                  U R                  5       5      $ rD   r   r   )r   r  kwargss      r8   meta_randint_likerP  n  s    >>$))+&&r:   )
grad_scale	found_infc       	         p   ^ XX#XE4 H,  m[         R                  " [        T[        5      U4S j5        M.     g )Nc                      > S[        T 5       3$ Nz'exponent must be a tensor list but got r   ls   r8   rg   #meta__fused_adam_.<locals>.<lambda>      =d1gYGr:   rX   rj   rt   r   )r   gradsexp_avgsexp_avg_sqsmax_exp_avg_sqsstate_stepslrbeta1beta2weight_decayepsamsgradmaximizerQ  rR  rX  s                  @r8   meta__fused_adam_rh  s  s0    & 8/Oq$G	
 Pr:   c       	            ^ XX#XE4 H,  m[         R                  " [        T[        5      U4S j5        M.     S nU" U 5      U" U5      U" U5      U" U5      U" U5      4$ )Nc                      > S[        T 5       3$ rU  rV  rW  s   r8   rg   "meta__fused_adam.<locals>.<lambda>  rZ  r:   c                 Z    U  Vs/ s H  n[         R                  " U5      PM     sn$ s  snf rD   rG  )tensor_listr  s     r8   empty_like_list)meta__fused_adam.<locals>.empty_like_list  s%    -89[  #[999s    (r[  )r   r\  r]  r^  r_  r`  ra  rb  rc  rd  re  rf  rg  rQ  rR  rn  rX  s                   @r8   meta__fused_adamrp    si    & 8/Oq$G	
 P: 	!$( r:   c                 t  ^ ^ [         R                  " T R                  5       S:H  S 5        [         R                  " TR                  5       S:H  S 5        [         R                  " T R                  [         R                  L U 4S j5        [         R                  " TR                  [         R                  L U4S j5        [         R                  " T R                  S5      TR                  S5      :H  U U4S j5        T R                  T R                  S5      TR                  S5      4[         R                  S	9$ )
Nr   c                      g)Nza must be a 2D tensorr4   r4   r:   r8   rg   meta__int_mm.<locals>.<lambda>      '>r:   c                      g)Nzb must be a 2D tensorr4   r4   r:   r8   rg   rs    rt  r:   c                  "   > ST R                    3$ )Nzexpected self to be int8, got r   )r6   s   r8   rg   rs        0	:r:   c                  "   > ST R                    3$ )Nzexpected mat2 to be int8, got r   )r7   s   r8   rg   rs    rw  r:   r3   r   c            
         > ST R                  S5       ST R                  S5       STR                  S5       STR                  S5       S3	$ )Nz'Incompatible matrix sizes for _int_mm (r   r=   r3   r   r~   r   r5   s   r8   rg   rs    sH    5affQi[!&&) M66!9+Qqvvayk,r:   r   )rX   rj   r   r`   rO  r   r   rP  r5   s   ``r8   meta__int_mmrz    s     
LLA>?	LLA>?	LL	5::: 
LL	5::: 
LL	q	QVVAY	
 ;;q	166!9-U[[;AAr:   c                 d  ^  [         R                  " T R                  5       S:H  S 5        [         R                  " T R                  [         R                  L U 4S j5        T R                  S5      nT R                  S5      S-  nT R                  US-  X1S-  -  SUS-  4[         R                  S	9$ )
Nr   c                      gNzw must be a 2D tensorr4   r4   r:   r8   rg   2meta__convert_weight_to_int4pack.<locals>.<lambda>  rt  r:   c                  "   > ST R                    3$ rP  r   rL  s   r8   rg   r~        .qwwi8r:   r   r3             r   )rX   rj   r   r`   r  r   r   rP  r  inner_k_tilesr  rb  s   `   r8    meta__convert_weight_to_int4packr    s    	LLA>?	LL	5;;8 	
q	A	q	AA;;F"$%Q		
 kk   r:   c                 H  ^  [         R                  " T R                  5       S:H  S 5        [         R                  " T R                  [         R                  L U 4S j5        T R                  S5      nT R                  S5      nT R                  X#S-  4[         R                  S9$ )Nr   c                      gr}  r4   r4   r:   r8   rg   :meta__convert_weight_to_int4pack_for_cpu.<locals>.<lambda>  rt  r:   c                  "   > ST R                    3$ Nzexpected w to be int32, got r   rL  s   r8   rg   r    r  r:   r   r3   r   )rX   rj   r   r`   rP  r   r   r  r  s   `   r8   (meta__convert_weight_to_int4pack_for_cpur    s}    	LLA>?	LL	5;;8 	
q	A	q	A;;	
Fkk   r:   c                   ^ ^^ [         R                  " T R                  5       S:H  S 5        TR                  R                  S:X  a  SOSm[         R                  " TR                  5       T:H  U4S j5        [         R                  " T R
                  [         R                  [         R                  [         R                  4;   U 4S j5        [         R                  " TR
                  [         R                  L U4S j5        TR                  R                  S:X  a  TR                  S5      OTR                  S5      S	-  nT R                  T R                  S5      UT R
                  S
9$ )Nr   c                      gNzx must be a 2D tensorr4   r4   r:   r8   rg   *meta__weight_int4pack_mm.<locals>.<lambda>  rt  r:   r   r0  c                     > ST  S3$ )Nzw must be a zD tensorr4   )expected_dims   r8   rg   r    s    Lh2Wr:   c                  "   > ST R                    3$ rN  r   rH  s   r8   rg   r        5aggY?r:   c                  "   > ST R                    3$ r  r   rL  s   r8   rg   r    r  r:   r   r  r   )rX   rj   r   r  r   r`   rm  rn  ro  rP  r   r   )r=   r  rR  rT  dim_nr  s   ``   @r8   meta__weight_int4pack_mmr    s    	LLA>?**e31L	LLL(*WX	LL	EMM5==%..AA? 
LL	5;;8 ++u4AFF1I!&&)a-E;;qvvay%qww;77r:   c                 0  ^ ^ [         R                  " T R                  5       S:H  S 5        [         R                  " TR                  5       S:H  S 5        [         R                  " T R                  [         R                  [         R
                  [         R                  4;   U 4S j5        [         R                  " TR                  [         R                  L U4S j5        T R                  T R                  S5      TR                  S5      T R                  S9$ )Nr   c                      gr  r4   r4   r:   r8   rg   2meta__weight_int4pack_mm_for_cpu.<locals>.<lambda>  rt  r:   c                      gr}  r4   r4   r:   r8   rg   r     rt  r:   c                  "   > ST R                    3$ rN  r   rH  s   r8   rg   r    r  r:   c                  "   > ST R                    3$ rP  r   rL  s   r8   rg   r    r  r:   r   r   )
rX   rj   r   r`   rm  rn  ro  r  r   r   rU  s   ``  r8    meta__weight_int4pack_mm_for_cpur        	LLA>?	LLA>?	LL	EMM5==%..AA? 
LL	5;;8 ;;qvvay!&&)177;;;r:   c                 0  ^ ^ [         R                  " T R                  5       S:H  S 5        [         R                  " TR                  5       S:H  S 5        [         R                  " T R                  [         R                  [         R
                  [         R                  4;   U 4S j5        [         R                  " TR                  [         R                  L U4S j5        T R                  T R                  S5      TR                  S5      T R                  S9$ )Nr   c                      gr  r4   r4   r:   r8   rg   ;_weight_int4pack_mm_with_scales_and_zeros.<locals>.<lambda>  rt  r:   c                      gr}  r4   r4   r:   r8   rg   r    rt  r:   c                  "   > ST R                    3$ rN  r   rH  s   r8   rg   r    r  r:   c                  "   > ST R                    3$ r  r   rL  s   r8   rg   r    r  r:   r   r   )
rX   rj   r   r`   rm  rn  ro  rP  r   r   )r=   r  rR  qScaleqZeross   ``   r8   )_weight_int4pack_mm_with_scales_and_zerosr    r  r:   r6   r7   c                     X-   S-
  U-  U-  $ r2   r4   r5   s     r8   kai_roundupr    s    UQY1!!r:   c                   ^	^
^^^^^^^^^ U S:X  a{  X2:X  a(  SnSnSnSmSmSmS mUUUU4S jmU4S jnU" XXEU5      $ US-  S	:X  aD  X#-  S	:X  a;  SnSnSnSmSmSmSm	U	UU4S
 jnU	U
UUUUU4S jmS m
U	4S jmU	4S jmU" XXEXc5      $ g g g )Nr0  r  r  r   c                 4    [        X-  S5      n[        X5      $ )Nr0  r  )rb  krsrkr_sr_roundedup4s       r8   kai_k_roundedup3get_kai_packed_weight_size.<locals>.kai_k_roundedup+  s     $/rw#: "177r:   c                 f   > T" XU5      nUS-  S:w  a  [        SU 35      eUUS-  T-   T-   T-   -  $ )Nr   r   zk_internal must be even, got r%  )	rb  nrr  r  
k_internalr  kai_num_bytes_biaskai_num_bytes_multiplier_rhskai_num_bytes_sum_rhss	        r8   9kai_get_rhs_packed_stride_rhs_pack_nxk_qsi4cxp_qsu4cxs1s0]get_kai_packed_weight_size.<locals>.kai_get_rhs_packed_stride_rhs_pack_nxk_qsi4cxp_qsu4cxs1s01  s^     -QB7
Nq((+H)UVV1_23+, )) r:   c                 8   > [        X5      U-  nUT" XX45      -  $ rD   r  )r  rb  r  r  r  num_rowsr  s         r8   7kai_get_rhs_packed_size_rhs_pack_nxk_qsi4cxp_qsu4cxs1s0[get_kai_packed_weight_size.<locals>.kai_get_rhs_packed_size_rhs_pack_nxk_qsi4cxp_qsu4cxs1s0@  s0     'q-3 Orr:   r  r   c                    > XS-  S:w  a  [        SU SU S35      eUT	-  S:w  a  [        SU ST	 S35      eUT-  S:w  a  [        SU ST S35      e[        X5      U-  nUT" XX4U5      -  $ Nr   bl (z) must be divisible by kr (r~   znr (z+) must be divisible by kai_nr_multiple_of (+) must be divisible by kai_bl_multiple_of (r   r  )
r  rb  r  r  r  blr  kai_bl_multiple_of;kai_get_rhs_packed_stride_rhs_pack_nxk_qsi4c32p_qsu4c32s1s0kai_nr_multiple_ofs
          r8   9kai_get_rhs_packed_size_rhs_pack_nxk_qsi4c32p_qsu4c32s1s0]get_kai_packed_weight_size.<locals>.kai_get_rhs_packed_size_rhs_pack_nxk_qsi4c32p_qsu4c32s1s0X  s     G>(4t3NrdRS)TUU++1(rd"MN`Maabc  ++1(rd"MN`Maabc  'q-3 Qrrr:   c                    > XB-  S:w  a  [        SU SU S35      eUT
-  S:w  a  [        SU ST
 S35      eUT-  S:w  a  [        SU ST S35      eT	" 5       nT" X5      nT" XE5      nUXv-  T-   T-   -  $ r  r%  )rb  r  r  r  r  num_bytes_multiplier_rhsnum_blocks_per_rownum_bytes_per_blockr  #kai_get_bf16_datatype_size_in_bytesr  kai_num_blocks_per_rowr  kai_num_bytes_per_blockr  s           r8   r  _get_kai_packed_weight_size.<locals>.kai_get_rhs_packed_stride_rhs_pack_nxk_qsi4c32p_qsu4c32s1s0o  s     G>(4t3NrdRS)TUU++1(rd"MN`Maabc  ++1(rd"MN`Maabc 
 ,O+P(%;A%B"&='# (=+,() r:   c                      g)Nr   r4   r4   r:   r8   r  Gget_kai_packed_weight_size.<locals>.kai_get_bf16_datatype_size_in_bytes  s    r:   c                 V   > UT-  S:w  a  [        SU ST S35      e[        X5      U-  $ )Nr   r  r  r~   r  )rb  r  r  s     r8   r  :get_kai_packed_weight_size.<locals>.kai_num_blocks_per_row  sF    ++1(rd"MN`Maabc  #1)R//r:   c                 J   > U T-  S:w  a  [        SU  ST S35      eU S-  U-   $ )Nr   r  r  r~   r   r%  )r  r  r  s     r8   r  ;get_kai_packed_weight_size.<locals>.kai_num_bytes_per_block  sF    ++1(rd"MN`Maabc  a#;;;r:   r4   )n_bitsr  K	groupsizekai_nrkai_krkai_srr  r  r  r  r  r  r  r  r  r  r  r  r  s            @@@@@@@@@@@r8   get_kai_packed_weight_sizer    s    {>FFF$%!+,(!"8 
 Kff  ^q Q]a%7FFF$%!!"!"!#. :0< Mff ] &8 _ r:   c                   ^  [         R                  " T R                  [         R                  L U 4S j5        [         R                  R
                  R                  5       (       a  X4:X  a  UR                  [         R                  :X  d4  X4:  ac  US-  S:X  aZ  XC-  S:X  aR  UR                  [         R                  :X  a4  [        SXTU5      nT R                  [        U5      [         R                  S9$ T R                  5       UR                  5       -   nUb  XbR                  5       -  nT R                  U[         R                  S9$ )Nc                  "   > ST R                    3$ rP  r   )weightss   r8   rg   2meta__dyn_quant_pack_4bit_weight.<locals>.<lambda>  s    .w}}o>r:   r  r   r0  r   )rX   rj   r`   r  backendskleidiaiis_availabler\   ro  r  r   r   r   )r  scales_zerosrH  
block_sizein_featuresout_featurespacked_weight_sizes   `      r8    meta__dyn_quant_pack_4bit_weightr    s    
LL$> ~~++--		"|'9'9U[['H$R1$(A-""enn4 8|*
   %7!8 LL <+=+=+??jjl*/u{{CCr:   c                   ^ ^^ [         R                  " T R                  5       S:H  S 5        [         R                  " T R                  [         R                  :H  =(       d)    T R                  [         R
                  :H  =(       a    TT:H  UUU 4S j5        T R                  S5      nT R                  XTT R                  S9$ )Nr   c                      g)Nzinput must be a 2D tensorr4   r4   r:   r8   rg   -meta__dyn_quant_matmul_4bit.<locals>.<lambda>  s    )Dr:   c                  .   > STR                    ST  ST 3$ )NzPexpected input to be f32 or bf16 (bf16 requires block_size == in_features), got z with block_size=z and in_features=r   )r  r  inps   r8   rg   r    s%    99+.zl:KK=Zr:   r   r   )rX   rj   r   r`   rm  ro  r   r   )r  packed_weightsr  r  r  r  s   ` ``  r8   meta__dyn_quant_matmul_4bitr    s     
LLa!DE	LL	emm	# 	GII'EJ+,E	
 	A==		=::r:   c                 0  ^ ^ [         R                  " T R                  5       S:H  S 5        [         R                  " T R                  [         R                  [         R
                  [         R                  4;   U 4S j5        [         R                  " TR                  5       S:H  S 5        [         R                  " TR                  [         R                  L U4S j5        T R                  T R                  S5      TR                  S5      T R                  S9$ )Nr   c                      gr  r4   r4   r:   r8   rg   *meta__weight_int8pack_mm.<locals>.<lambda>  rt  r:   c                  "   > ST R                    3$ rN  r   rH  s   r8   rg   r    r  r:   c                      gr}  r4   r4   r:   r8   rg   r    rt  r:   c                  "   > ST R                    3$ )Nzexpected w to be int8, got r   rL  s   r8   rg   r    s    -aggY7r:   r   r   )
rX   rj   r   r`   rm  rn  ro  rO  r   r   )r=   r  q_scaless   `` r8   meta__weight_int8pack_mmr    s    	LLA>?	LL	EMM5==%..AA? 
LLA>?	LL	5::7 ;;qvvay!&&)177;;;r:   c                   ^ ^^ [         R                  " T R                  5       S:  U 4S j5        [         R                  " TR                  5       S:  U4S j5        [         R                  " T R                  S5      TR                  S5      :H  U U4S j5        [         R                  " [        R
                  " T R                  5      U 4S j5        [         R                  " [        R
                  " TR                  5      U4S j5        [         R                  " US:  S	 5        [         R                  " TS
;   U4S j5        T R                  S5      nTR                  S5      nT R                  S S nTR                  S S n[        [         R                  " Xg5      5      nUR                  XE/5        T R                  U5      $ )Nr   c                  ,   > ST R                  5        S3$ )Nz1cdist only supports at least 2D tensors, X1 got: rZ  r   x1s   r8   rg   $meta_cdist_forward.<locals>.<lambda>      CBFFH:QOr:   c                  ,   > ST R                  5        S3$ )Nz1cdist only supports at least 2D tensors, X2 got: rZ  r   x2s   r8   rg   r    r   r:   r   c                  P   > ST R                  S5       STR                  S5       3$ )Nz4X1 and X2 must have the same number of columns. X1: r   z X2: r   )r  r  s   r8   rg   r    s*    Frwwr{mSXY[Y`Y`acYdXefr:   c                  "   > ST R                    3$ )Nz3cdist only supports floating-point dtypes, X1 got: r   r  s   r8   rg   r        EbhhZPr:   c                  "   > ST R                    3$ )Nz3cdist only supports floating-point dtypes, X2 got: r   r  s   r8   rg   r    r  r:   r   c                      g)Nz)cdist only supports non-negative p valuesr4   r4   r:   r8   rg   r    s    !Lr:   )Nr   r3   r   c                     > ST  3$ )Nz(possible modes: None, 0, 1, 2, but was: r4   )compute_modes   r8   rg   r    s    :<.Ir:   r  )rX   rj   r   r   rQ   is_float_dtyper`   r   r   broadcast_shapesextendr   )	r  r  ru  r
  r1r2batch_tensor1batch_tensor2rt  s	   `` `     r8   meta_cdist_forwardr    sF   	LL
AO 
LL
AO 
LL
rwwr{"f 
LLRXX&P 
LLRXX&P 
LLaLM	LL'I 
B	BHHSbMMHHSbMM..}LML!<<%%r:   c                 <   UR                   S   nUR                   S   nUR                   S   nUR                   S S nUR                   S S n	[        [        R                  " X5      5      n
U
R	                  5       nUR                  Xe/5        [        R                  " U
5      nUS:X  d  US:X  d  US:X  d  US:X  a  [        R                  " U5      $ U[        UR                   5      :w  a  UR                  U5      n[        R                  " U[        R                  S9$ )Nr   r  r   r   )r   r   rX   r  copyr  mathprod
zeros_liker  r   r   )rz  r  r  ru  cdistc1r  r  r  r  r  tensor1_expand_sizebatch_products                r8   meta_cdist_backwardr    s     
"B	"B	"BHHSbMMHHSbMM 6 6} TU.335x(II23M	Qw"'R1W(:##d288n,YY*+Be.E.EFFr:   c	                 Z  ^ ^^^^^ [         R                  " TR                  [         R                  [         R                  4;   U4S j5        [         R                  " TR                  [         R                  [         R                  4;   U4S j5        [         R                  " [
        R                  " T R                  5      U 4S j5        TR                  S5      n	U(       a   [         R                  " U	S:  S 5        U	S-  n	T R                  U	T R                  S5      5      n
Tb  [         R                  " U[        :H  S 5        [         R                  " TR                  S:H  U4S j5        [         R                  " TR                  5       TR                  5       :H  UU4S	 j5        U4S
 jmS mUU4S jn[        T5      S:w  a}  TR                  TR                  S5      5      nTR                  TR                  5       5      nU[        :X  a"  TR                  U	T R                  S5      5      nOTR                  S5      nOU" T TX5      nU[        [        4;   d  U(       d!  TR                  TR                  S5      5      nOTR                  S5      nTR                  U	5      nTR                  S   nU[        :X  aG  U(       a   [         R                  " US:  S 5        US-  nTR                  UT R                  S   5      nOTR                  UR                  5       5      nXX4$ )Nc                  "   > ST R                    3$ )Nz(expected indices to be long or int, got r   )r   s   r8   rg   $meta_embedding_bag.<locals>.<lambda>*      :7==/Jr:   c                  "   > ST R                    3$ )Nz(expected offsets to be long or int, got r   )r  s   r8   rg   r  .  r   r:   c                  "   > ST R                    3$ )Nz/expected weight to be floating point type, got r   )rF  s   r8   rg   r  2  s    A&,,Pr:   r   r3   c                      gNz1include_last_offset: numBags should be at least 1r4   r4   r:   r8   rg   r  9  s    Gr:   c                      g)Nz@embedding_bag: per_sample_weights only supported with mode='sum'r4   r4   r:   r8   rg   r  B  s    Vr:   c                  $   > ST R                    S3$ )Nz1expected per_sample_weights to be 1D tensor, got rZ  r  )per_sample_weightss   r8   rg   r  F  s    GHZH_H_G``abr:   c                  N   > STR                  5        ST R                  5        S3$ )Nz%expected per_sample_weights.numel() (z$ to be the same as indices.numel() (r~   r   )r   r'  s   r8   rg   r  J  s/    78J8P8P8R7S T66=mmo5FaIr:   c                 L   > T" XU5      =(       a    UR                  S5      S:H  $ Nr   r3   r   )r7  r  r
  padding_idxis_fast_path_index_selects       r8   is_fast_path_index_select_scale;meta_embedding_bag.<locals>.is_fast_path_index_select_scaleP  s&    %c;?XELLQROWXDX	
r:   c                    U R                   [        R                  :H  =(       d    U R                   [        R                  :H  =(       a;    U R	                  S5      S:H  =(       a     UR	                  S5      S:H  =(       a    US:  $ r  )r`   rX   r\   rZ   r   )r7  r
  r-  s      r8   r.  5meta_embedding_bag.<locals>.is_fast_path_index_selectU  sb    YY%++%@ejj)@  

1" a A%  a		
r:   c                 .   > Ub	  T" XX#5      $ T" XU5      $ rD   r4   )r7  r  r
  r-  r.  r/  s       r8   is_fast_path(meta_embedding_bag.<locals>.is_fast_path]  s#    23vSS,S+FFr:   r  c                      gr$  r4   r4   r:   r8   rg   r  w  s    Or:   )rX   rj   r`   r   r   rQ   r  r   r   MODE_SUMr   r   r  MODE_MAX	MODE_MEANr   )rF  r   r  scale_grad_by_freqrz  sparser'  include_last_offsetr-  num_bagsr
  r4  
offset2bagbag_sizemax_indicesfast_path_sumnumBagsr.  r/  s   ```   `          @@r8   meta_embedding_bagrC    sw    
LL%**eii00J 
LL%**eii00J 
LLV\\*P
 ||AHMG	
 	AhA7F%HV	
 	##q(b	
 	$$&'--/9	



G 7u$&&w||A7
$$W\\^48!++Hfkk!nEK!++A.K$V-?UIx(( **7<<?;J **1-J$$X.--"8"qLO 1!++GV\\!_EK!++HMMO<Kx44r:   c                     [        XU/UQ76 u  pEpg[        U5      S:X  a  UR                  UR                  5       5      nXEXg4$ )Nr  )rC  r  r   r   )rF  r   r  rS   r
  r>  r?  r@  s           r8   meta_embedding_bag_forward_onlyrE    sN    0B1#'1-F 7u$$$W\\^4x44r:   c                     U(       a  U$ U R                   R                  (       d  U R                   R                  (       a  U R                   $ U(       a  [        R                  $ U R                   $ rD   )r`   r  r   rX   r   )r   r`   promote_int_to_longs      r8   _get_reduction_dtyperH    sD    {{$$(>(>{{	zz;;r:   r   c                    [        XSS9n[        R                  " U R                  U5      n[	        XU5      nU R                  XTS9$ )NT)rG  r   )rH  rQ   r  r   r  r   )r   r  r  r`   r   rt  s         r8   meta_nansumrJ    sC     ($OLT2D+EAL??<?<<r:   c           	          [         R                  " U R                  [        [	        U R                  5       5      5      5      nU R                  U5      $ rD   )rQ   r  r   ri   r   r   r   )r   rt  s     r8   meta_medianrL    s<    77U5-.L ??<((r:   c                    [        U 5      S:X  a  [        R                  " S5        [        R                  " U R                  U45      n[        XU5      nU R                  U5      U R                  U[        R                  S94$ )Nr   zmedian CUDA with indices outputr   )	r  rQ   alert_not_deterministicr  r   r  r   rX   r   )r   r   r  rt  s       r8   meta_median_mode_dimrO    sn     5V#%%&GH


u{{SF
3C+E@L%EJJ7 r:   c                     U $ rD   r4   r   s    r8   meta_logical_not_rQ    r  r:   c                   ^^ [         R                  " [        U5      U R                  5       :  S 5        [	        U5       H%  u  mm[         R                  " TS:  UU4S j5        M'     [        U5      U R                  5       -
  nSU-  [        U R                  5      -   n[        [        U5      5       Vs/ s H  oCU   X   -  PM     nnU R                  U5      $ s  snf )Nc                      g)NzZNumber of dimensions of repeat dims can not be smaller than number of dimensions of tensorr4   r4   r:   r8   rg   meta_repeat.<locals>.<lambda>  s    lr:   r   c                     > ST ST  3$ )Nz"Repeats cannot be negative, found r  r4   )r   reps   r8   rg   rT    s    8ZsKr:   r;  )	rX   rj   r   r   r#  ri   r   r   r   )r   r  num_new_dimensionspadded_sizer   target_sizerV  s       ` @r8   meta_repeatrZ    s    	LLG
"l G$31HK	
 % W
2++eDJJ.??K8=c'l8KL8K1q>GJ.8KKL>>+&& Ms   6Cc                     U $ rD   r4   r   s    r8   
meta_zero_r\    r  r:   c                     [        U[        R                  5      (       a   [        U R                  UR                  5        U $ rD   )rt   rX   r   rl   r   r   r   s     r8   meta_binop_inplacer_    s,     %&&

EKK8Kr:   c                 *   S nS nS nU" U 5      (       a  U" U5      (       a  [        S5      eU" U 5      (       a  U" U5      (       d  [        S5      e[        U[        R                  5      (       a   [	        U R
                  UR
                  5        U $ )a  
Some checks for inplace ops.
Checks for promotion rules for some dtypes.
int.add/sub_(float) and bool.add/sub_(others) are rejected.
Promoting in these in-place operations would require reallocating
and copying over elements, hence not allowed.
Checks for alpha param.
c                     [        U [        5      (       a   [        R                  " U R                  5      $ [        U [
        5      $ rD   )rt   r   rQ   rW  r`   r   rw   s    r8   is_integeric.meta_binop_inplace_alpha.<locals>.is_integeric  s1    c:&&))#))44c7++r:   c                     [        U [        5      (       a   [        R                  " U R                  5      $ [        U [
        5      $ rD   )rt   r   rQ   r  r`   r   rb  s    r8   
is_floatic,meta_binop_inplace_alpha.<locals>.is_floatic  s1    c:&&''		22c9--r:   c                     [        U [        5      (       a   [        R                  " U R                  5      $ [        U [
        5      $ rD   )rt   r   rQ   is_boolean_dtyper`   r   rb  s    r8   is_booleanic.meta_binop_inplace_alpha.<locals>.is_booleanic  s1    c:&&))#))44c8,,r:   z]Promotion of int.add/sub_(float) in in-place ops are not possible due to element size change.z_Promotion of book.add/sub_(others) in in-place ops are not possible due to element size change.)r3  rt   rX   r   rl   r   )r   r   rb  rc  rf  rj  s         r8   meta_binop_inplace_alpharl    s    $,.- Dj//k
 	

 D,u"5"5m
 	
 %&&

EKK8Kr:   c                 2    [        X[        R                  S9$ Nr  rV   r   rR   r   r   rb  s      r8   meta_binop_alpharq  "  s     $C$K$K r:   c                 2    [        U [        R                  S9$ rn  ro  )r   rO  s     r8   
meta_roundrs  .  s    <DD r:   c                   ^ ^^ [         R                  " [        R                  " TR                  5      U U4S j5        [        T[         R                  5      (       a;  [         R                  " [        R                  " TR                  5      U U4S j5        g [         R                  " [        T[        5      U U4S j5        g )Nc                  &   > T  STR                    3$ )Nz7: Expected input tensor to have an integral dtype. Got r   )r  r   s   r8   rg   #shift_dtype_check.<locals>.<lambda>8  s    7)RSWS]S]R^_r:   c                  &   > T  STR                    3$ )Nz6: Expected shift value to have an integral dtype. Got r   r  r  s   r8   rg   rv  =  s    wiUVYV_V_U`ar:   c                     > T  ST 3$ )Nz): Expected shift value to be an int. Got r4   rx  s   r8   rg   rv  B  s    wiHNr:   )rX   rj   rQ   rW  r`   rt   r   r   )r  r   r  s   ```r8   shift_dtype_checkrz  5  ss    	LLtzz*_ #u||$$""399-a	

 	sG$N	
r:   c                 J    [        SX5        [        X[        R                  S9$ )Nrshiftr  rz  rV   r   rR   r^  s     r8   meta_rshiftsr~  F  %    h,$C$K$K r:   c                 J    [        SX5        [        X[        R                  S9$ )Nlshiftr  r}  r^  s     r8   meta_lshiftsr  N  r  r:   c                 8    U R                  U R                  5      $ rD   r  r   s    r8   	meta_zeror  V  s    >>$**%%r:   c                     U $ rD   r4   r   r  s     r8   
meta_fill_r  [  r  r:   c                 .    [         R                  " U 5      $ rD   rG  r  s     r8   	meta_fillr  `      D!!r:   c                     U $ rD   r4   r   s    r8   
meta_relu_r  e  r  r:   c                 2    [        X[        R                  S9$ rn  ro  rp  s      r8   meta__add_relur  j  s     $C$K$K r:   c                 .    [         R                  " U 5      $ rD   rG  r   noiselowerr  r  r  s         r8   meta_rrelu_with_noiser  r  s    
 D!!r:   c                 Z    [         R                  " U 5      [         R                  " U5      4$ rD   rG  r  s         r8    meta_rrelu_with_noise_functionalr  z  s%     D!5#3#3E#:::r:   c                     U $ rD   r4   )r   r  r  r  r  s        r8   meta_rrelu_with_noise_r    s	     Kr:   c                 .    [         R                  " U 5      $ rD   rG  r   r   r   
accumulates       r8   meta_index_putr    r  r:   c                 F    [        U R                  UR                  5        U $ rD   rl   r   )r   r  values      r8   meta_masked_fill_r    s    DJJ

3Kr:   c                     U R                  U R                  5       5      R                  [        R                  " U 5      S9nU$ r   )r   r   r4  rQ   r   )r   r  r  masked_scales       r8   meta__masked_scaler    s<    >>$))+.1111$7 2 L r:   c                    ^ ^ [         R                  " UR                  [         R                  [         R                  4;   S 5        [         R                  " T R                  TR                  :H  U U4S j5        T $ )Nc                      g)NzMask must be bool or uint8r4   r4   r:   r8   rg   &meta_masked_scatter_.<locals>.<lambda>  s    9Ur:   c                  <   > ST R                    STR                    3$ )NzEmasked_scatter: expected self and source to have same dtypes but got r   r   )r   rw  s   r8   rg   r    s      **U6<<.:r:   )rX   rj   r`   r  r  )r   r  rw  s   ` `r8   meta_masked_scatter_r    sU    	LL

uzz5;;//1U 
LL

fll"	:
 Kr:   c                 z    [        X5      u  p[        R                  " U [        R                  S9n[	        X1U5      $ r   )r&   rX   r   r   r  )r   r  rw  r
  s       r8   meta_masked_scatterr    s5     "$-JDd%2I2IJFf55r:   c                 $    U R                  U5      $ rD   r  )r   r  ra  s      r8   meta_masked_scatter_backwardr    s    >>%  r:   c                     U $ rD   r4   r  s       r8   meta_index_put_r    r  r:   c           	        ^^^^^ SSK JnJn  [        R                  " U R                  5       S:H  S 5        [        R                  " UR                  5       S:H  S 5        U R                  5       nUR                  5       mUS   mUS   mUS   nTS   n	TX4m[        R                  " U" U" TS   T5      U" TS   T5      5      UUU4S j5        U(       a  U R                  [        R                  :H  =(       d    U R                  [        R                  :H  =(       a    U[        R                  :H  n
[        R                  " X@R                  :H  =(       d    U
S	 5        UR                  T5      R                  U5      nOUR                  T5      nU(       d]  TbZ  [        R                  " TR                  5       S:H  S
 5        [        R                  " U" TR                  5       T5      UU4S j5        U$ )Nr   )sym_andr  r0   c                      gr  r4   r4   r:   r8   rg   )common_meta_baddbmm_bmm.<locals>.<lambda>  r  r:   c                      gr  r4   r4   r:   r8   rg   r    r  r:   r   r3   c            	      .   > ST ST ST S    ST S    S3	$ r  r4   r  s   r8   rg   r    s3    RSURV
l<?*;2l1o=NbRr:   c                      g)Nzfout_dtype only supported for torch.float32 output with float16/bfloat16 inputs or same as input dtypesr4   r4   r:   r8   rg   r    s    |r:   c                      g)Nzself must be a 3D tensorr4   r4   r:   r8   rg   r    s    6Pr:   c                  0   > ST  STR                  5        3$ )Nz*Expected an input tensor shape with shape z but got shape: r   )r  self_baddbmms   r8   rg   r    s    @M]^j^o^o^q]rsr:   )r1  r  r  rX   rj   r   r   r`   rn  ro  rm  r   r4  )r  r  is_bmmr  rJ  r  r  r  res_rowsres_colssupported_out_dtyper
  r  r  r  r  s      `        @@@@r8   common_meta_baddbmm_bmmr    s   E	LL"$HI	LL"$HI;;=L;;=L	aB#AAHAHx*K	LL|A+VLOEU-VW	R
 LLEMM)KV\\U^^-K)5==( 	 	%<)<|	
 !!+.11)< !!+.l.\%%'1,.PQ<$$&4s	

 Mr:   c                     [        XS5      $ )NTr  )r   rW  s     r8   meta_bmmr    s    "4t44r:   c                     [        XSUS9$ )NT)rJ  r  )r   rW  rJ  s      r8   meta_bmm_dtyper    s    "4tyIIr:   c                 h    X-  nX-  nUS:w  a#  [        US:  5      [        US:  5      :w  a  US-  nU$ r+  )r  )r=   r>   qr~  s       r8   div_rtnr    s>    	A	A 	Av4A;$q1u+-	QHr:   c                     [        U U-   U-   XQS-
  -  -
  S-
  U(       a  US-
  OS-   U5      S-   nU(       a  US-
  U-  X-   :  a  US-  nU$ r  )r  )	inputSize
kernelSizer   r  r   r  rA  
outputSizes           r8   pooling_output_shape_pad_lrr    s     	 q.)* 	
 'vzA/ 	
 		  Nf$	(99!OJr:   c           	         ^^^ [         R                  " US:g  S 5        [         R                  " TS:  U4S j5        [         R                  " TTS-
  T-  S-   S-  :*  UUU4S j5        [        U TTTUTU5      $ )Nr   c                      g)Nzstride should not be zeror4   r4   r:   r8   rg   &pooling_output_shape.<locals>.<lambda>  s    &Ar:   c                     > ST  3$ )Nz'pad must be non-negative, but got pad: r4   pads   r8   rg   r    s    %LSE#Rr:   r3   r   c                     > ST ST ST  3$ )NzApad should be at most half of effective kernel size, but got pad=z, kernel_size=z and dilation=r4   )r  r  r  s   r8   rg   r    s"    OPSu U%,nXJ@r:   )rX   rj   r  )r  r  r  r   r  rA  s    `` ` r8   rz  rz    ss    	LL1AB	LLRS	LLa8+a/A55	
 ':sC9 r:   c           	        ^ ^^^^^^^^^	^
^^^^ T R                  5       nT	m[        R                  " TS:  =(       a    TS:  UU4S j5        [        R                  " TS:  =(       a    TS:  UU4S j5        [        R                  " TS:  =(       a    TS:  UU4S j5        T R                  S5      S:g  =(       a    T R                  S5      S:g  nU[        R                  :X  aC  [        R                  " US:H  =(       a    U=(       a    T R                  S5      S:g  U 4S	 j5        Or[        R                  " US:H  =(       a    T R                  S5      S:g  =(       a    U=(       d)    US:H  =(       a    U=(       a    T R                  S5      S:g  U 4S
 j5        [        R                  " TS-  T:  =(       a    TS-  T:  UUUU4S j5        [        R                  " TS:  =(       a    TS:  U
UU	UUU4S j5        g )Nr   c                     > ST  ST 3$ )Nz5kernel size should be greater than zero, but got kH: , kW: r4   )r~  r  s   r8   rg   $pool2d_shape_check.<locals>.<lambda>9  s    Gt6RTQUVr:   c                     > ST  ST 3$ )Nz0stride should be greater than zero, but got dH: , dW: r4   )r  r  s   r8   rg   r  =  s    B2$fRDQr:   c                     > ST  ST 3$ )Nz9dilation should be greater than zero, but got dilationH: , dilationW: r4   )	dilationH	dilationWs   r8   rg   r  A  s    KI;Vcdmcnor:   r3   r   r0  r0   c                  *   > ST R                  5        3$ )NzExpected 4D (batch mode) tensor expected for input with channels_last layout with optional 0 dim batch size for input, but got: r   r  s   r8   rg   r  I  s     CCH::<.Rr:   c                  *   > ST R                  5        3$ )NzYExpected 3D or 4D (batch mode) tensor with optional 0 dim batch size for input, but got: r   r  s   r8   rg   r  P  s    opupzpzp|o}~r:   c                      > ST ST ST ST  3$ )NzKpad should be smaller than or equal to half of kernel size, but got padW = z	, padH = z, kW = z, kH = r4   )r~  r  r  r  s   r8   rg   r  U  s$     ygbT>r:   c                  .   > ST ST  ST ST ST ST S3$ NzGiven input size: (r=   z). Calculated output size: (z). Output size is too smallr4   )r  r  rB  r  rC  rD  s   r8   rg   r  [  s8    %k]!K=* N$$0><.+ O##r:   )r   rX   rj   r   r  )r   r~  r  r  r  r  r  r  r  rB  r  r  rC  rD  r   r   
valid_dimsr  s   ``````````````   @r8   r{  r{  #  s   " 99;DL	LL
Q26V 
LL
Q26Q 
LLA')a-o
 A!#:

1(:J+++AI;*;A!);R	
 	QY<5::a=A-<* A	?j?UZZ]a-?~	
 
LL
a4+B!GtO	> 
LLq.\Q.	# 	#r:   r  r  r~  r  r  r  r  pTpHpW	dilationTr  r  r  r  r  r  r  r  r  c           
        ^ ^^^^^^^^^	^
^^^^^^^^^^^ T R                   n[        R                  " TS:  =(       a    TS:  =(       a    TS:  UUU4S j5        [        R                  " TS:  =(       a    TS:  =(       a    TS:  UUU4S j5        [        R                  " TS:  =(       a    TS:  =(       a    TS:  UUU4S j5        [        R                  " US;   UU 4S j5        [        U5       H@  mUS:X  a  TS:X  a  M  [        R                  " T R	                  T5      S:  UUU 4S j5        MB     U(       a;  [        R                  " TT:  =(       a    TT:  =(       a    TT:  UUUUUU4S	 j5        [        R                  " TS
-  T:  =(       a    TS
-  T
:  =(       a    TS
-  T	:  UUUU	UU
4S j5        [        R                  " TS:  =(       a    TS:  =(       a    TS:  UUUUUUU4S j5        g )Nr   c                     > ST ST  ST 3$ )Nz5kernel size should be greater than zero, but got kT: z, kH: r  r4   )r~  r  r  s   r8   rg   $pool3d_shape_check.<locals>.<lambda>}  s    $fRDrd,r:   c                     > ST ST  ST 3$ )Nz0stride should be greater than zero, but got dT: z, dH: r  r4   )r  r  r  s   r8   rg   r    s    >rd&FSURVWr:   c                     > ST ST  ST 3$ )Nz9dilation should be greater than zero, but got dilationT: z, dilationH: r  r4   )r  r  r  s   r8   rg   r    s    #M)M)Vr:   r  c                  &   > T  STR                    3$ )Nz/: Expected 4D or 5D tensor for input, but got: rR  )r  r   s   r8   rg   r    s    7)J5;;-Xr:   rX  c                  L   > T  STR                    STR                  T5       S3$ )NzZ: Expected input's non-batch dimensions to have positive length, but input has a shape of z and non-batch dimension z has length zero!)r   r   )r  r   r   s   r8   rg   r    s.    ) --2[[M+EJJqM?:KMr:   c                  .   > ST ST  ST ST ST ST S3$ )Nzinput image (T: ra  r:  z ) smaller than kernel size (kT:  kH:  kW: r~   r4   )r  r  r  r~  r  r  s   r8   rg   r    s4    "5'gYd6( C$$&4uRDbT<r:   r   c                  ,   > ST ST ST  ST ST ST 3$ )NzHpad should be smaller than or equal to half of kernel size, but got kT: r  r  z padT: z padW: z padH: r4   )r~  r  r  r  r  r  s   r8   rg   r    s1    $eB4uRDt72$gbTKr:   r3   c                  :   > ST ST ST  ST ST ST ST ST S3$ r  r4   )r  r  r  r  r  r  r  s   r8   rg   r    sD    !'!E7!G9AfX F((/y%'!F8 L'(r:   )r   rX   rj   r   r   )r   r  r  r~  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r   s   `````````````````````  @r8   r  r  a  s   0 ::D	LL
Q$26$b1f	
 
LL
Q$26$b1f	
 
LLA9)a-9IM	
 
LLX
 4[19aJJqMA	
	  RK:GrM:fl 	
 
LL
Q"6a26"q&B,	
 	
 
LL
3v{3w!|	
 	
r:   c                 j   U R                   n[        U UUUUUUU	U
UUUUUUUUUUUU5        [        UUUS-
  U5        [        UUUS-
  U5        [        UUUS-
  U5        [        UUUS-
  U5        [        UUUS-
  U5        [        UUUS-
  U5        [        UUUS-
  U5        [        UUUS-
  U5        g )Nr0  r0   r   r3   r   r  r\  )r   r%  r   r  r  r~  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   s                           r8   max_pool3d_backward_shape_checkr    s    2 ::D








+0 ;dQh8;dQh6;dQh8;dQh77D$(G47D$(E27D$(G47D$(F3r:   c                     U R                   n[        U UUUUUUUU	U
USSSUUUUUUUS5        [        UUUS-
  U5        [        UUUS-
  U5        [        UUUS-
  U5        [        UUUS-
  U5        g )Nr3   Tr0  r0   r   r  )r   r%  r  r  r~  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   s                       r8   r  r    s    * ::D








			-2 ;dQh8;dQh6;dQh8;dQh7r:   c                    S nU" SU5      u  px[         R                  " [        U5      S;   S 5        [        U5      S:X  a  XxpOU" SU5      u  pU" SU5      u  pU" SU5      u  pU R                  S	5      nU R                  S
5      nU R                  S5      n[        R
                  " U 5      nU[         R                  :X  a*  [         R                  " U R                  5       S:H  S 5        OVU[         R                  :X  a*  [         R                  " U R                  5       S;   S 5        O[         R                  " SS 5        [        UX{XU5      n[        UXXU5      n[        U UUU	U
UUUUUUUUUU5        UUU4$ )Nc                    ^  [         R                  " [        U5      S;   U 4S j5        US   n[        U5      S:X  a  UOUS   nX#4$ )Nr_  c                     > ST  S3$ )Nzmax_pool2d: ra  r4   rb  s   r8   rg   Emax_pool2d_checks_and_compute_shape.<locals>.unpack.<locals>.<lambda>=  rd  r:   r   r3   re  rf  s   `   r8   ri  3max_pool2d_checks_and_compute_shape.<locals>.unpack:  rk  r:   r  rl  c                      g)NzOmax_pool2d: stride must either be omitted, a single int, or a tuple of two intsr4   r4   r:   r8   rg   5max_pool2d_checks_and_compute_shape.<locals>.<lambda>G  rp  r:   r   r   r  r  rv  r  r   r0  c                      g)NzMnon-empty 4D (batch mode) tensor expected for input with channels_last layoutr4   r4   r:   r8   rg   r  X  s    cr:   r  c                      g)Nz9non-empty 3D or 4D (batch mode) tensor expected for inputr4   r4   r:   r8   rg   r  ]      Or:   Fc                      g)NzAUnsupported memory format. Supports only ChannelsLast, Contiguousr4   r4   r:   r8   rg   r  b  s    Wr:   )rX   rj   r   r   rQ   r   r  r   r   rz  r{  )r   r  r   r  r  rA  ri  r~  r  r  r  r  r  r  r  rB  r  r  r   rC  rD  s                        r8   r?  r?  1  sy    M;/FB	LLFy a 6{aB&)	7+JD!*h7I**R.K**R.KBJ//6M+++IIK1c	
 
%11	1IIK6!O	

 	W	

 (RriXL&z2RIVK



$ k11r:   c                 |  ^ ^^^^^ [        TX#XEU5      u  nmm[        R                  " TR                  T R                  :H  U U4S j5        UmTR                  mUUUU4S jn	U	" T 5        U	" U5        [
        R                  " T5      n
[        R                  " TR                  TR                  TR                  U
S9$ )Nc                  <   > STR                    ST R                    3$ )NzExpected dtype z  for `gradOutput` but got dtype r   r  s   r8   rg   7meta_max_pool2d_with_indices_backward.<locals>.<lambda>  s    /$**-MkN_N_M`ar:   c                 l   > [        U TTS-
  T5        [        U TTS-
  T5        [        U TTS-
  T5        g )Nr0   r   r3   )r\  )r  r  r   rC  rD  s    r8   _check_dim_size>meta_max_pool2d_with_indices_backward.<locals>._check_dim_size  s9    q$q,7q$q,7q$q+6r:   r  )
r?  rX   rj   r`   r   rQ   r   r   r   r   )r%  r   r  r   r  r  rA  r   rB  r  r   r  r   rC  rD  s   ``         @@@@r8   %meta_max_pool2d_with_indices_backwardr  }  s     	,k7i		
 
LL

k'''a
 L99D7 7
 K G//5M;;

jj{{#	 r:   c                    [        XX#XE5      u  nnnU R                  5       S:X  a  U R                  S5      OSn	[        R                  " U 5      n
U R                  5       S:X  a  XgU/nOXXx/n[
        R                  " UU R                  U R                  U
S9[
        R                  " U[
        R                  U R                  U
S94$ r=  )
r?  r   r   rQ   r   rX   r   r`   r   r   r@  s               r8   meta_max_pool2d_with_indicesr    s     	,FX		
  %yy{a/UZZ^QF//6Myy{a;7\?++<<'		
 	++<<'		
 r:   c                   ^ ^^^
^^ [         R                  " T R                  S;   U 4S j5        T R                  n[        US-
  U5       H1  m
[         R                  " T R	                  T
5      S:  U
U 4S j5        M3     [         R                  " [        T5      S:H  S 5        [         R                  " [        U5      S:H  S 5        T R	                  S	5      nT R	                  S
5      mT R	                  S5      mUS:X  a  T R	                  S5      nOSn[         R                  " T R                  TR                  :H  S 5        [         R                  " TR                  S:H  U4S j5        TR	                  S5      nTR	                  S5      nTR	                  S5      m
[         R                  " Xv:  S 5        [         R                  " X:H  S 5        [         R                  " T
S:H  U
4S j5        [         R                  " US   TS   -   S-
  T:*  UU4S j5        [         R                  " US   TS   -   S-
  T:*  UU4S j5        T R                  5       S:X  a  XeUS   US   /n	O
XRS   US   /n	[         R                  " U	T R                  T R                  S9[         R                  " U	[         R                  T R                  S94$ )Nr  c                  "   > ST R                    3$ )Nz:fractional_max_pool2d: Expected 3D or 4D tensor, but got: r  r   s   r8   rg   ,meta_fractional_max_pool2d.<locals>.<lambda>  s    LTYYKXr:   r0   r   c                  2   > STR                  5        ST  S3$ )Nz_fractional_max_pool2d: Expected input to have non-zero  size for non-batch dimensions, but got r  z emptyr   )r   r   s   r8   rg   r    s%     77;yy{mCSTUSVV\^r:   r   c                      g)NzNfractional_max_pool2d: kernel_size musteither be a single int or tuple of Intsr4   r4   r:   r8   rg   r         2r:   c                      g)NzOfractional_max_pool2d: output_size must either be a single int or tuple of Intsr4   r4   r:   r8   rg   r    r  r:   rv  r  r   r0  r3   c                      g)Nz6Expect _random_samples to have the same dtype as inputr4   r4   r:   r8   rg   r    s    Hr:   c                  "   > ST R                    3$ )Nz1Expect _random samples to have 3 dimensions got, r  )random_sampless   r8   rg   r    s    CNDWDWCXYr:   c                      g)Nz=Expect _random_samples.size(0) no less then input batch size.r4   r4   r:   r8   rg   r    r   r:   c                      g)Nz<Expect _random_samples.size(1) equals to input channel size.r4   r4   r:   r8   rg   r        Nr:   c                     > ST  S3$ )Nz/Expect _random_samples.size(2) equals to 2 got .r4   )r   s   r8   rg   r    s    #RSTRUUV!Wr:   c                     > STS    ST  3$ )Nz%fractional_max_pool2d: kernel height r   z' is too large relative to input height r4   )input_heightr  s   r8   rg   r    s    7A7GGno{n|}r:   c                     > STS    ST  3$ )Nz$fractional_max_pool2d: kernel width r3   z& is too large relative to input width r4   )input_widthr  s   r8   rg   r    s    6{1~6FFlmxlyzr:   r	  )rX   rj   r   r   r   r   r`   r   r   r   r   )r   r  r  r  r   input_channelsinput_batchr  cr   r   r  r  s   `` `      @@@r8   meta_fractional_max_pool2dr    sf   	LL		VX 99D4!8T"IIaL1^	
 # 
LLKA	2
 
LLKA	2 YYr]N99R=L))B-Kqyiil	LL

n***H 
LLq Y
 	AAAAAA	LL	O 
LL	N 
LLaWX	LLAQ'!+|;} 
LLAQ'!+{:z
 xxzQ[^[^LAA? 	**;;	

 	++;;	
 r:   c                 H   [         R                  " [        U5      S;   S 5        US   n[        U5      S:X  a  UOUS   n[        U5      S:X  a  UOUS   n[         R                  " U(       + =(       d    [        U5      S;   S 5        U(       d  UOUS   n	U(       d  UO[        U5      S:X  a  U	OUS   n
U(       d  UO[        U5      S:X  a  U	OUS   n[         R                  " [        U5      S;   S 5        US   n[        U5      S:X  a  UOUS   n[        U5      S:X  a  UOUS   n[         R                  " [        U5      S;   S 5        US   n[        U5      S:X  a  UOUS   n[        U5      S:X  a  UOUS   n[         R                  " U R                  S	;   S
 5        U R                  S:X  a  U R	                  S5      OSnU R	                  S5      nU R	                  S5      nU R	                  S5      nU R	                  S5      n[        UXlXU5      n[        UX}U
UU5      n[        UXUUU5      n[        U UUUUU	U
UUUUUUUUUUUUUS5        U R                  S:H  =(       a'    [        R                  " U 5      [         R                  :H  nU R                  S:X  a  UUUU4nOUUUUU4nU R                  U5      nU R                  U[         R                  S9nU(       a:  UR                  [         R                  S9nUR                  [         R                  S9nUU4$ )Nr  c                      gNzMmax_pool3d: kernel_size must either be a single int, or a tuple of three intsr4   r4   r:   r8   rg   .meta_max_pool3d_with_indices.<locals>.<lambda>1      _r:   r   r3   r   c                      gNzQmax_pool3d: stride must either be omitted, a single int, or a tuple of three intsr4   r4   r:   r8   rg   r#  9      cr:   c                      gNzImax_pool3d: padding must either be a single int, or a tuple of three intsr4   r4   r:   r8   rg   r#  A      [r:   c                      gNzJmax_pool3d: dilation must be either a single int, or a tuple of three intsr4   r4   r:   r8   rg   r#  I  r  r:   r  c                      gr  r4   r4   r:   r8   rg   r#  Q  r  r:   rX  r>  rv  r  r   zmax_pool3d_with_indices()r0  r   r   )rX   rj   r   r   r   rz  r  rQ   r   r  r   r   r4  )r   r  r   r  r  rA  r  r~  r  r  r  r  r  r  r  r  r  r  r	  r  r  r  r  r  r  r  r  r   r   r   s                                 r8   meta_max_pool3d_with_indicesr/  %  s    
LLKF"_ 
QB;1$+a.B;1$+a.B	LL
+c&kV+c vayBc&kQ&6F1IBc&kQ&6F1IB	LLG[ 
B7|q gajB7|q gajB	LLH\ I ]a/	Xa[I ]a/	Xa[I	LL

fK
  %zzQUZZ^AFjjnGJJrNEjjnGZZ^F yIE"7BB	9MG!&""iKF








#+4 	

aXE77>%BXBXX  zzQeWf5	WeWf=	
//)
$Cooiu{{o;Gff5#9#9f:**5+A+A*B<r:   c                 r   [         R                  " [        U5      S;   S 5        US   n[        U5      S:X  a  UOUS   n	[        U5      S:X  a  UOUS   n
[         R                  " U(       + =(       d    [        U5      S;   S 5        U(       d  UOUS   nU(       d  U	O[        U5      S:X  a  UOUS   nU(       d  U
O[        U5      S:X  a  UOUS   n[         R                  " [        U5      S;   S 5        US   n[        U5      S:X  a  UOUS   n[        U5      S:X  a  UOUS   n[         R                  " [        U5      S;   S 5        US   n[        U5      S:X  a  UOUS   n[        U5      S:X  a  UOUS   n[         R                  " UR                  S	;   S
 5        UR	                  S5      nUR	                  S5      nUR	                  S5      nUR	                  S5      nU R	                  S5      nU R	                  S5      nU R	                  S5      n[        UU UUUU	U
UUUUUUUUUUUUUUUS5        UR                  S:H  =(       a'    [        R                  " U5      [         R                  :H  nUR                  UR                  5      nU(       a  UR                  [         R                  S9nU$ )Nr  c                      gr"  r4   r4   r:   r8   rg   7meta_max_pool3d_with_indices_backward.<locals>.<lambda>  r$  r:   r   r3   r   c                      gr&  r4   r4   r:   r8   rg   r2    r'  r:   c                      gr)  r4   r4   r:   r8   rg   r2    r*  r:   c                      gr,  r4   r4   r:   r8   rg   r2    r  r:   r  c                      gr  r4   r4   r:   r8   rg   r2    r  r:   r>  rv  r  r   z"max_pool3d_with_indices_backward()rX  r   )rX   rj   r   r   r   r  rQ   r   r  r   r   r4  )r%  r   r  r   r  r  rA  r   r  r~  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r(  s                                r8   %meta_max_pool3d_with_indices_backwardr7    s    
LLKF"_ 
QB;1$+a.B;1$+a.B	LL
+c&kV+c vayBc&kQ&6F1IBc&kQ&6F1IB	LLG[ 
B7|q gajB7|q gajB	LLH\ I ]a/	Xa[I ]a/	Xa[I	LL

fK
 jjnGJJrNEjjnGZZ^FR Er"Gb!F#








,/8 	

aXE77>%BXBXX  -J]]1G1G]H
r:   gridc                   ^ ^^ [         R                  " T R                  TR                  :H  UU 4S j5        [         R                  " T R                  [         R                  :H  =(       a    TR                  [         R                  :H  UU 4S j5        [         R                  " T R
                  S   TR
                  S   :H  UU 4S j5        [         R                  " TR
                  S   T R                  S-
  :H  UU 4S j5        [        ST R                  5       H/  m[         R                  " T R
                  T   S:  UU 4S j5        M1     g )	Nc                  <   > STR                    ST R                    3$ )NzNgrid_sampler(): expected input and grid to be on same device, but input is on z and grid is on r  r8  r   s   r8   rg   +check_grid_sampler_common.<locals>.<lambda>  s"    \\N"24;;-Ar:   c                  <   > STR                    ST R                    3$ )NzTgrid_sampler(): expected input and grid to have torch.strided layout, but input has z and grid has )r   r;  s   r8   rg   r<    s!    nT[[MCr:   r   c                  <   > STR                    ST R                    3$ )NzZgrid_sampler(): expected grid and input to have same batch size, but got input with sizes  and grid with sizes rR  r;  s   r8   rg   r<    s"      %},A$**Or:   r   r   c                  B   > STR                   S-
   ST R                   3$ )Nz+grid_sampler(): expected grid to have size r   z, in last dimension, but got grid with sizes )r   r   r;  s   r8   rg   r<    s'    9%**q.9I J226**?r:   c                  *   > STR                    ST  S3$ )NzYgrid_sampler(): expected input to have non-empty spatial dimensions, but input has sizes r  r  rR  r  s   r8   rg   r<    r  r:   )rX   rj   r   r   r  r   r   r   )r   r8  r   s   ``@r8   check_grid_sampler_commonrB    s    	LL#	
 
LL%F$++*F	
 
LLA$**Q-'	
 
LL

2%**q.(	
 1ejj!KKNQ	
 "r:   c                        \ rS rSrSrSrSrSrg)GridSamplerInterpolationi  r   r3   r   r4   N)r   
__module____qualname____firstlineno__BILINEARNEARESTBICUBIC__static_attributes__r4   r:   r8   rD  rD    s    HGGr:   rD  interpolation_modec                 6  ^ ^ [         R                  " T R                  S:H  =(       a    T R                  TR                  :H  UU 4S j5        [         R                  " T R                  S:H  =(       a    U[        R                  R
                  :H  (       + S 5        g )NrX  c                  <   > STR                    ST R                    3$ )Nzdgrid_sampler(): expected 5D input and grid with same number of dimensions, but got input with sizes r?  rR  r;  s   r8   rg   'check_grid_sampler_3d.<locals>.<lambda>  s!    449KK=#DJJ<1r:   c                      g)Nz<grid_sampler(): bicubic interpolation only supports 4D inputr4   r4   r:   r8   rg   rO  &  r  r:   )rX   rj   r   rD  rJ  r  )r   r8  rL  s   `` r8   check_grid_sampler_3drQ    sp    	LL

a3EJJ$))3	
 
LLJJ!O M"&>&F&F&L&LL	
 	Or:   c                     US   nU(       a$  [         R                  " U[         R                  S9nOS n[         R                  " U[         R                  S9n	X4$ Nr   r   )rX   r  r   r   
r%  r   r8  rL  padding_modealign_cornersr?  input_requires_gradr(  	grad_grids
             r8   grid_sampler_2d_backward_metarY  *  sO     &a.%%e5;R;RS

  U5L5LMI""r:   c                     [        X5        [        XU5        U R                  S   nU R                  S   nUR                  S   nUR                  S   nUR                  S   n	U R                  XVXxU	45      $ )Nr   r3   r   r0   )rB  rQ  r   r   )
r   r8  rL  rU  rV  r  Cout_Dout_Hout_Ws
             r8   grid_sampler_3dr_  =  sn     e*%'9:AAAAJJqMEJJqMEJJqME??A%677r:   rX  c                     [        X5        [        XU5        US   nU(       a$  [        R                  " U[        R                  S9nOS n[        R
                  " U[        R                  S9n	X4$ rS  )rB  rQ  rX   r  r%  r   rT  s
             r8   grid_sampler_3d_backwardra  P  sg     e*%'9:%a.%%!?!?

 
  U5S5STI  r:   c                     UR                  S5      nU(       d  [        R                  " U5      nXCS'   [        R                  " U /UQ70 UD6$ )Nr`   )r_   rQ   	get_dtyperX   r   )r   r  rS   rO  r`   s        r8   fullrd  h  sA    JJwE
+7O;;t-d-f--r:   c           	      V   U[         R                  :X  a  [         R                  " US L S 5        [         R                  " SUc  U R                  OUUUc  U R
                  OUUS9nU R                  (       a>  UR                  U R                  5       U R                  5       U R                  5       5        O/UR                  U R                  5       U R                  5       S5        UR                  S5        U$ [        R                  R                  U UUUUUS9nUR!                  S5        U$ )Nc                      g)Nz9memory format option is only supported by strided tensorsr4   r4   r:   r8   rg   zeros_like.<locals>.<lambda>  r  r:   r   r  Tr  )rX   
sparse_coorj   r   r`   r   	is_sparsesparse_resize_and_clear_r   
sparse_dim	dense_dimr   _coalesced_r-   r   r6  fill_)r   r`   r   r   r   r   r  s          r8   r  r  s  s     !!!T!O	

 kk %$**5"(.4;;f!
 >>((		T__.0@ ((dhhj!D

//
!
!# " C IIaLJr:   r   c                    Uc  [         R                  " 5       nUc  [         R                  " 5       nUc  [         R                  n[         R                  " XX#US9$ r  rX   r   get_default_devicer  r   r   r`   r   r   r   r   s         r8   	meta_onesrs    P     }'')~))+~;;&J r:   c                    Uc  [         R                  " 5       nUc  [         R                  " 5       nUc  [         R                  n[         R                  " XX#US9$ r  rp  rr  s         r8   
meta_zerosrv    rt  r:   c                 .    [         R                  " U 5      $ rD   rQ   clone_preserve_strides)r   r7  r   r   s       r8   meta_select_scatterrz        ''--r:   c                 .    [         R                  " U 5      $ rD   rx  )r   r7  r   r   r   steps         r8   meta_slice_scatterr~    r{  r:   dim_post_exprwrap_scalarc           	          US::  a  U(       d  [        SU S35      eSnU* nUS-
  nX:  d  X:  a  [        SU  SU SU S35      eU S:  a  X-  n U $ )	Nr   zdim_post_expr=z <= 0 but wrap_scalar is Falser3   zdim z out of bounds (r}   r~   r%  )r   r  r  r  r  s        r8   r   r     s}      /MN  .C
!
C
yCItC5(8RuAFGG
QwJr:   c                 L    U R                  5       S:X  a  S$ U R                  U   $ r+  rY  )r  r   s     r8   ensure_nonempty_sizer    s!    11.!''#,.r:   c                 F  ^ ^^^ [        T R                  5       S5      n[        TR                  5       S5      n[        R                  " X4:H  S 5        [	        U5       H@  mTT:w  d  M  [        R                  " [        TT5      [        T T5      :*  UUUU 4S j5        MB     g )Nr3   c                      g)NzDIndex tensor must have the same number of dimensions as input tensorr4   r4   r:   r8   rg   $gather_shape_check.<locals>.<lambda>  s    Vr:   c                  N   > ST STR                    3STR                    ST  3-   $ )Nz!Size does not match at dimension z expected index  to be no larger than self  apart from dimension rR  )r   r   r   r   s   r8   rg   r    s5    ;A3>Nu{{m\/

|;QRUQVWXr:   )r  r   rX   rj   r   r  )r   r   r   	self_dims
index_dimsr   s   ```  @r8   gather_shape_checkr    s}    DHHJ"IUYY[!$J	LLV 98LL$UA.2FtQ2OOX r:   c                   ^ SSK Jn  [        XR                  5       5      nU" TR	                  5       S:H  5      nU(       df  [
        R                  " TR                  [
        R                  :H  =(       d    TR                  [
        R                  :H  U4S j5        [        XT5        U R                  TR                  5      $ )Nr   r!  c                  "   > ST R                    3$ )Nz8gather(): Expected dtype int32/int64 for index, but got r   r   s   r8   rg   meta_gather.<locals>.<lambda>  s    Nu{{m\r:   )r1  r"  r   r   r   rX   rj   r`   r   r   r  r   r   )r   r   r   sparse_gradr"  wrapped_dimis_index_emptys     `    r8   meta_gatherr    s~    D hhj1K#EKKMQ$67NKK5::%A		)A\	
 	4e4>>%++&&r:   c                     U(       a<  U S:X  a  gU S:X  a  gU S:X  a  gU S:X  a  gU S	:X  a  g
[         R                  " SS 5        g U S:X  a  gU S:X  a  g[         R                  " SS 5        g )Nr$  
REDUCE_ADDr  REDUCE_MULTIPLYmeanREDUCE_MEANamaxREDUCE_MAXIMUMaminREDUCE_MINIMUMFc                      g)Nz=reduce argument must be either sum, prod, mean, amax or amin.r4   r4   r:   r8   rg   #get_operator_enum.<locals>.<lambda>  s    Sr:   addmultiplyc                      g)Nz/reduce argument must be either add or multiply.r4   r4   r:   r8   rg   r  "  s    $Ur:   r  )reduce_use_new_optionss     r8   get_operator_enumr    s{    e$ ##S	
 	e
"$UUVr:   c                 n  ^  SSK Jn  U" UR                  5       S:g  5      (       aZ  [        R                  " UR
                  [        R                  :H  =(       d    UR
                  [        R                  :H  U 4S j5        Ub3  [        R                  " UR
                  UR
                  :H  U 4S j5        g g )Nr   )r  c                     > T  S3$ )Nz((): Expected dtype int32/int64 for indexr4   method_names   r8   rg   ,scatter_gather_dtype_check.<locals>.<lambda>-  s    {m#KLr:   c                     > T  S3$ )Nz0(): Expected self.dtype to be equal to src.dtyper4   r  s   r8   rg   r  3  s    {m#STr:   )r1  r  r   rX   rj   r`   r   r   )r  r   r   src_optr  s   `    r8   scatter_gather_dtype_checkr  '  sy    CU[[]a'((KK5::%A		)AL	

 JJ'--'T	
 r:   c                     [        U S5      $ r2   )r  r   s    r8   ensure_nonempty_dimr  7  s    sA;r:   c                   ^ ^^^ SSK Jn  U" TR                  5       S:H  5      (       a  g [        R                  " [        T R                  5       5      [        TR                  5       5      :H  S 5        [        T R                  5       5      n[        U5       HB  nUT:X  a  M  [        TU5      n[        T U5      n[        R                  " Xx:*  UUU 4S j5        MD     Tb  [        R                  " [        T R                  5       5      [        TR                  5       5      :H  S 5        [        U5       H9  n[        TU5      n[        TU5      n	[        R                  " Xy:*  UU4S j5        M;     g g )Nr   r!  c                      g)NzCIndex tensor must have the same number of dimensions as self tensorr4   r4   r:   r8   rg   %scatter_shape_check.<locals>.<lambda>C  r  r:   c                  H   > STR                    STR                    3ST  3-   $ )NExpected index r  r  rR  )r   r   r   s   r8   rg   r  Q  s*    oekk]2Mdjj\Z&se,-r:   c                      g)NzBIndex tensor must have the same number of dimensions as src tensorr4   r4   r:   r8   rg   r  Y  s    Xr:   c                  <   > ST R                    STR                    3$ )Nr  z to be no larger than src rR  )r   r  s   r8   rg   r  `  s    /%++6PQXQ^Q^P_`r:   )	r1  r"  r   rX   rj   r  r   r   r  )
r   r   r   r  r"  r  r   index_d_sizeself_d_size
src_d_sizes
   ````      r8   scatter_shape_checkr  <  s"   Dekkmq())	LLDHHJ'+>uyy{+KKU
 $DHHJ/I 98+E15*43'-	
  
+/B7;;=/QQX	
 y!A/q9L-gq9JLL*` " r:   c                     [        XR                  5       5      n[        SXU5        [        XX#5        Ub  [	        XE5        g g )Nscatter)r   r   r  r  r  )r   r   r   r7  r  r  r  s          r8   scatter_meta_implr  e  s;     hhj1Ky$s;56'3 r:   c                 R    [        XX#S5        U R                  U R                  5      $ Nr  r  r   r   r   r   r   r7  s       r8   meta_scatter_addr  n  s!    dU3>>$**%%r:   c                      [        XX#S5        U $ r  r  r  s       r8   meta_scatter_add_r  t  s    dU3Kr:   c                     [        U[        R                  5      (       a  UOS n[        XX%U5        U R	                  U R
                  5      $ rD   )rt   rX   r   r  r   r   r   r   r   src_or_valuerx  r7  s         r8   meta_scatterr  z  s:     %\5<<@@,dCdV4>>$**%%r:   c                 f    [        U[        R                  5      (       a  UOS n[        XX%U5        U $ rD   )rt   rX   r   r  r  s         r8   meta_scatter_r    s,     %\5<<@@,dCdV4Kr:   queryr   r  	dropout_p	is_causalreturn_debug_maskr  c           	      4   U R                  S5      nU R                  S5      nU R                  S5      n	U R                  S5      n
UR                  S5      n[        R                  " U 5      n[        R                  " XxU	4[        R                  U R
                  S9nU(       a`  U
S:  a  SOSn[        R                  " X-  5      nUS::  a  SnOUS::  a  Sn[        R                  " XxX4U R                  U R
                  S9nO*[        R                  " SU R                  U R
                  S9n[        R                  R                  (       a#  [        R                  R                  5       (       d  [        U 5      S	:X  aI  [        R                  " S
[        R                  SS9n[        R                  " S
[        R                  SS9nOH[        R                  " S[        R                  SS9n[        R                  " S
[        R                  SS9nUUS S U	UUUU4	$ )Nr   r3   r   r0   r	  @         r   r4   r   )r   rX   r   r   r\   r   r  ceilr`   r  r  r   r  r  r   ry  )r  r   r  r  r  r  r  r   	num_headsmax_seqlen_batch_qhead_dimmax_seqlen_batch_k	attention	logsumexpblocksize_cmax_seqlen_k
debug_maskseedoffsets                      r8   (meta__scaled_dot_product_flash_attentionr    s    AJ

1IAzz!}H!  'I	 23kk||I %]cyy!3!AB$L3&L[[$6E++<<

 [[%++ellK
 }}UZZ4466+e:LPU:U{{2UZZ?Ruzz&A{{Aell6BRu||FC 	
 
r:   	q_descale	k_descale	v_descalec
           	          U R                   [        R                  :X  a  U R                  [        R                  5      n [        U UUUUUU	5      $ rD   )r`   rX   rp  r4  ro  r  )
r  r   r  r  r  r  r  r  r  r  s
             r8   2meta__scaled_dot_product_flash_attention_quantizedr    sJ     {{e)))(3 r:   	res_shape.c                   ^  [        T R                  5      U:X  a  [        R                  " T 5      nU$ [	        / SQU 4S jSS9nU Vs/ s H  oAU   PM	     nn[        [        U5      5       Vs/ s H  ocR                  U5      PM     nn[        R                  " UT R                  T R                  S9R                  U5      nU$ s  snf s  snf )N)r   r3   r   r0   c                 *   > TR                  5       U    $ rD   r,  )idxr  s    r8   rg   ,alloc_with_matching_layout.<locals>.<lambda>  s    %,,.*=r:   Tr   r	  )ri   r   rX   r   sortedr   r   r   r   r`   r   r   )r  r  r  	dim_orderr  permuted_shaper   final_permutes   `       r8   alloc_with_matching_layoutr    s     U[[Y&u% J =t
	 5>>ISC.I>5:3y>5JK5J+5JKkk%++ell

'-
  	 J ?Ks   C.C
	attn_biascompute_log_sumexpc	           	         U R                  S5      n	U R                  S5      n
U R                  S5      nUR                  S5      nUR                  S5      nXX4n[        X5      n[        R                  " XUS4[        R                  U R
                  S9n[        R                  " S[        R                  SS9n[        R                  " S[        R                  SS9nUUS S UUUUS 4	$ Nr   r3   r   r   r	  r4   r   r   r  rX   r   r\   r   r   )r  r   r  r  r  r  r  r  r  r  rg  S_QS_KVD_Vr  r  
logsum_expr  r  s                      r8   (meta__scaled_dot_product_cudnn_attentionr    s     	

1A

1A
**Q-C88A;D
**R.Cs I
$U
6C	
sAkk||J ;;rF;D[[5::f=F 	
 
r:   c           	         U R                  S5      nU R                  S5      n	U R                  S5      n
UR                  S5      nUR                  S5      nXX4n[        X5      n[        R                  " XU
4[        R                  U R
                  S9n[        R                  " S[        R                  SS9n[        R                  " S[        R                  SS9nUUS S U
UUUS 4	$ r  r  )r  r   r  r  r  r  r  r  r  H_Qr  r  r  r  r  r  r  r  s                     r8   5meta__scaled_dot_product_fused_attention_overrideabler  1  s     	

1A
**Q-C
**Q-C88A;D
**R.C"I
$U
6C	
kk||J ;;rF;D[[5::f=F 	
 
r:   r  r  	cum_seq_q	cum_seq_kmax_qmax_kphilox_seedphilox_offsetc                     [         R                  " U5      n[         R                  " U5      n[         R                  " U5      nUUU4$ rD   rG  )r  r  r   r  r   r  r  r  r  r  r  r  r   r  r  grad_qgrad_krL  s                     r8   'meta__scaled_dot_product_flash_backwardr  \  sA    , e$Fc"Fe$F66!!r:   	attn_maskc                    U R                  S5      nU R                  S5      nU R                  S5      n	[        R                  " U 5      n
[        R                  " UU	U4[        R                  U R
                  S9R                  SS5      nU
U4$ )Nr   r3   r   r	  )r   rX   r   r   r\   r   r  )r  r   r  r  r  r  r  r   r  r  r  r  s               r8   0meta__scaled_dot_product_flash_attention_for_cpur  x  s     AJ

1IA  'I	

 kk|| i1o  	 r:   c
                 `   [         R                  " UR                  5       SUR                  UR                  S9n
[         R                  " UR                  5       SUR                  UR                  S9n[         R                  " UR                  5       SUR                  UR                  S9nXU4$ )Nr   r   r3   r0   r	  )rX   empty_permutedr   r`   r   )r  r  r   r  r   r  r  r  r  r  r  r  rL  s                r8   9meta__scaled_dot_product_flash_attention_for_cpu_backwardr    s    & !!

kk||	F !!
iizz	F !!

kk||	F 6!!r:   dropout_maskc                   ^ ^^^^^^^ S nU" T 5      u  mmU" U5      u  pU" U5      u  pTR                   u  mmmmU	R                   u  pmn
UUUUUU U4S jnUUUUU4S jnTS:  d  UT:  a  TS:  a  U" 5       $ U" 5       $ )Nc                 p   U R                  5       S:X  a  U R                  S5      S4$ U R                  5       S:  ax  Sn[        U R                  5       S-
  5       H  nXR                  U   -  nM     U R	                  XR                  S5      U R                  S5      U R                  S5      5      S4$ U S	4$ )
Nr0   r   Tr0  r3   rv  r  r   F)r   r  r   r   viewr   )r=   r   r   s      r8   	ensure_4dBmeta__scaled_dot_product_attention_math_for_mps.<locals>.ensure_4d  s    557a<;;q>4''UUWq[J1557Q;'ggaj(
 (66*ffRj!&&*affRjI4OOe8Or:   c                  j  > TR                  TR                  5      n T	(       a  U R                  T5      n TR                  TTTT45      nT	(       aa  TR                  5       S:X  a  UR	                  S5      nX4$ [        TR                  S S 5      UR                  SS -   nUR                  U5      nX4$ )Nr0   r   rv  r3   r0  )r   r   view_asr   squeezer   r  )
r   attnr   r   max_seq_lengthnum_headq_q_sizer  
unsqueezeds
      r8   sdpa_vector_fast_mpsMmeta__scaled_dot_product_attention_math_for_mps.<locals>.sdpa_vector_fast_mps  s    ll288$++e$C||Z6>JKyy{a||A y U[["-.Aa@yy'yr:   c                  p   > Sn TR                  TR                  5      nTR                  TTTU T45      nX4$ )Nr  r  )blocksr   r9  r   	head_sizer  r  r  s      r8   sdpa_vector_2pass_mpsNmeta__scaled_dot_product_attention_math_for_mps.<locals>.sdpa_vector_2pass_mps  s<    ll288$||Z669$UV  r:   i   i   rR  )r  r   r  r  r  r  r  r  r  k_rT   v_k_sizer  r!  r   r   r  r  r  r  r  s   `              @@@@@@@r8   /meta__scaled_dot_product_attention_math_for_mpsr&    s    	 u%NB
cNEBeEB.0hh+J&)#%88 A~q ! ! 	$FVO$8N$&&#%%r:   c           	      R   U R                  SS5      n UR                  SS5      nUR                  SS5      nU R                  S5      nU R                  S5      n	U R                  S5      n
UR                  S5      n[        R                  " XXU R                  U R
                  S9n[        R                  R                  (       a0  [        R                  R                  5       (       a   U(       a  U	OSnO%U(       a  [        R                  " U	S-  5      S-  OSn[        R                  " XU4[        R                  U R
                  S9nUR                  SS5      n[        R                  " S[        R                  S	S9n[        R                  " S[        R                  S	S9nXUU4$ )
Nr3   r   r   r  r   r	  r  r4   r   )r  r   rX   r   r`   r   r  r  r   r  r  r  r\   r   )r  r   r  r  r  r  r  r  r  r  r  Kvr  logsumexp_dimr  r  r  s                    r8   ,meta__scaled_dot_product_efficient_attentionr*    s?    OOAq!E
--1
COOAq!E

1A

1A

2I	BB
++aIU\\
RC}}UZZ4466	 0Q2D		!b&)B.!	
}%kk||J --1
C ;;rF;D[[5::f=FD&((r:   grad_input_maskc                    UR                  S5      nUR                  S5      nUR                  S5      nUR                  S5      nUR                  S5      nUR                  S5      n[        R                  " XUU4SUR                  UR                  S9n[        R                  " XUU4SUR                  UR                  S9n[        R                  " XUU4SUR                  UR                  S9nS nUb  U
S   (       ax  UR                  S5      nUS-  S:X  a  UO
US-   US-  -
  n[        UR                  5       5      nUUS'   [        R                  " UUR                  UR                  S9nUS	S U24   nUUUU4$ )
Nr   r3   r   r0   r
  r	  r   r  .)r   rX   r  r`   r   r   r   )r  r  r   r  r  r   r  r   r  r  r+  r  r  r   r  r  r  
head_dim_vr  r  r  rL  	grad_biaslastDimlastDimAligned	new_sizess                             r8   +meta__scaled_dot_product_efficient_backwardr2  '  su   ( AJ

1IJJqMEzz!}HAJHHQKE!!	x0kk||	F !!	x0iizz	F !!	z2kk||	F I!3..$$+bLA$57R<'TV,;V)*	&	"KKY__Y5E5E
	 c8G8m,	669,,r:   c                     [         R                  " U5      n[         R                  " U5      n[         R                  " U5      nUUU4$ rD   rG  )r  r  r   r  r   r  r   r  r  r  r  r  r  r  r  r  r  r  rL  s                      r8   'meta__scaled_dot_product_cudnn_backwardr4  c  sA    . e$Fc"Fe$F66!!r:   window_size_leftwindow_size_right	seqused_kalibi_slopesc                    Uc  U R                  S5      OUR                  5       S-
  nUc  U R                  S5      OUnUc  UR                  S5      OUnU R                  S5      nU R                  S5      n[        R                  " U 5      nUc2  [        R                  " UUU4[        R
                  U R                  S9nOAU R                  S5      n[        R                  " UU4[        R
                  U R                  S9nU	(       ac  US:  a  SOSn[        R                  " UU-  5      nUS::  a  SnOUS::  a  Sn[        R                  " UUUU4U R                  U R                  S9nO*[        R                  " SU R                  U R                  S9nS	u  nn[        R                  R                  (       al  [        R                  R                  5       (       aI  [        R                  " S
[        R                  SS9n[        R                  " S
[        R                  SS9nOH[        R                  " S[        R                  SS9n[        R                  " S
[        R                  SS9nUUUUU4$ )Nr   r3   r  r   r	  r  r  r  NNr4   r   r   )r   r   rX   r   r   r\   r   r  r  r`   r  r  r   r  r   ry  )r  r   r  r  r  r  r  r  r  r  r  r5  r6  r7  r8  r   r  r  r  r  r  r  total_qr  r  r  r  r  s                               r8   meta__flash_attention_forwardr<    s   4 #,"3A9JQ9NJ*3*;A(1(9!u

2Izz"~H   'IKK$67++<<
	 **Q-KK ELL
	 %]cyy!3k!AB$L3&L[[$6E++<<

 [[%++ellK
 LD&}}UZZ4466{{2UZZ?Ruzz&A{{Aell6BRu||FC r:   c                     U R                   [        R                  :X  a  U R                  [        R                  5      n [        U UUUUUUUUU	UUUUU5      $ rD   )r`   rX   rp  r4  ro  r<  )r  r   r  r  r  r  r  r  r  r  r  r  r  r  r5  r6  r7  r8  s                     r8   'meta__flash_attention_forward_quantizedr>    sb    * {{e)))(( r:   c                     [         R                  " U5      n[         R                  " U5      n[         R                  " U5      nUUU4$ rD   rG  )r  r  r   r  r   r  r  r  r  r  r  r  r   r  r  r5  r6  
grad_querygrad_key
grad_values                       r8   meta__flash_attention_backwardrC    sA    0 !!%(J$H!!%(Jx++r:   cu_seqlens_qcu_seqlens_kmax_seqlen_qr  custom_mask_typecausal_diagonalseqlen_kwindow_sizec           	         U R                  S5      nU R                  S5      nUR                  S5      nU R                  S5      nUR                  S5      n[        R                  " UUUUU R                  U R                  S9nUb  UR                  S5      S-
  OUnUnUb  Uc  [        S5      eUnUb  UOUnU
(       a  [        R                  " US-  5      S-  OSn[        R                  " UUU4[        R                  U R                  S9n[        R                  " S[        R                  S	S9n[        R                  " S[        R                  S	S9nUUUUUU4$ )
Nr   r3   r  r   r	  z;max_seqlen_q must not be None when cu_seqlens_q is providedr  r4   r   )
r   rX   r   r`   r   r   r  r  r\   r   )r  r   r  rH  rD  rE  rF  r  r  rG  r  r  rH  rI  rJ  r  r  r  r  r(  r  logsumexp_batch_dimactual_max_seqlen_qactual_max_seqlen_kr)  r  r  r  s                               r8   !meta__efficient_attention_forwardrO    sE   , 	

1A

1AA

2I	BB
++aIrU\\
RC7C7O,++A.2VW M  +*6*B,4F		%*+b0A  	i7kk||J ;;rF;D[[5::f=F
D&*=?RRRr:   bias_requires_gradnum_splits_keyshared_storage_dqdkdvc                    U(       a  [         R                  " UR                  S   UR                  S   :H  S 5        [         R                  " UR                  S   UR                  S   :H  S 5        [         R                  " / UR                  SS QSPUR                  S   PUR                  S   P7UR                  UR
                  S9nUR                  S	S5      nUR                  S	S5      nUR                  S	S
5      nOB[         R                  " U5      n[         R                  " U5      n[         R                  " U5      nUby  UR                  S5      nUS-  S:X  a  UO
US-   US-  -
  n[        UR                  5       5      nUUS'   [         R                  " UUR                  UR
                  S9nUSS U24   nO[         R                  " SUR
                  S9nUUUU4$ )Nr3   c                      g)Nz,seqlen must match for `shared_storage_dqdkdvr4   r4   r:   r8   rg   4meta__efficient_attention_backward.<locals>.<lambda>m  s    Br:   r0   c                      g)Nz3embedding dim must match for `shared_storage_dqdkdvr4   r4   r:   r8   rg   rU  q      Ir:   r   r  r   r	  rv  r   r  .r4   r  )
rX   rj   r   r   r`   r   r$  r   r   r   )r  r  r   r  rH  rD  rE  rF  r  r  r  r   r  rG  rP  r  rQ  rR  chunkr@  rA  rB  r/  r0  r1  r.  s                             r8   "meta__efficient_attention_backwardrY  Q  s   2 KKNciil*B	
 	KKNciil*I	
 Eekk!BEEEKKOEU[[_E++<<

 \\"a(
<<A&\\"a(
%%e,
##C(%%e,
))B-$+bLA$57R<'TV,;V%	&	"KK	DKKP	c8G8m,	KK5<<8	xY66r:   scale_ascale_bscale_resultuse_fast_accumc                   ^ ^^^^^^^^ S n[         R                  " T R                  5       S:H  =(       a    TR                  5       S:H  UU 4S j5        [         R                  " U" T R                  5      =(       a    U" TR                  5      UU 4S j5        [	        T 5      S:X  d  [	        T 5      S:X  Ga  S n	S n
S	 n[         R                  " U	" T R                  5       5      =(       d    U" T 5      U 4S
 j5        [         R                  " U
" TR                  5       5      =(       d    U" T5      U4S j5        [         R                  " T R                  S5      S-  S:H  U 4S j5        [         R                  " TR                  S5      S-  S:H  =(       a    TR                  S5      S-  S:H  U4S j5        T R                  u  mmTR                  S5      mTR                  [         R                  :H  =(       a    TR                  [         R                  :H  =(       dA    TR                  [         R                  :H  =(       a    TR                  [         R                  :H  nTR                  5       S:X  am  TR                  5       S:X  aY  [         R                  " TR                  [         R                  :H  =(       a    TR                  [         R                  :H  S 5        GOU(       a  TR                  [         R                  :X  a  SnTS-  mOSnSn[        TU5      n[        US5      S-  nU[        TU5      -  U-  mU[        TU5      -  U-  mTR                  5       T:X  ab  TR                  5       T:X  aN  [         R                  " TR                  5       S 5        [         R                  " TR                  5       S 5        GO[         R                  " SUUUU4S j5        GO[         R                  " TR                  [         R                  :H  =(       a    TR                  [         R                  :H  S 5        [         R                  " TR                  5       S:H  =(       a    TR                  5       S:H  UU4S j5        TR                  S5      T:X  a~  TR                  S5      S:X  ai  TR                  S5      S:X  aT  TR                  S5      T:X  a?  [         R                  " TR                  5       =(       a    TR                  5       S 5        GO\TR                  S5      T:X  aX  TR                  S5      TR                  S5      s=:X  a  [        TS5      :X  a#  O  O TR                  S5      [        TS5      :X  a  OTR                  S5      T:X  aN  TR                  S5      TR                  S5      s=:X  a  [        TS5      :X  a  O  OTR                  S5      T:X  a  OTR                  S5      [        TS5      :X  aN  TR                  S5      TR                  S5      s=:X  a  [        TS5      :X  a  O  OTR                  S5      T:X  a  O[         R                  " SUUUUU4S j5        Ub  UOT R                  n[         R                  " T R                  S5      TR                  S5      UT R                  S9$ )Nc                     U [         R                  [         R                  [         R                  [         R                  [         R
                  4;   $ rD   rX   rp  float8_e5m2float8_e4m3fnuzfloat8_e5m2fnuzfloat4_e2m1fn_x2r   s    r8   is_fp8_or_fp4_type2_check_scaled_mm_sizes.<locals>.is_fp8_or_fp4_type  A    !!!!""
 
 	
r:   r   c                  L   > STR                  5        ST R                  5        3$ Nz%Inputs must be 2D but got self.dim()=z and mat2.dim()=r   rW  r   s   r8   rg   (_check_scaled_mm_sizes.<locals>.<lambda>  %    7
|CSTXT\T\T^S_`r:   c                  <   > STR                    ST R                    3$ Nz?Expected both inputs to be fp8 or fp4 types but got self.dtype=z and mat2.dtype=r   rj  s   r8   rg   rk    $    QRVR\R\Q]]mnrnxnxmyzr:   r   r   c                 6    U S   U S   :  =(       a    U S   S:H  $ r+  r4   r,  s    r8   is_row_major,_check_scaled_mm_sizes.<locals>.is_row_major  "    !9vay(;VAY!^;r:   c                 0    U S   S:H  =(       a    U S   S:  $ r+  r4   r,  s    r8   is_col_major,_check_scaled_mm_sizes.<locals>.is_col_major      !9>3fQi!m3r:   c                 `    U R                  S5      S:H  =(       d    U R                  S5      S:H  $ r+  r   	tensor_2ds    r8   has_zero_dim,_check_scaled_mm_sizes.<locals>.has_zero_dim  )    >>!$)CY^^A->!-CCr:   c                  *   > ST R                  5        3$ Nz#self must be row_major, got stride r,  r   s   r8   rg   rk        9$++-Ir:   c                  *   > ST R                  5        3$ Nz#mat2 must be col_major, got stride r,  rW  s   r8   rg   rk    r  r:   r3   r  r   c                  ,   > ST R                  S5       3$ NzBExpected self.size(1) to be divisible by 16, but got self.size(1)=r3   r   r   s   r8   rg   rk        XY]YbYbcdYeXfgr:   c                  "   > ST R                    3$ Nz?Expected both dimensions of mat2 to be divisible by 16 but got rR  r  s   r8   rg   rk        UVZV`V`Uabr:   c                      g)NzNFor tensorwise scaling, both scale_a and scale_b must be float (fp32) tensors.r4   r4   r:   r8   rg   rk    s    hr:   r  r  r0  c                      g)Nzscale_a must be contiguousr4   r4   r:   r8   rg   rk        8r:   c                      g)Nzscale_b must be contiguousr4   r4   r:   r8   rg   rk    r  r:   Fc            	      Z   > ST  STR                  5        ST STR                  5        S3	$ )NzTInvalid blockwise scaling configuration. For blockwise scaling, scale_a should have  elements, got z, scale_b should have r  r)  )expected_a_sizeexpected_b_sizerZ  r[  s   r8   rg   rk    sC    FFUEVVefmfsfsfuev w//>.?w}}N__`br:   c                      g)NzKFor rowwise scaling, both scale_a and scale_b must be float (fp32) tensors.r4   r4   r:   r8   rg   rk    s    er:   c                  P   > ST R                  5       < STR                  5       < 3$ )NzLFor non-tensorwise scaling, scale tensors must be 2D, but got scale_a.dim()=z and scale_b.dim()=r   rZ  r[  s   r8   rg   rk    s*    gY`YdYdYfXhh|nunynyn{m}~r:   c                      g)Nz@Both scale_a and scale_b must be contiguous for rowwise scaling.r4   r4   r:   r8   rg   rk    s    ^r:   c                    > ST ST ST S[        T S5       S3	S[        T S5       S[        TS5       ST S[        T S5       S3	-   S[        T S5       ST S	[        TS5       S[        T S5       S3	-   S[        T S5       ST S
TR                  S5       STR                  S5       STR                  S5       STR                  S5       S3-   $ )N}Invalid scaling configuration. For tensorwise scaling, both scales should be scalar. For rowwise scaling, scale_a should be (, 1), scale_b should be (1, >). For (BlockWise1x128, BlockWise128x128), scale_a should be (r}   r  ), scale_b should be (<). For (BlockWise1x128, BlockWise1x128), scale_a should be (z>). For (BlockWise128x128, BlockWise1x128), scale_a should be (). Got scale_a.size()=(r   r3   ) and scale_b.size()=(r~   r9   r   )_krs  r  rZ  r[  s   r8   rg   rk  7  sG   CCD#Eabcad eVVWUXXZ[cdfhk[lZmmpr 0S0A/B"XaQTEUDV WTTUSVVXYabdfiYjXkknp	p 0S0A/B"QC HVV^_`beVfUggijrsuwzj{i||AA 0S0A/B"QC H//6||A.?r',,q/AR S//6||A.?r',,q/ARRSU
Ur:   r	  )rX   rj   r   r`   r  r   r   r   float8_e8m0fnurp  r   rm  r9   rq  r   r   )r   rW  rZ  r[  rH  r\  rJ  r]  re  rq  ru  r{  is_blockwise_scalingblock_size_kblock_size_mnnum_k_blockspadded_num_k_blocks
_out_dtyper  r  r  rs  r  s   ````              @@@@@r8   _check_scaled_mm_sizesr    s   
 
LL
a+DHHJ!O` 
LL4::&I+=djj+Iz
 4F"k$&75&@	<	4	D 	'=<+=I	
 	'=<+=I	
 	IIaL2"g	
 	IIaL2"=tyy|b'8A'=b	
 

2IIaL !5!55 :MMU%9%99
 !4!44 9MMU%8%88 	 ==?aGMMOq$8LL.Q7==EMM3Qh " }} 3 33  "!V!M#B5L"*<";a"? M ::=PP  M ::=PP 
 ?2MMO6))+8 ))+8
  LL.Q7==EMM3Qe
 LL"9w{{}'9~ Q1$LLOq(LLOq(LLOq( ))+G0E0E0G^
 Q1$LLOw||AK(2s:KKLLOx3'77 Q1$LLOw||AK(2s:KKLLOq( Q8As#33LLOw||AK(2s:KKLLOq(   " (3J;;tyy|TYYq\DKKXXr:   c           
          [        XX#XEXg5      $ rD   )r  )r   rW  rZ  r[  rH  r\  rJ  r]  s           r8   meta_scaled_mmr  J  s     "Gd) r:   scale_recipe_ascale_recipe_b	swizzle_a	swizzle_bc           
      B  ^ ^^^^^	^^^ ^!^"^#^$^%^& S nS n[         R                  " T R                  5       S:H  =(       a    TR                  5       S:H  UU 4S j5        [         R                  " U" T R                  5      =(       a    U" TR                  5      UU 4S j5        T R                  S   m T R                  S   mTR                  S   m!U" T R                  5      (       a  U" TR                  5      (       a  SnTU-  mU Vs/ s H  n[        U5      PM     nnU Vs/ s H  n[        U5      PM     nnT(       a  T Vs/ s H  n[        U5      PM     snmO[        R                  /mT	(       a  T	 Vs/ s H  n[        U5      PM     snm	O[        R                  /m	[        T 5      S:X  d  [        T 5      S	:X  Ga  S
 nS nS n[         R                  " U" T R                  5       5      =(       d    U" T 5      U 4S j5        [         R                  " U" TR                  5       5      =(       d    U" T5      U4S j5        [         R                  " T R                  S5      S-  S:H  U 4S j5        [         R                  " TR                  S5      S-  S:H  =(       a    TR                  S5      S-  S:H  U4S j5        S[        [
           S[        [
           4S jnS[        [
           S[        [
           4S jnS[        [
           S[        [
           4S jnS[        [
           S[        [
           4S jnS[        [
           S[        [
           4S jnS[        [
           S[        [
           4S jnS[        [
           S[        [
           4S jnS[        [
           S[        [
           4S jnU" X55      (       a  [         R                  " TS   R                  5       S:H  =(       ad    TS   R                  5       S:H  =(       aG    TS   R                  [         R                  :H  =(       a     TS   R                  [         R                  :H  S 5        G
OU" X55      (       a  [         R                  " TS   R                  S   T :H  =(       a    TS   R                  5       T :H  =(       ad    TS   R                  [         R                  :H  =(       a=    TS   R                  5       T!:H  =(       a     TS   R                  [         R                  :H  UUUU 4S j5        G
O/U" X55      (       Ga  TS   R                  [         R                  :H  =(       a     TS   R                  [         R                  :H  nTS   m%T%R                  S   T :H  =(       a    T%R                  S   TS-  :H  =(       ac    T%R                  S5      S:H  =(       aH    T%R                  S5      T :H  =(       d-    T%R                  S   S:H  =(       a    T%R                  S5      S:H  nTS   m&T&R                  S   T!:H  =(       a    T&R                  S   TS-  :H  =(       ac    T&R                  S5      S:H  =(       aH    T&R                  S5      T!:H  =(       d-    T&R                  S   S:H  =(       a    T&R                  S5      S:H  n[         R                  " U=(       a    U=(       a    UUU U!U%U&4S j5        GOmU" X55      (       Ga  TS   R                  [         R                  :H  =(       a     TS   R                  [         R                  :H  n[        TS-  S 5      mTS   m%T%R                  S   T:H  =(       a    T%R                  S   T S-  :H  =(       ac    T%R                  S5      S:H  =(       aH    T%R                  S5      T:H  =(       d-    T%R                  S   S:H  =(       a    T%R                  S5      S:H  nTS   m&T&R                  S   T!:H  =(       a    T&R                  S   TS-  :H  =(       ac    T&R                  S5      S:H  =(       aH    T&R                  S5      T!:H  =(       d-    T&R                  S   S:H  =(       a    T&R                  S5      S:H  n[         R                  " U=(       a    U=(       a    UUUU U!U%U&4S! j5        GOU" X55      (       Ga  TS   R                  [         R                  :H  =(       a     TS   R                  [         R                  :H  n[        TS-  S 5      mTS   m%T%R                  S   T :H  =(       a    T%R                  S   TS-  :H  =(       ac    T%R                  S5      S:H  =(       aH    T%R                  S5      T :H  =(       d-    T%R                  S   S:H  =(       a    T%R                  S5      S:H  nTS   m&T&R                  S   T:H  =(       a    T&R                  S   T!S-  :H  =(       ac    T&R                  S5      S:H  =(       aH    T&R                  S5      T:H  =(       d-    T&R                  S   S:H  =(       a    T&R                  S5      S:H  n[         R                  " U=(       a    U=(       a    UUUU U!U%U&4S" j5        GOU" X55      (       Ga  [         R                  R                   (       ac  [#        T R                  S   S#5      T R                  S   -  m"[#        T R                  S   S#5      T R                  S   -  m#[        R                  m$O[        T R                  S   S5      [        [#        T R                  S   S#5      S 5      -  m"[        TR                  S   S5      [        [#        T R                  S   S#5      S 5      -  m#[        R$                  m$[         R                  " TS   R                  5       T":H  =(       a    TS   R                  [         R&                  :H  =(       a[    TS   R                  5       T#:H  =(       a>    TS   R                  [         R&                  :H  =(       a    TS   T$:H  =(       a    T	S   T$:H  U"U#U$UUUU	4S$ j5        GOU" X55      (       Ga  [        T S5      [        [#        TS5      S 5      -  m"[        T!S5      [        [#        TS5      S 5      -  m#[        R$                  m$[         R                  " TS   R                  5       T":H  =(       a    TS   R                  [         R(                  :H  =(       a[    TS   R                  5       T#:H  =(       a>    TS   R                  [         R(                  :H  =(       a    TS   T$:H  =(       a    T	S   T$:H  U"U#U$UUUU	4S% j5        GOU" X55      (       Ga  [        T S5      [        [#        TS5      S 5      -  m"[        T!S5      [        [#        TS5      S 5      -  m#[        R$                  m$[         R                  " TS   R                  5       T":H  =(       Ga
    TS   R                  [         R(                  :H  =(       a    TS   R                  5       S:H  =(       a    TS   R                  [         R                  :H  =(       a    TS   R                  5       T#:H  =(       a    TS   R                  [         R(                  :H  =(       a[    TS   R                  5       S:H  =(       a>    TS   R                  [         R                  :H  =(       a    TS   T$:H  =(       a    T	S   T$:H  U"U#U$UUUU	4S& j5        O[         R                  " S'UU U!UU4S( j5        Ub  UOT R                  n[         R*                  " T T!UT R,                  S)9$ s  snf s  snf s  snf s  snf )*Nc                     U [         R                  [         R                  [         R                  [         R                  [         R
                  4;   $ rD   r`  r   s    r8   re  5_check_scaled_mm_sizes_v2.<locals>.is_fp8_or_fp4_typeg  rg  r:   c                 (    U [         R                  :H  $ rD   )rX   rd  r   s    r8   is_fp4_type._check_scaled_mm_sizes_v2.<locals>.is_fp4_typep  s    ....r:   r   c                  L   > STR                  5        ST R                  5        3$ ri  r   rj  s   r8   rg   +_check_scaled_mm_sizes_v2.<locals>.<lambda>u  rl  r:   c                  <   > STR                    ST R                    3$ rn  r   rj  s   r8   rg   r  y  ro  r:   r   r3   r   r   c                 6    U S   U S   :  =(       a    U S   S:H  $ r+  r4   r,  s    r8   rq  /_check_scaled_mm_sizes_v2.<locals>.is_row_major  rs  r:   c                 0    U S   S:H  =(       a    U S   S:  $ r+  r4   r,  s    r8   ru  /_check_scaled_mm_sizes_v2.<locals>.is_col_major  rw  r:   c                 `    U R                  S5      S:H  =(       d    U R                  S5      S:H  $ r+  r   ry  s    r8   r{  /_check_scaled_mm_sizes_v2.<locals>.has_zero_dim  r}  r:   c                  *   > ST R                  5        3$ r  r,  r   s   r8   rg   r    r  r:   c                  *   > ST R                  5        3$ r  r,  r  s   r8   rg   r    r  r:   r  c                  ,   > ST R                  S5       3$ r  r   r   s   r8   rg   r    r  r:   c                  "   > ST R                    3$ r  rR  r  s   r8   rg   r    r  r:   recipe_arecipe_bc                     [        U 5      S:H  =(       aH    [        U5      S:H  =(       a3    U S   [        R                  :H  =(       a    US   [        R                  :H  $ r  )r   r(   
TensorWiser  r  s     r8   is_tensorwise0_check_scaled_mm_sizes_v2.<locals>.is_tensorwise  sT    H" :MQ&:QK;#9#99: QK;#9#99	r:   c                     [        U 5      S:H  =(       aH    [        U5      S:H  =(       a3    U S   [        R                  :H  =(       a    US   [        R                  :H  $ r  )r   r(   RowWiser  s     r8   
is_rowwise-_check_scaled_mm_sizes_v2.<locals>.is_rowwise  sT    H" 7MQ&7QK;#6#667 QK;#6#66	r:   c                     [        U 5      S:H  =(       aH    [        U5      S:H  =(       a3    U S   [        R                  :H  =(       a    US   [        R                  :H  $ r  )r   r(   BlockWise1x32r  s     r8   is_mx(_check_scaled_mm_sizes_v2.<locals>.is_mx  sT    H" =MQ&=QK;#<#<<= QK;#<#<<	r:   c                     [        U 5      S:H  =(       aH    [        U5      S:H  =(       a3    U S   [        R                  :H  =(       a    US   [        R                  :H  $ r  )r   r(   BlockWise1x16r  s     r8   is_nv_single_level5_check_scaled_mm_sizes_v2.<locals>.is_nv_single_level  sV     H" =MQ&=QK;#<#<<= QK;#<#<<	r:   c                 0   [        U 5      S:H  =(       a    [        U5      S:H  =(       am    U S   [        R                  :H  =(       aP    U S   [        R                  :H  =(       a3    US   [        R                  :H  =(       a    US   [        R                  :H  $ )Nr   r   r3   )r   r(   r  r  r  s     r8   is_nv(_check_scaled_mm_sizes_v2.<locals>.is_nv  s    H" :MQ&:QK;#<#<<: QK;#9#99: QK;#<#<<	:
 QK;#9#99r:   c                     [        U 5      S:H  =(       aH    [        U5      S:H  =(       a3    U S   [        R                  :H  =(       a    US   [        R                  :H  $ r  )r   r(   BlockWise1x128r  s     r8   is_1x128_1x1281_check_scaled_mm_sizes_v2.<locals>.is_1x128_1x128  sT    H" >MQ&>QK;#=#==> QK;#=#==	r:   c                     [        U 5      S:H  =(       aH    [        U5      S:H  =(       a3    U S   [        R                  :H  =(       a    US   [        R                  :H  $ r  )r   r(   r  BlockWise128x128r  s     r8   is_1x128_128x1283_check_scaled_mm_sizes_v2.<locals>.is_1x128_128x128  sW    H" @MQ&@QK;#=#==@ QK;#?#??	r:   c                     [        U 5      S:H  =(       aH    [        U5      S:H  =(       a3    U S   [        R                  :H  =(       a    US   [        R                  :H  $ r  )r   r(   r  r  r  s     r8   is_128x128_1x1283_check_scaled_mm_sizes_v2.<locals>.is_128x128_1x128  sT    H" >MQ&>QK;#?#??> QK;#=#==	r:   c                      g)Nz\For Tensorwise scaling, both scale_a and scale_b must be single element float (fp32) tensorsr4   r4   r:   r8   rg   r    s    vr:   c            	         > STR                   S    STS   R                  5        ST R                   S    STS   R                  5        S3	$ )Nz'For Rowwise scaling, scale_a must have r   z elements (got: z), and scale_b must have r3   r~   )r   r   )rW  rZ  r[  r   s   r8   rg   r  
  s\    =djjm_L\]def]g]m]m]o\p q//3zz!}o=MgVWjN^N^N`Maabdr:   r  c                     > ST ST S-   STR                    ST STR                   ST ST S-   STR                    ST STR                   S3$ )Nz>For 1x128 x 1x128 blockwise scaling, scale a must have shape [r}   r  ] (got: ) and stride [1, )scale b must have shape [r~   r   r   )r  r  r  sasbs   r8   rg   r  '  sy    001s"Q#XJhrxxjPabcaddlmomvmvlw x001s"Q#XJhrxxjPabcaddlmomvmvlwwxzr:   r0  c                     > ST STS-   STR                    ST STR                   ST ST S-   STR                    ST STR                   S3$ )Nz]For 128x128 x 1x128 blockwise scaling, L4 = {round_up(K / 128, 4)}, scale a must have shape [r}   r  r  r  r  r~   r  r  L4r  r  r  r  s   r8   rg   r  F  sy    002t2a3hZxzQbcebffnoqoxoxny z001s"Q#XJhrxxjPabcaddlmomvmvlwwxzr:   c                     > ST ST S-   STR                    ST STR                   ST STS-   STR                    ST STR                   S3$ )Nz]For 1x128 x 128x128 blockwise scaling, L4 = {round_up(K / 128, 4)}, scale a must have shape [r}   r  r  r  r  r~   r  r  s   r8   rg   r  e  sy    001s"Q#XJhrxxjPabcaddlmomvmvlw x002t2a3hZxzQbcebffnoqoxoxnyyz|r:   r  c                     > ST  STS   R                  5        ST STS   R                  5        S[        R                   STS   R                   STS   R                   ST S	TS    STS    S
3$ )Nz!for MX scaling scale_a must have  (got: r   ) and scale_b must have z). Scales must have types z (for self: 	, mat_b: z) Must have swizzle type  (got self: r~   )r   rX   r  r`   expected_scale_a_elemsexpected_scale_b_elemsexpected_swizzlerZ  r[  r  r  s   r8   rg   r    s    78N7OwW^_`WaWgWgWiVj k--C,DGGTUJL\L\L^K_ `""'"6"6!7|GAJDTDTCUU^_fgh_i_o_o^p q..>-?|IVWL>Ybclmncobppqsr:   c                     > ST  STS   R                  5        ST STS   R                  5        ST STS    STS    S3$ )	Nz.for single-level NV scaling scale_a must have r  r   r  ). Must have swizzle type r  r  r~   r)  r  s   r8   rg   r    sr    DE[D\\cdklmdndtdtdvcw x--C,DGGTUJL\L\L^K_ `$$4#5\)A,yYbcdYeXffgir:   c                     > ST  STS   R                  5        ST STS   R                  5        ST STS    STS    S3$ )	Nz!for NV scaling scale_a must have r  r   r  r  r  r  r~   r)  r  s   r8   rg   r    sq    78N7OwW^_`WaWgWgWiVj k--C,DGGTUJL\L\L^K_ `$$4#5\)A,yYbcdYeXffgir:   Fc                  b  > ST ST ST S[        T S5       S3	S[        T S5       S[        TS5       ST S[        T S5       S3	-   S[        T S5       ST S	TS
   R                  S
5       STS
   R                  S5       STS
   R                  S
5       STS
   R                  S5       S3-   $ )Nr  r  r  r}   r  r  r  r  r  r   r3   r  r~   r  )r  r  r  rZ  r[  s   r8   rg   r    s   ??@cA]^_]` aRRSQTTVW_`acfWgVhhkm ,HQ,<+=RC@P?Q RPPQsRTU]^_adUeTffik	k ,HQ,<+=Rs C++21:??1+=*>bQRAS@T U++21:??1+=*>bQRAS@TTUWWr:   r	  )rX   rj   r   r`   r   r(   r)   
NO_SWIZZLEr  r   r   r   r   rm  r?   r  r  r9   SWIZZLE_32_4_4r  rp  r   r   )'r   rW  rZ  r  r[  r  rH  rJ  r  r  r]  re  r  K_packed_multipliersirq  ru  r{  r  r  r  r  r  r  r  r  types_ok
scale_a_ok
scale_b_okr  r  r  r  r  r  r  r  r  r  s'   ``` `   ``                    @@@@@@@@@r8   _check_scaled_mm_sizes_v2r	  Z  sK   
/ 
LL
a+DHHJ!O` 
LL4::&I+=djj+Iz 	

1A

1A

1A 4::;tzz#:#:	  0>?"k"oN?0>?"k"oN?/89y[_y9	 ""
	 /89y[_y9	 ""
	 4F"k$&75&@	<	4	D 	'=<+=I	
 	'=<+=I	
 	IIaL2"g	
 	IIaL2"=tyy|b'8A'=b	

	D$5 	kAR 		k!2 	d;>O 		D- 	k9J 		;'	373D		D- 	k9J 		T+%6 	${BS 		tK'8 	DDU 		tK'8 	DDU 	 88LL
  "a' 6AJ$$&!+6AJ$$56 AJ$$5v 77LL
  #q( 6AJ$$&!+6AJ$$56 AJ$$&!+6 AJ$$5
 N;;
 
  EMM1Wgaj6F6F%--6W  Bq  THHQK18+TIIaLA%T YYq\Q&R288A;!+;+Q		!PQ@Q	  Bq  THHQK18+TIIaLA%T YYq\Q&R288A;!+;+Q		!PQ@Q	  LL6Z6J  n== 
  EMM1Wgaj6F6F%--6W  !c'1%BBr! UHHQK18+UIIaLA%U YYq\R'SBHHQK1,<,R1QRAR	  Bq  THHQK18+TIIaLA%T YYq\Q&R288A;!+;+Q		!PQ@Q	  LL6Z6J  n== 
  EMM1Wgaj6F6F%--6W  !c'1%BBq  THHQK18+TIIaLA%T YYq\Q&R288A;!+;+Q		!PQ@Q	  Br! UHHQK18+UIIaLA%U YYq\R'SBHHQK1,<,R1QRAR	  LL6Z6J  >22}}  )1$**Q-)DtzzRS})T&)1$**Q-)DtzzRS})T&#.#9#9 )1$**Q-)ETZZ]B/I *& *2$**Q-)ETZZ]B/I *& $/#=#= LL
  "&<< 5AJ$$(<(<<5AJ$$&*@@5 AJ$$(<(<<5 aL$44	5
 aL$44   ??%-a%5!RRS8T%T"%-a%5!RRS8T%T"*99LL
  "&<< 5AJ$$(;(;;5AJ$$&*@@5 AJ$$(;(;;5 aL$44	5
 aL$44  >22%-a%5!RRS8T%T"%-a%5!RRS8T%T"*99LL
  "&<< 	5 	5AJ$$(;(;;	5AJ$$&!+	5 AJ$$5	5 AJ$$&*@@		5
 AJ$$(;(;;	5 AJ$$&!+	5 AJ$$5	5 aL$44	5 aL$44 $ LL
 
 (3J;;q!:dkkBBm	 @? : :s   ~#~~:~r   contraction_dimsc                 (    [        U UUUUUUU	UUUS9$ )N)rH  rJ  r  r  r]  )r	  )r   rW  rZ  r  r  r[  r  r  rH  r   r	  r]  s               r8   meta_scaled_mm_v2r	    s2     %% r:   c           	      P    [        XX#USS9  U R                  U R                  5      $ NT)r  r  r   r   r   r7  rx  rv  s         r8   meta_scatter_reduce_twor	    s%     dVTJ>>$**%%r:   c           	          [        XX#USS9  U $ r		  r  r
	  s         r8   meta_scatter_reduce__twor	    s    dVTJKr:   c                |  ^  [         R                  " ST R                  5       s=:  =(       a    S:*  Os  U 4S j5        T R                  5       S:X  a.  [         R                  " U[         R                  T R
                  S9$ [         R                  " T R                  S5      U[         R                  T R
                  S9$ )Nr   r   c                  *   > ST R                  5        3$ )NzAThe probability distributions dimensions must be 1 or 2, but got r   r  s   r8   rg   "meta_multinomial.<locals>.<lambda>  s    STYT]T]T_S`ar:   r3   r	  )rX   rj   r   r   r   r   r   )r   num_samplesreplacementr  s   `   r8   meta_multinomialr	    s     
LL	EIIK1a yy{a{{;ejjNN;;

1{%**U\\ r:   c                 $    SnU  H  nX-  nM	     U$ r2   r4   )vsr~  vs      r8   multiply_integersr	    s    	A	 Hr:   c                 ^  ^ ^^^ [         R                  " [        T5      T:H  UU4S j5        TS-   m[         R                  " [        T 5      T:H  UU 4S j5        [         R                  " [        S T SS   5       5      =(       a    [        S T 5       5      U U4S j5        T S S u  p4X4/TQ7$ )Nc                  &   > ST  S[        T5       3$ )Nz%It is expected output_size equals to , but got size r  )num_spatial_dimsr  s   r8   rg   'upsample_common_check.<locals>.<lambda>  s    78H7IY\]hYiXjkr:   r   c                  &   > ST  S[        T5       3$ )Nz$It is expected input_size equals to r	  r  )expected_input_dimsr  s   r8   rg   r	  	  s    67J6K?[^_i[jZklr:   c              3   *   #    U  H	  oS :  v   M     g7fr   Nr4   rv   r  s     r8   rx   (upsample_common_check.<locals>.<genexpr>  s     *>aE>   c              3   *   #    U  H	  oS :  v   M     g7fr 	  r4   r!	  s     r8   rx   r"	    s     2N+Qq5+r#	  c                     > ST  ST 3$ )NzDInput and output sizes should be greater than 0, but got input size z and output size r4   )r  r  s   r8   rg   r	    s      \!2;-Ar:   )rX   rj   r   r  )r  r  r	  r	  channelsr	  s   ```  @r8   upsample_common_checkr'	    s    	LLK,,k +Q.	LLJ..l
 
LL*:ab>**Ns2N+2N/N	A ""1~F+{++r:   c                 8  ^  [         R                  " T R                  5       S:g  =(       d    [        T R	                  5       SS  5      U 4S j5        [        T R	                  5       USS9nT R                  U5      R                  [        R                  " T 5      S9$ )Nr   r3   c                  *   > ST R                  5        3$ )Nz>Non-empty 3D data tensor expected but got a tensor with sizes r   r  s   r8   rg   $upsample_nearest1d.<locals>.<lambda>      PQVQ[Q[Q]P^_r:   r	  r   
rX   rj   r   r	  r   r'	  r   r4  rQ   r   )r   r  scalesfull_output_sizes   `   r8   upsample_nearest1dr0	         
LLA/

QR0@A_ -

kA ??+,//11%8 0  r:   c                   ^  [         R                  " T R                  5       S:g  =(       d    [        T R	                  5       SS  5      U 4S j5        [        T R	                  5       USS9nT R                  U5      n[        R                  " T 5      nT R                  u  px  nT R                  R                  S:X  a  US:  a  [         R                  nUR                  US9nU$ )	Nr   r3   c                  *   > ST R                  5        3$ Nz>Non-empty 4D data tensor expected but got a tensor with sizes r   r  s   r8   rg   $upsample_nearest2d.<locals>.<lambda>,  r+	  r:   r   r,	  r   r0  r   )rX   rj   r   r	  r   r'	  r   rQ   r   r   r   r   r   
contiguous)	r   r  scales_hscales_wr/	  r
  r   rT   
n_channelss	   `        r8   upsample_nearest2dr:	  &  s     
LLA/

QR0@A_ -

kA __-.F //6M  ++A1a||F"zA~//];FMr:   r  r  r7	  r8	  c                 X  ^ ^^ [        X!SS9m[        R                  " T R                  S:H  U 4S j5        [	        S5       H5  m[        R                  " T R                  T5      TT   :H  UU U4S j5        M7     T R                  U5      R                  [        R                  " T 5      S9$ )Nr   r,	  r0  c                  "   > ST R                    3$ NzFExpected grad_output to be a tensor of dimension 4 but got: dimension r  r  s   r8   rg   -upsample_nearest2d_backward.<locals>.<lambda>R      XYdYiYiXjkr:   c            
      D   > ST ST T    ST STR                  T5       3$ )NzCExpected grad_output to have the same shape as output; output.size() = z but got grad_output.size(r   r/	  r%  r   s   r8   rg   r>	  W  s9      !s$'7':&;,QCtK4D4DQ4G3HJr:   r   )
r'	  rX   rj   r   r   r   r   r4  rQ   r   )r%  r  r  r7	  r8	  r/	  r   s   `    @@r8   upsample_nearest2d_backwardrC	  @  s     -! 
LLAk 1XQ#3A#66	
    ,//11+> 0  r:   c                 8  ^  [         R                  " T R                  5       S:g  =(       d    [        T R	                  5       SS  5      U 4S j5        [        T R	                  5       USS9nT R                  U5      R                  [        R                  " T 5      S9$ )Nr   r3   c                  *   > ST R                  5        3$ )Nz>Non-empty 5D data tensor expected but got a tensor with sizes r   r  s   r8   rg   $upsample_nearest3d.<locals>.<lambda>i  r+	  r:   r0   r,	  r   r-	  )r   r  scales_dr7	  r8	  r/	  s   `     r8   upsample_nearest3drH	  c  r1	  r:   c                    [         R                  " U 5      [         R                  " U [         R                  S9pvUb  Ub  [        U[        5      (       d  [        S[        U5       35      e[        U[        5      (       d  [        S[        U5       35      eUR                  nUR                  5       n	[        XH5      n[        XX5      nUR                  X5        UR                  X5        [        XdS9  [        XuS9  XE4$ Xg4$ )Nr   zvalues must be TensorLike, got z indices must be TensorLike, got )r  r  )rX   r   r   rt   r   r   r   r   r   r!   r   r#   )
r   stabler   
descendingr   r   r	  r   r   
out_strides
             r8   	meta_sortrM	  s  s     D!5#3#3D#Lqg1&*-- #B4<.!QRR':.. #CDM?!STT GG	XXZ
"65#G791I2344Kr:   c           	      >  ^ ^^^^^^^ [         R                  " T R                  S:H  U 4S j5        [         R                  " T R                  TR                  :H  UU 4S j5        T R	                  S5      mTb  [         R                  " TR                  S:H  U4S j5        [         R                  " TR                  5       T:H  UU4S j5        [         R                  " TR                  TR                  :H  UU4S j5        [         R                  " TR                  S:H  U4S j5        T R	                  S	5      T-  T-  m[         R                  " TR                  5       T:H  UUUU U4S
 j5        [         R                  " [        U 4S jTTTT4 5       5      S 5        g )Nr   c                  "   > T R                    S3$ Nz != 2r  )input_gatess   r8   rg   %rnn_cell_checkSizes.<locals>.<lambda>      ;3C3C2DE0Jr:   c                  :   > TR                    ST R                    3$ Nrp  rR  )hidden_gatesrQ	  s   r8   rg   rR	    s    ;$$%T,*<*<)=>r:   r3   c                  "   > T R                    S3$ )Nz != 1r  )
input_biass   r8   rg   rR	    s    joo5Fe3Lr:   c                  .   > TR                  5        ST  3$ rU	  r)  )
gates_sizerX	  s   r8   rg   rR	    s    z'')*$zl;r:   c                  :   > TR                    ST R                    3$ rU	  rR  )hidden_biasrX	  s   r8   rg   rR	    s    z''([->->,?@r:   c                  "   > T R                    S3$ rP	  r  )prev_hiddens   r8   rg   rR	    rS	  r:   r   c            
      `   > TR                  5        STR                  S5       ST ST ST  S3
$ )Nrp  r   z * z // z (aka r~   )r   r   )expected_prev_hidden_numelfactorrZ	  rQ	  r^	  s   r8   rg   rR	    s@    ;$$&'tK,<,<Q,?+@J<tTZS[[ab|a}}~r:   c              3   V   >#    U  H  nUR                   TR                   :H  v   M      g 7frD   r  )rv   r=   rQ	  s     r8   rx   &rnn_cell_checkSizes.<locals>.<genexpr>  s'      
I HH***Is   &)c                      g)Nz%expected all inputs to be same devicer4   r4   r:   r8   rg   rR	    s    7r:   )rX   rj   r   r   r   r   r  )rQ	  rV	  rX	  r\	  ra	  r^	  r`	  rZ	  s   ``````@@r8   rnn_cell_checkSizesre	    sC    
LL!!Q&(JK	LL\///> !!!$JZ__)+LM*,;	
 	 1 11@	
 
LL!!Q&(JK!,!1!1!!4z!AV!K	LL99 
LL 
"J[I
 	
 	8r:   c                     [        XX4SU5        [        R                  " U [        R                  S9n[        R                  " U[        R                  S9n[        R                  " U[        R                  S9nXgU4$ )Nr0  r   )re	  rX   r   r   )rQ	  rV	  cxrX	  r\	  	workspacehycys           r8   _thnn_fused_lstm_cell_metark	    se     :ArR  E<S<STI			"E,C,C	DB			"E,C,C	DBIr:   c                    [        U5      S:g  nU(       a   [        U5      nUS   nU R                  S   nOLU
(       a  U R                  S   OU R                  S   nU
(       a  U R                  S   OU R                  S   nSnU(       a  SOSnUS:w  a  UOUnU(       a  UUU-  /nOU
(       a  UUUU-  /OUUUU-  /nU R                  U5      nU	U-  UU/nUc   [        R                  " SU R
                  S9nOUR                  U5      nUR                  U	U-  UU/5      nU(       a  SOSnU R                  U[        R                  S9nUUUUU4$ )Nr   r3   r   r   r  r   )r   r   r   rX   r   r   r  )r   rF  weight_stride0
weight_bufhxrg	  rz  hidden_size	proj_size
num_layersbatch_firstdropouttrainbidirectionalbatch_sizesdropout_stateis_input_packed
seq_length
mini_batchbatch_sizes_sumnum_directionsout_sizer   r
  
cell_shaperj	  ri	  reserve_shapereserves                                r8   
_cudnn_rnnr	    sS   & +&!+O%
 ^
++a.'2U[[^A
'2U[[^A
'QQN%NyH$h&?@	  X%>?j(^*CD 	
 __Y'F~-z;GJ	z[[5<<0\\*%	zN2JI	JB AAMoom5;;o?G2r7J..r:   c                     SnU H+  nUR                  5       S:  d  M  XR                  5       -  nM-     U R                  U45      n[        U UUUUUUUSUUU	U
UUU5      $ r  )r   r   r	  )r   rF  rm	  ro	  rg	  rz  rp	  rr	  rs	  rt	  ru	  rv	  rw	  rx	  total_weight_elemsr  rn	  s                    r8   
miopen_rnnr	    s    & 779q='')+  "4!67J

	! r:   c                 >   U(       a  U R                   S   OU R                   S   nU(       a  U R                   S   OU R                   S   nU
nU(       a  UUU/OUUU/nU R                  U5      nUc   [        R                  " SU R                  S9nOUR                  UR                   5      nUc   [        R                  " SU R                  S9nOUR                  UR                   5      n[        R                  " SU R                  [        R
                  S9nUUUU4$ )Nr3   r   r  r   )r   r   rX   r   r   r  )r   w0w1w2w3hx_cx_r   rw	  rz  rp	  rr	  
has_biasesrv	  rs	  ru	  rz	  r{	  output_chanelsr   r
  ri	  rj	  rh	  s                           r8   mkldnn_rnn_layerr	  +  s    & $/QEKKNJ#.QEKKNJ N  
Z0*n5 
 __Y'F
{[[5<<0]]399%
{[[5<<0]]399%Aell%++FI2r9$$r:   c                    ^^ U R                   S:X  a,  [        R                  " TS:H  =(       d    TS:H  UU4S j5        g [        R                  " U R                  T5      S:g  UU4S j5        g )Nr   r   c                     > T ST  3$ )Nz4: Expected reduction dim -1 or 0 for scalar but got r4   r   r  s   r8   rg   'zero_numel_check_dims.<locals>.<lambda>W  s    wiSTWSXYr:   c                     > T ST  S3$ )Nz: Expected reduction dim z to have non-zero size.r4   r	  s   r8   rg   r	  \  s    wi8=TUr:   )r   rX   r   r   )r   r   r  s    ``r8   zero_numel_check_dimsr	  S  sR    yyA~1H!r	Y	

 	IIcNaU	
r:   c                    ^  Ub&  [        X!R                  5       5      n[        XT 5        g [        R                  " UR                  5       S:g  U 4S j5        g )Nr   c                     > T  S3$ )Nz@: Expected reduction dim to be specified for input.numel() == 0.r4   rb  s   r8   rg   %check_argmax_argmin.<locals>.<lambda>h  s    tf\]r:   )r   r   r	  rX   rj   r   )r  r   r   s   `  r8   check_argmax_argminr	  a  s?    
S((*-d.JJLA]	
r:   c                     [        SX5        [        R                  " U R                  Ub  U4OS 5      n[	        XU5      nU R                  U[        R                  S9$ )Nargmaxr   )r	  rQ   r  r   r  r   rX   r   )r   r   r  r  r   s        r8   argmax_argmin_metar	  l  sM    $,

coSF4PD$T9E>>%u{{>33r:   c                 v    U[         R                  :X  a  [         R                  n[         R                  " SXX4S9$ )Nr4   r  )rX   jaggedr  r   )r  r`   r   r   r   s        r8   scalar_tensorr	  t  s1    
 ;;
%v r:   c                    [        X R                  5       SS9nU R                  5       S:X  a  SOU R                  U5      n[        R                  " US:  5        [        R                  " X:*  S 5        [        U R                  5      n[        U5      S:  a  XU'   U R                  U5      U R                  U[        R                  S94$ )NTr  r   r3   c                      g)Nzk not in range for dimensionr4   r4   r:   r8   rg   topk_meta.<locals>.<lambda>  s    )Gr:   r   )
r   r   r   rX   rj   r   r   r   r   r   )r   rb  r   largestr  	sliceSizetopKSizes          r8   	topk_metar	    s     hhjd
;CXXZ1_$))C.I	LLa	LL!GHDJJH
8}q>>(#T^^HEKK^%PPPr:   c                     Uc  Uc  [        S5      eUR                  5       nU R                  5       n	[        R                  " UU	R                  U	R
                  U	R                  S9$ )Nz;segment_reduce(): Either lengths or offsets must be defined)r`   r   r   )r   r6	  rX   r   r`   r   r   )
rz  r
  r  rx  r  r  r  r  data_contiggrad_contigs
             r8   meta__segment_reduce_backwardr	    sh    
 7?I
 	
 //#K//#K!!!!	 r:   c                   ^ SSK Jn  [        TU R                  5       SS9mU R                  5       S:  a  U R	                  T5      OSn[
        R                  " U" US:  X:*  5      U4S j5        [        U R                  S T U R                  TS-   S  -   5      nU(       a&  U R                  5       S:  a  UR                  TS5        U R                  U5      U R                  U[
        R                  S94$ )Nr   )r  Tr	  r3   c                     > ST  3$ )Nz9kthvalue(): selected number k out of range for dimension r4   r   s   r8   rg   kthvalue_meta.<locals>.<lambda>  s    KC5Qr:   r   )r1  r  r   r   r   rX   rj   r   r   r<  r   r   )r   rb  r   r  r  dimSizer   s     `    r8   kthvalue_metar	    s     >
dhhjd
;C $
QdiinAG	LLQ%Q
 DS!DJJsQwy$99:E488:>S!>>% $..ekk."JJJr:   c                    U b  U OUn[         R                  " UR                  5       S:H  S 5        UR                  5       nU b)  [         R                  " U R                  5       U:H  S 5        Ub)  [         R                  " UR                  5       U:H  S 5        [         R                  " UR                  5       U:H  S 5        [         R                  " UR                  5       U:H  S 5        [         R                  " UR                  5       S:H  S 5        [         R                  " UR	                  5       US   US	   -  S
-  :H  S 5        g )Nr   c                      gN r4   r4   r:   r8   rg   (checkLSTMBackwardSizes.<locals>.<lambda>  s    "r:   c                      gr	  r4   r4   r:   r8   rg   r	        r:   c                      gr	  r4   r4   r:   r8   rg   r	    r	  r:   c                      gr	  r4   r4   r:   r8   rg   r	        r:   c                      gr	  r4   r4   r:   r8   rg   r	    r	  r:   c                      gr	  r4   r4   r:   r8   rg   r	    s    rr:   r   r3   r0  c                      gr	  r4   r4   r:   r8   rg   r	    s    Rr:   )rX   rj   r   r   r   )grad_hygrad_cyrg	  rj	  rh	  defined_gradexp_sizes          r8   checkLSTMBackwardSizesr	    s    %17wL	LL!!#q(*5  "HW\\^x/<W\\^x/<	LLh&
3	LLh&
3	LLA%z2	LL"hqkHQK&?!&CCZPr:   c                     U c  Uc  g[        XX#U5        [        R                  " U[        S9n[        R                  " U[        S9nU(       a  UR	                  SSS9OS nXgU4$ )NNNNr   r   F)r  )r	  rX   r   legacy_contiguous_memory_formatr$  )	r	  r	  rg	  rj	  rh	  has_bias
grad_gatesgrad_cxr.  s	            r8   #_thnn_fused_lstm_cell_backward_implr	    sf    7?7RY?!!!@J r1PQG4<
q%0$I	))r:   c                 2   S nS nS nUS   (       a  UR                  U R                  5       5      nUS   (       d
  US   (       aQ  UR                  UR                  S5      U R                  S5      45      nUR                  UR                  S5      5      nXEU4$ )Nr   r3   r   r   rN  )r;  r:  r<  r?  r(  grad_weightr.  s          r8   linear_backwardr	    s    JKI1~!++FKKM:
1~Q",,l.?.?.CV[[QS_-UV **<+<+<R+@A	Y//r:   c                   ^ ^ [        T R                  5      S:  a  T R                  S   X-  -  S:X  d  [        ST R                   SU 35      eS mUU 4S jnT R                  S   X-  -  nT R                  S   U-  nT R                  S	   U-  n/ T R                  S S QUPUPUP7nT R                  U5      nUR	                  U" 5       S
9nU$ )Nr   rv  r   z'Invalid input shape for pixel_shuffle: z with upscale_factor = c                 b    [         R                  R                  U 5      [         R                  :H  $ rD   r  r  s    r8   r  ,meta_pixel_shuffle.<locals>.is_channels_last  s$    ""88=ATATTTr:   c                  F  > T " T5      (       a/  [        T5      S:X  a  [        R                  $ [        R                  $ TR	                  [        R                  S9(       a  [        R                  $ TR	                  [        R
                  S9(       a  [        R
                  $ g )Nr   r   )r  rX   r   r  rq  preserve_format)r  r   s   r8   r  .meta_pixel_shuffle.<locals>.pick_memory_format  s    D!!4 F*...***e.E.EF***e.C.CD((( Er:   r  r   r   )r   r   r   r   r4  )	r   upscale_factorr  r[  HrWrr   r   r  s	   `       @r8   meta_pixel_shuffler	    s     	DJJ!

2.2Q RVW W5djj\AXYgXhi
 	
U	) 	

2>:;A	B.	(B	B.	(B-$**Sb/-1-b-"-I
..
#C
&&13&
4CJr:   c                 X   U R                  U R                  5      nUR                  UR                  5      nUR                  UR                  5      nUR                  UR                  5      nUR                  UR                  5      nUR                  UR                  5      nUUUUUUU4$ rD   r  )r   weight0weight1weight2weight3r	  cx_tmpr
  hy_cy_grad_output_r_optgrad_hy_r_optgrad_cy_r_optr   rz  rp	  rr	  r	  ru	  rv	  rw	  rs	  rh	  diff_xdiff_hxdiff_cxdiff_w1diff_w2diff_bs                                r8   mkldnn_rnn_layer_backwardr	    s    4 __U[[)FmmCII&Gv||,G.G.Gw}}-F7GVVWgEEr:   )	out_int32r   c                    [         R                  " U U(       a  [         R                  O[         R                  [         R                  S9$ )Nr`   r   )rX   r   rP  r   r   r   
boundariesr	  r   s       r8   meta_bucketizer	  %  s2     &ekkEKK-- r:   r	  r	  r   c                p    UR                  SU(       a  [        R                  S9$ [        R                  S9$ )Nr4   r   )r   rX   rP  r   r	  s       r8   meta_bucketize_scalarr	  /  s>     
&ekk    ,1KK    r:   c                   ^ ^^^^ Sm[        T 5      S:X  a)  [        R                  " T R                  5       U 4S j5        [        T 5      S:X  a+  T R                  5       (       a  [        R
                  " S5        [        R                  " [        T[        5      UU4S j5        [        R                  " TS:  UU4S j5        [        R                  " [        T[        5      UU4S	 j5        [        R                  " [        T[        5      UU4S
 j5        [        R                  " TT:  U4S j5        [        R                  " TT R                  T R                  S9$ )Nzhistc()r  c                  $   > ST R                    S3$ )Nz%"histogram_cpu" not implemented for 'r  r   r  s   r8   rg   meta_histc.<locals>.<lambda>D  s    =ekk]!Lr:   r   z%_histc_cuda with floating point inputc                  $   > T S[        T 5       3$ )Nz#: argument 'bins' must be int, not rV  binsr  s   r8   rg   r	  J  s    7)>tDzlKr:   r   c                     > T ST  3$ )Nz: bins must be > 0, but got r4   r	  s   r8   rg   r	  L  s    gY.J4&#Qr:   c                  $   > T  S[        T5       3$ )Nz%: argument 'min' must be Number, not rV  )r  r  s   r8   rg   r	  O      7)@cLr:   c                  $   > T  S[        T5       3$ )Nz%: argument 'max' must be Number, not rV  )r  r  s   r8   rg   r	  S  r	  r:   c                     > T  S3$ )Nz: max must be larger than minr4   )r  s   r8   rg   r	  U  s    y0M%Nr:   r   )r  rX   rj   r  rQ   rN  rt   r   r   r   r   r`   )r   r	  r  r  r  s   ````@r8   
meta_histcr	  =  s     G5U"##%L	
 5V#(?(?(A(A%%&MN	LL4!K 
LLQR	LL3L 
LL3L 
LLNO;;tELLDDr:   c                 F  ^  [        T R                  5       USS9n[        R                  " T R	                  5       S:g  =(       d#    [        S T R                  5       SS   5       5      U 4S j5        T R                  U5      R                  [        R                  " T 5      S9$ )Nr   r,	  r   c              3   *   #    U  H	  oS :  v   M     g7fr 	  r4   )rv   r   s     r8   rx   ,meta_upsample_bimode2d_aa.<locals>.<genexpr>g  s     !H7Gt(7Gr#	  r3   c                  *   > ST R                  5        3$ r4	  r   r  s   r8   rg   +meta_upsample_bimode2d_aa.<locals>.<lambda>h  r+	  r:   r   )
r'	  r   rX   rj   r   r  r   r4  rQ   r   )r   r  rV  r7	  r8	  r/	  s   `     r8   meta_upsample_bimode2d_aar
  Y  s     -

kA 
LLHc!Huzz|AB7G!HH_ ??+,//11%8 0  r:   c                 T  ^ ^^ [        X!SS9m[        R                  " T R                  S:H  U 4S j5        [	        S5       H3  m[        R                  " T R
                  T   TT   :H  UU U4S j5        M5     T R                  U5      R                  [        R                  " T 5      S9$ )Nr   r,	  r0  c                  "   > ST R                    3$ r=	  r  r  s   r8   rg   4meta_upsample_bimode2d_aa_backward.<locals>.<lambda>}  r?	  r:   c            
      D   > ST ST T    ST STR                  T5       3$ )NzD
Expected grad_output to have the same shape as output; output.size(rA	  z
but got grad_output_size(r   rB	  s   r8   rg   r
    s>     DDE3dK[\]K^J_ `D!1!1!!4 59r:   r   )
r'	  rX   rj   r   r   r   r   r4  rQ   r   )r%  r  r  rV  r7	  r8	  r/	  r   s   `     @@r8   "meta_upsample_bimode2d_aa_backwardr
  o  s     -! 
LLAk 1Xa $4Q$779	
    ,//11+> 0  r:   c                 X   [         R                  " UR                  5       S:H  S 5        [         R                  " UR                  5       S:H  S 5        [         R                  " UR                  R                  S 5        [         R                  " UR                  R                  S 5        g )Nr3   c                      g)Nz%found_inf must be a 1-element tensor.r4   r4   r:   r8   rg   <_amp_foreach_non_finite_check_and_unscale_.<locals>.<lambda>      (Or:   c                      g)Nz%inv_scale must be a 1-element tensor.r4   r4   r:   r8   rg   r

    r
  r:   c                      g)Nz!found_inf must be a float tensor.r4   r4   r:   r8   rg   r

        3r:   c                      g)Nz!inv_scale must be a float tensor.r4   r4   r:   r8   rg   r

    r
  r:   )rX   rj   r   r`   r  )r   rR  	inv_scales      r8   *_amp_foreach_non_finite_check_and_unscale_r
    s|    	LLQ O 
LLQ O 
LL))3 
LL))3r:   c                 .    [         R                  " U 5      $ rD   rG  )r   nanposinfneginfs       r8   
nan_to_numr
    r  r:   c                    U R                   [        R                  [        R                  [        R                  [        R
                  1;   a  [        SU R                    S35      eU R                  n[        X5      n[        X#5      nX:X  a  U $ [        U R                  5       5      n[        U R                  5       5      nXR   XQ   sXQ'   XR'   XB   XA   sXA'   XB'   U R                  XE5        U $ )Nz>torch.transpose_: in-place transposition is not supported for z layout)r   rX   r  
sparse_cscr  
sparse_bscr   r   r   r   r   r   r   )r   dim0r  ndimsr   r   s         r8   r  r    s    {{	  LT[[MY`a
 	
 IIE$&D$&D|		D$++- F!'v|FL&,!ZDJ
T"Kr:   c                 2   U R                   nU R                  (       a?  U R                  5       nU R                  5       nUS::  a  US:X  d  [	        SU SU S35      eO#U R                  5       S:  a  [	        SU S35      e[        U SUS:  a  S5      $ S5      $ )	Nr   r   zEt_ expects a tensor with <= 2 sparse and 0 dense dimensions, but got z sparse and z dense dimensionsz6t_ expects a tensor with <= 2 dimensions, but self is rZ  r3   )r   ri  rk  rl  r   r   r  )r   r
  rk  rl  s       r8   t_r
    s    IIE~~__&
NN$	aIN %,l9+=NP  %3 88:> HqQ  dAEAIq55155r:   )r	  r   sidesorterc                  ^ ^^ [         R                  " [        T R                  5      S:*  =(       d    T R                  S S TR                  S S :H  UU 4S j5        [         R                  " TS L =(       d    T R                  TR                  :H  U U4S j5        [         R                  " US:g  =(       d    U(       + S 5        U(       a  [         R                  O[         R
                  n[        T[         R                  5      (       a$  [         R                  " TU[         R                  S9$ [         R                  " SUT R                  S	9$ )
Nr3   r   c                  `   > S[        TR                  5       S[        T R                  5       3$ )Nztorch.searchsorted(): boundaries tensor should be 1 dimension or the first N-1 dimensions of boundaries tensor and input value tensor must match, but we got boundaries tensor z and input value tensor r   r   )r   sorted_sequences   r8   rg   #meta_searchsorted.<locals>.<lambda>  s3    3378M8M3N2O P""&tzz"2!35r:   c                  n   > S[        T R                  5       STb  [        TR                  5       3$ /  3$ )Nz[torch.searchsorted(): boundary and sorter must have the same size, but got boundary tensor z and got sorter tensor r"
  )r#
  r
  s   r8   rg   r$
    sB    ##'(=(=#>"??V%+%7tFLL!@B=?@Br:   r   c                      g)Nzetorch.searchsorted(): side and right can't be set to opposites, got side of left while right was Truer4   r4   r:   r8   rg   r$
    s     $r:   r	  r4   r	  )rX   rj   r   r   rP  r   rt   r   r   r   r   r   )r#
  r   r	  r   r
  r
  r`   s   ``   ` r8   meta_searchsortedr'
    s     
LLO!!"a' 	9  "%CR8	
	 
LL$?///6<<?	
 
LL#e)	$ %EKK%++E$%%U-D-D
 	
 {{2U?3I3IJJr:   c                    ^  [         R                  " T [         R                  [         R                  [         R                  4;  U 4S j5        g )Nc                     > ST  3$ )Nz/Unsupported input type encountered for isin(): r4   r   s   r8   rg   3_check_for_unsupported_isin_dtype.<locals>.<lambda>  s    A%Ir:   )rX   rj   r  
complex128	complex64r   s   `r8   !_check_for_unsupported_isin_dtyper-
  
  s/    	LLejj%"2"2EOODDIr:   c                 H    U R                  X R                  S5      45      nU$ )Nr   rN  )r%  r   num_weightsr-  r:  r	  s         r8   meta_embedding_dense_backwardr0
    s(     ''6F6Fr6J(KLKr:   c                 t    U	(       a  [         R                  U UUUUUUUU
U5
      $ [        U UUUUUUUU
U5
      $ rD   )r-   _embedding_bag_sparse_backward!meta_embedding_bag_dense_backward)rz  r   r  r>  r?  maximum_indicesr/
  r:  rz  r;  r'  r-  s               r8   meta_embedding_bag_backwardr5
    se     22
 	
 1
 	
r:   c
                 X  ^  [         R                  " T R                  [         R                  [         R                  [         R
                  [         R                  4;   U 4S j5        U[        :X  a  [         R                  " US L5        T R                  UT R                  S5      45      n
U
$ )Nc                  "   > ST R                    3$ )Nz$Unsupported input type encountered: r   )rz  s   r8   rg   3meta_embedding_bag_dense_backward.<locals>.<lambda>W  s    6tzzlCr:   r3   )
rX   rj   r`   rn  ro  rm  float64r8  r   r   )rz  r   r>  r?  r4
  r/
  r:  rz  r'  r-  index_grad_weights   `          r8   r3
  r3
  H  sv     
LL

u}}ennemmU]]SSC x_D01TYYq\'BCr:   c                    U R                  S5      n[        R                  " U[        :H  S 5        [        R                  " U R	                  5       S:H  5        [        R                  " UR	                  5       S:H  5        UR                  S5      n[        R                  " UR	                  5       S:H  5        [        R                  " UR                  S5      U:H  5        U R                  U45      n	U	$ )Nr3   c                      g)NzHembedding_bag_backward: per_sample_weights only supported for mode='sum'r4   r4   r:   r8   rg   @meta_embedding_bag_per_sample_weights_backward.<locals>.<lambda>l  r3  r:   r   r   )r   rX   rj   r7  r   r   )
rz  rF  r   r  r>  rz  r-  embedding_featuresr	  r
  s
             r8   .meta_embedding_bag_per_sample_weights_backwardr?
  _  s     1	LLZ 
LLq!	LL!#$,,q/K	LL"#	LLQ#556^^[N+FMr:   )assume_uniqueinvertc                   [         R                  " [        U [        5      =(       d    [        U[        5      S 5        [        U [        5      (       d  [         R                  " XR
                  S9n [        U[        5      (       d  [         R                  " XR
                  S9n[        U R                  5        [        UR                  5        [         R                  " U [         R                  S9$ )Nc                      g)Nz<At least one of elements and test_elements must be a Tensor.r4   r4   r:   r8   rg   meta_isin.<locals>.<lambda>|  r  r:   r  r   )
rX   rj   rt   r   r=  r   r-
  r`   r   r  )elementstest_elementsr@
  rA
  s       r8   	meta_isinrG
  w  s     
LL8V$I
=&(IN h''<<1E1EFmV,,]??K%hnn5%m&9&9:HEJJ77r:   r  c                     [         R                  " U S:  S 5        [        U[        R                  S9u  p#[         R
                  " XS9$ )Nr   c                      g)Nz,polygamma(n, x) does not support negative n.r4   r4   r:   r8   rg    meta_polygamma.<locals>.<lambda>  s    !Or:   r  r   )rX   rj   r   r   r  r   )r  r   rT   rU   s       r8   meta_polygammarK
    sB     
LLaOP(;HHOA D55r:   c                     [        S5      e)Nz.Tensor.item() cannot be called on meta tensors)r3  r   s    r8   meta_local_scalar_denserM
    s    
G
HHr:   c                 .    [         R                  " U 5      $ rD   rG  r   s    r8   silurO
    r  r:   c                 ^    [        U [        R                  S9u  p[        R                  " XS9$ r  )r   r   r  rX   r   )r   rT   rU   s      r8   sigmoidrQ
    s/     );HHOA D55r:   c                 |   U R                  5       S:H  nUR                  5       S:H  nU(       a  U(       a4  UR                  S5      U R                  S5      UR                  S5      /nGO*[        R                  " UR                  S5      UR                  S5      :H  S 5        U R                  S5      UR                  S5      /nOU(       a\  [        R                  " UR                  S5      U R                  S5      :H  S 5        U R                  S5      UR                  S5      /nOk[        R                  " U R                  S5      UR                  S5      :H  S 5        U R                  S5      U R                  S5      UR                  S5      /nU=(       d    U R                  n[        R
                  R                  (       aV  SUR                  -  nUS   U-   S-
  U-  U-  nXE:X  a  US   U-  US/n	OUS/n	[        R                  " XiX0R                  S	9n
U
$ [        R                  " XcU R                  S	9n
U
$ )
Nr   r   r3   c                      gNz matrix batch sizes have to matchr4   r4   r:   r8   rg   2_create_grouped_mm_output_tensor.<locals>.<lambda>      6Xr:   r   c                      grT
  r4   r4   r:   r8   rg   rU
    rV
  r:   c                      g)Nzbatched dimension has to matchr4   r4   r:   r8   rg   rU
    s    6Vr:   r  r	  )r   r   rX   rj   r`   r  r   itemsizer@  r   r   )rU  rW  offsrJ  
mat1_is_2d
mat2_is_2dr~	  	alignmentsize_paddedrL	  r   s              r8    _create_grouped_mm_output_tensorr_
    s   qJqJ		!diilDIIaLAHLL		!		!,.X 		!diim4HLL		!		!,.X 		!diil3H LL		!		!,.V 		!diilDIIbMBH'TZZI}}),,,	|i/!3	AIM#"1+3[!DJ%q)J!!	++

 J kk(DKKHJr:   mat_amat_brZ
  c	                 	  ^ ^^^^^ [         R                  " TS L TS L :H  S 5        TS L=(       a    TS Ln	U	(       a  [         R                  n
[         R                  R                  (       a`  [         R
                  R                  5       (       a=  S[         R
                  R                  S5      R                  ;   a  [         R                  n
[         R                  " T R                  U
:H  =(       a    TR                  U
:H  U U4S j5        O[[         R                  " T R                  [         R                  :H  =(       a    TR                  [         R                  :H  U U4S j5        [         R                  " T R                  5       S;   =(       a    TR                  5       S;   U U4S j5        T R                  5       S:H  nTR                  5       S:H  nU(       a  U(       d9  [         R                  " T R                  S	5      TR                  S
5      :H  S 5        U	(       aH  S nS n[         R                  " U" T 5      U 4S j5        [         R                  " U" T5      U4S j5        S nU" ST 5        U" ST5        TGbD  TGb@  [         R                  " TR                  [         R                  :H  =(       a    TR                  [         R                  :H  =(       dA    TR                  [         R                  :H  =(       a    TR                  [         R                  :H  UU4S j5        TR                  [         R                  :H  =(       a    TR                  [         R                  :H  mSU4S jjnTb  U(       a  U(       a  TR                   S   OSnU" STT SU5        U" STTSU5        [         R                  " US L S 5        U(       d  U(       a  [         R                  " TS LU U4S j5        Tbb  [         R                  " TR                  5       S:H  U4S j5        [         R                  " TR                  [         R"                  :H  U4S j5        O[         R                  " TS L S 5        [         R                  " US L S 5        [         R                  " US L =(       d    U[         R                  :H  S 5        [%        T TTU5      $ ) Nc                      g)Nz,Either both scale factors are given, or noner4   r4   r:   r8   rg   )_meta_grouped_mm_common.<locals>.<lambda>  s    >r:   gfx94r   c                  >   > ST R                    STR                    S3$ )Nz5Expected inputs of E4M3 FP8 type but got mat_a.dtype= and mat_b.dtype=r  r   r`
  ra
  s   r8   rg   rd
    s#    KEKK=Xijojujuivvwxr:   c                  >   > ST R                    STR                    S3$ )Nz1Expected inputs of BF16 type but got mat_a.dtype=rg
  r  r   rh
  s   r8   rg   rd
    s#    G}Tefkfqfqerrstr:   )r   r0   c                  L   > ST R                  5        STR                  5        3$ )Nz3Multiplicands must be 2D or 3D but got mat_a.dim()=z and mat_b.dim()=r   rh
  s   r8   rg   rd
    s%    Eeiik]Rcdidmdmdocpqr:   r   r   r  c                      g)Nz3contraction dimension of mat_a and mat_b must matchr4   r4   r:   r8   rg   rd
     rW  r:   c                 P    U R                  5       nUS   S:  =(       a    US   S:H  $ Nr  r3   r   r,  mat
mat_strides     r8   rq  -_meta_grouped_mm_common.<locals>.is_row_major
   s*    Jb>A%=*R.A*==r:   c                 P    U R                  5       nUS   S:H  =(       a    US   S:  $ rm
  r,  rn
  s     r8   ru  -_meta_grouped_mm_common.<locals>.is_col_major   s*    Jb>Q&=:b>A+==r:   c                  0   > ST R                  5       SS   3$ )NzNExpected mat_a tensor to be row major in the last two dimensions, got strides r  r,  )r`
  s   r8   rg   rd
     s!    dejeqeqestvtwexdyzr:   c                  0   > ST R                  5       SS   3$ )NzQExpected mat_b tensor to be column major in the last two dimensions, got strides r  r,  )ra
  s   r8   rg   rd
     s!    ghmhththvwywzh{g|}r:   c                   ^ ^^^ TR                  5       S-
  mSTR                  5       -  nTR                  5       mTTS-
     S:X  aJ  TT   [        STR                  TS-
     5      :  a'  [
        R                  " TT   U-  S:H  UU U4S j5        g TT   S:X  aM  TTS-
     [        STR                  T   5      :  a*  [
        R                  " TTS-
     U-  S:H  UU U4S j5        g [
        R                  " SUU4S j5        g )Nr3   r  r   c                  "   > ST ST  STT     S3$ )Nr   stride along % dim to be multiple of 16 bytes, got r  r4   end_dimmat_namerp
  s   r8   rg   F_meta_grouped_mm_common.<locals>.check_valid_strides.<locals>.<lambda>$   s'    )H:^G9Dijtu|j}i~~  Ar:   c                  .   > ST ST S-
   STT S-
      S3$ )Nr  rx
  r3   ry
  r  r4   rz
  s   r8   rg   r}
  +   sI    )H:^GaK=Hmnx  zA  DE  zE  oF  nG  GH  Ir:   Fc                  *   > ST ST R                    S3$ )NzInvalid strides/sizes, got z for strides and z for sizes.rR  rn
  s   r8   rg   r}
  0   s    5j\ARSVS\S\R]]hir:   )r   element_sizer   r  r   rX   rj   )r|
  ro
  r]
  r{
  rp
  s   `` @@r8   check_valid_strides4_meta_grouped_mm_common.<locals>.check_valid_strides   s    '')a-#**,,	ZZ\
gk"a'Jw,?3syy1%D
 -
 LL7#i/14 A  A%*Wq[*ASsyy!F
 +
 LL7Q;')3q8 I
 LLir:   r`
  ra
  c                  >   > ST R                    STR                    S3$ )NzhFor FP8 scales must both be float32, or for MXFP8 both scales must be float8_e8m0fnu. Got scale_a.dtype=z and scale_b.dtype=r  r   r  s   r8   rg   rd
  =   sR    ~  @G  @M  @M  N  Na  bi  bo  bo  ap  pq  rr:   r3   c                    >^ ^^^^^^	^
 TR                  5       S:X  a  [        R                  " TR                  5       U 4S j5        T(       a=  [        R                  " TR                  5       TR                  5       :H  UUU 4S j5        g [        R                  " TR                  5       S:H  UU 4S j5        [        R                  " TR                  S   TR                  T   T-  :H  UUUU U4S j5        g [        R                  " TR                  S5      S:H  U 4S	 j5        [        R                  " TR                  S   TR                  S   :H  UUU 4S
 j5        T(       a  [        R                  " TR                  TR                  S-
  :H  UUU 4S j5        TR                  u  mpVSn[        XW-  S5      m	[        US5      m
[        R                  " TR                  S   T:H  =(       a    TR                  S   T	T
-  :H  UU	U
UU4S j5        g [        R                  " TR                  5       S:H  UU 4S j5        [        R                  " TR                  S   TR                  ST-      :H  UUU U4S j5        g )Nr   c                     > ST  S3$ )Nr  z to be contiguous.r4   
scale_names   r8   rg   >_meta_grouped_mm_common.<locals>.check_scale.<locals>.<lambda>H   s    i
|3EFr:   c                  B   > ST ST R                    STR                    3$ )NzKFor MXFP8, scale must have same number of dimensions as target tensor, but  has mat.ndim= and scale.ndim=r  ro
  r  r
  s   r8   rg   r
  Q   sX    "mnxmy  zH  IL  IQ  IQ  HR  Rb  ch  cm  cm  bn  !or:   r3   c                  2   > ST ST R                  5        S3$ )Nr  z to be 1D tensor, but got 	D tensor.r   r  r
  s   r8   rg   r
  V   !    )J<7QRWR[R[R]Q^^g hr:   r   c                  V   > ST ST R                   T   T-   STR                   S    S3$ )Nr  z	 to have r  r   z
 elements.rR  )ro
  r  scale_multiplierr
  
scaled_dims   r8   rg   r
  Z   sU    )J<y:AVYiAi@jjyz  {F  {F  GH  {I  zJ  JT  !Ur:   r   c                     > ST  S3$ )Nr  z( to be contiguous in the last dimension.r4   r
  s   r8   rg   r
  _   s    i
|3[\r:   c                  P   > ST ST R                   S    STR                   S    S3$ )Nr  z batch dimension to be r   , got r  rR  r
  s   r8   rg   r
  c   s4    i
|3J399UV<.X^_d_j_jkl_m^nnopr:   c                  B   > ST ST R                    STR                    3$ )Nz0For MXFP8, 3d tensor should have 2d scales, but r
  r
  r  r
  s   r8   rg   r
  j   sD    "RS]R^^lmpmumulv  wG  HM  HR  HR  GS  !Tr:   r  r0  r  c                  N   > STR                    ST  STT-   STR                    3$ )NzFor MXFP8, expected mat.shape=z to have scale shape of (,z), but got rR  )G	blocked_K	blocked_Nro
  r  s   r8   rg   r
  t   sO    "@Kdefdgghiru~i~h  @K  LQ  LW  LW  KX  !Yr:   c                  2   > ST ST R                  5        S3$ )Nr  z to be 2D tensor, but got r
  r   r
  s   r8   rg   r
  y   r
  r:   c                  V   > ST ST R                   ST-       STR                   S    S3$ )Nr  z non-batch dimension to be r3   r
  r  rR  )ro
  r  r
  r
  s   r8   rg   r
  }   sR    )J<7RSVS\S\]^ak]kSlRmmstytt  AB  uC  tD  DE  !Fr:   )r   rX   rj   rq  r   r   r   r?   )r
  r  ro
  r
  r
  r  r  r  r
  r
  r
  is_mxfp8s   `````   @@@r8   check_scale,_meta_grouped_mm_common.<locals>.check_scaleD   s   wwyA~'')F LL		swwy0 o
 LL		q(h LLA#))J*?BR*RR U  U
 LL$)\ KKNciil2p LL

chhl2 T "iiGAq!#J ( ;I (C 0ILLA!+WA)iBW0W Y  Y
 LL		q(h LLA#))A
N*CC Fr:   rZ  r[  c                      g)Nz:Scale result tensor provided, but it is not supported yet.r4   r4   r:   r8   rg   rd
     rq   r:   c                  N   > ST R                  5        STR                  5        S3$ )Nz/Offsets tensor not provided, but is needed for zD/zD multiplicand layouts.r   rh
  s   r8   rg   rd
     s(    Eeiik]RTUZU^U^U`Taaxyr:   c                  ,   > ST R                  5        S3$ )Nz.Offsets tensor must be 1D, but got offs.dim()=r  r   rZ
  s   r8   rg   rd
     s    HTUVr:   c                  $   > ST R                    S3$ )Nz7Offsets tensor must be integer (int32) tensor, but got r  r   r
  s   r8   rg   rd
     s    QRVR\R\Q]]^_r:   c                      g)NzJOffsets tensor provided, but is not needed for 3D/3D multiplicand layouts.r4   r4   r:   r8   rg   rd
     s    `r:   c                      g)Nz2Bias tensor provided, but it is not supported yet.r4   r4   r:   r8   rg   rd
     s    Dr:   c                      g)Nz4If output dtype provided, it must be torch.bfloat16.r4   r4   r:   r8   rg   rd
     s    Fr:   r;  )rX   rj   rp  r  r  r   r  get_device_propertiesgcnArchNamerb  r`   ro  r   r   rm  r  r   rP  r_
  )r`
  ra
  rZ  r[  rZ
  rH  r\  rJ  r]  scaled	fp8_dtypemat_a_is_2dmat_b_is_2drq  ru  r
  r
  r
  r
  s   `````             @r8   _meta_grouped_mm_commonr
    s    
LL	Dgo.> D 8WD%8F ''	MM

''))5::;;A>JJJ--IKK9$A	)Ax	

 	KK5>>)KekkU^^.Kt	

 
LL		v7%))+"7q
 ))+"K))+"KkJJrNejjn,I	

 	>	> 	z	
 	}	

0 ''w2]]emm+N0N !5!55 :MMU%9%99 r	
 MMU111 6!5!55 	
:	z "-++DJJqMST 	 	Iwq2BCIwq2BCD P	

 ky	
 LL
aV LL

ekk)_
 	DL`	

 
LLD
 
LLT8Y%..8F
 ,E5$	JJr:   c                 "    [        U US S UUS US9$ )N)rZ  r[  rZ
  rH  r\  rJ  )r
  )r`
  ra
  rZ
  rH  rJ  s        r8   meta_grouped_mmr
     s)     #	 	r:   c	                 V    U=(       d    [         R                  n[        U UUUUUUUUS9	$ )N)rZ  r[  rZ
  rH  r\  rJ  r]  )rX   ro  r
  )	r`
  ra
  rZ  r[  rZ
  rH  r\  rJ  r]  s	            r8   meta_scaled_grouped_mmr
     s:     +U^^I"!%
 
r:   c                 H   [        U5      [        S5      :X  a2  U  H,  n[        R                  " UR                  5       S:  S 5        M.     / nU  HO  nUb  UOUR                  nUR
                  (       a  [        U5      nUR                  UR                  SUS95        MQ     U$ )Ninfr   c                      g)Nz:_foreach_norm cannot compute infinity norm on empty tensorr4   r4   r:   r8   rg   #meta_foreach_norm.<locals>.<lambda>   s    Tr:   r4   r   )	r\   rX   rj   r   r`   r   r   r   r   )tensorsordr`   r  resultsrJ  s         r8   meta_foreach_normr
     s    SzU5\!ALL	AT 
 G".EAGG	0;Iq{{2Y{78	 
 Nr:   r=   half_to_floatc                 \   U(       aG  U R                   [        R                  [        R                  4;  a  [	        SU R                    S35      e[
        R                  " U [
        R                  R                  S9u  p4U(       d  UOUn[        R                  " X[        R                  S9nU$ )Nz%half_to_float is True but x.dtype is z, expected half or bfloat16r  r	  )r`   rX   rZ   ro  r   rQ   r   r   rR   r   r   )r=   r   r
  computation_dtyperU   r  s         r8   softmaxr
     s     775::u~~66 7y@[\  ',&>&>	uDDLL'# (5<:KL


1@W@W
XCJr:   c           	        ^^^^	^
^ [         R                  " [        T5      S-  S:H  U4S j5        U R                  m[        T5      m
[        T5      S-  nT
U-
  m	[         R                  " T
U:  U
U4S j5        [	        S T 5       5      (       a  U n[        T	T
5       Hx  mST
T-
  S-
  -  mTT   S:  a*  UR                  TTT   * UR                  T   TT   -   5      nTTS-      S:  d  MO  UR                  TSUR                  T   TTS-      -   5      nMz     UR                  5       $ [        TS T	 5      n[        U5       Ha  m[        T5      TS-   S-  -
  mTT	T-      TT   -   TTS-      -   n[         R                  " US:  UUU	UU4S j5        UR                  U5        Mc     [         R                  " UU R                  U R                  U R                  [        U 5      S9$ )	Nr   r   c                      > S[        T 5       3$ )Nz1Length of pad must be even but instead it equals r  r  s   r8   rg   '_constant_pad_nd_meta.<locals>.<lambda>!  s    CCH:Nr:   c                  (   > S[        T5       ST  S3$ )Nz`Length of pad should be no more than twice the number of dimensions of the input. Pad length is z while the input has z dimensions.r  )l_inpr  s   r8   rg   r
  !  s      225c(;P'r:   c              3   n   #    U  H+  n[        U[        R                  5      =(       a    US :*  v   M-     g7fr 	  )rt   rQ   IntWithoutSymInt)rv   ru  s     r8   rx   (_constant_pad_nd_meta.<locals>.<genexpr>!  s)     
IS:a//0;Q!V;Ss   35r3   c            	      F   > STTT -       STT    STTS-       STT -    S3	$ )NzThe input size z, plus negative padding r   r3   zG resulted in a negative output size, which is invalid. Check dimension z of your input.r4   )r   r  l_diffr  pad_idxs   r8   rg   r
  )!  sE    ok&1*&=%>>V7|nE#gk"2!3 4117!OMr:   )r`   r   r   r   )rX   rj   r   r   r  r   narrowr   r   r   r   r`   r   r   r   )r   r  r  l_padc_input	new_shapenew_dimr   r  r
  r
  r
  s    `     @@@@@r8   _constant_pad_nd_metar
   !  s    
LLC1N
 ++KEHMEU]F	LL	 
IS
IIIvu%A519q=)G7|a!..G}gmmA&6W&E 7Q;!#!..Aw}}Q/?#gPQkBR/RS & }}[&)*I5\c(q1uk*fqj)CL83w{;KKqLM M	
 	!  ;;kk||))+E2 r:   r-  r:  r;  c                 B   U R                  5       S:w  a  [        SU R                  5        S35      eU R                  nUR                  nUR                  S:X  a  US   4nO%UR                  S:X  a  US   US   4nO
/ UQUS   P7nU R                  nU R                  XxS9$ )Nr   z'weight' must be 2-D, got z-Dr   r3   r   )r   r   r   r   r`   r   )	rF  r   r-  r:  r;  weight_shapeindices_shaper   rJ  s	            r8   	embeddingr
  8!  s     zz|q9&**,rJKK<<LMMM||q&21o%7			"1%|A7	5m5\!_5	II77r:   max_lengthspadding_valuec                    [        U5      S:w  a  [        S[        U5       S35      e[        U5      S:w  a  [        S[        U5       S35      eUS   R                  S   S-
  nUS   nXE/U R                  SS  Q7nU R                  U5      $ )Nr3   z&Only one jagged dim is supported, got z offsetsz max_lengthsr   )r   r   r   r   )r   r  r
  r
  r  r  rt  s          r8   $meta__jagged_to_padded_dense_forwardr
  Q!  s     7|q4S\N(K
 	
 ;14S5E4FlS
 	
 	
aAAA,6<<+,LL))r:   c                 B    [        U 5      [        5       S 5       5       nU$ )Nc                 2    [        U [        R                  S9$ rn  rV   r   r  rH  s    r8   _f)_create_unary_float_meta_func.<locals>._fi!  s      =JJ
 	
r:   rM   r$   funcr
  s     r8   _create_unary_float_meta_funcr
  h!  *    4]
  

 Ir:   c                 (   U R                   (       d"  UR                   (       d  UR                   (       a  [        S5      eU R                  5       S:X  a,  U R                  U R                  5      U R                  S5      4$ U R                  S5      nU R                  S5      nUR                  S5      nU R                  XU5      nU
(       a4  U(       a  U R                  XU5      nUU4$ U R                  XX5      n UU4$ U R                  S5      nUU4$ )NzP_native_multi_head_attention fake implementation does not support nested tensorsr   r3   )	is_nestedr  r   r   r   r   )r  r   r  	embed_dimr  
qkv_weightqkv_biasproj_weight	proj_biasr  need_weightsaverage_attn_weights	mask_typer  T
output_dimr
  attn_weightss                     r8    native_multi_head_attention_faker
  t!  s      #--5??!^
 	
 {{},eooa.@AA

1A

1A !!!$J__Q:.F !??13L L!!	 !??1=L L!! q)L!!r:   c                 B    [        U 5      [        5       S 5       5       nU$ )Nc                 2    [        X[        R                  S9$ rn  r
  r<   s     r8   r
  *_create_binary_float_meta_func.<locals>._f!  s      !@!M!M
 	
r:   r
  r
  s     r8   _create_binary_float_meta_funcr
  !  r
  r:   c                    ^  [        T 5      U 4S j5       nT R                   S3nX!l        [        [        [        U5      5      " U5      nU$ )Nc                 `   > T" U /UQ70 UD6n[        U R                  UR                  5        U $ rD   r  )r   rS   rO  r   rF   s       r8   _fn#_register_inplace_meta.<locals>._fn!  s.    '''

CII6r:   rT   )r   r   rM   getattrr-   )rF   r
  inplace_names   `  r8   _register_inplace_metar
  !  sK    
2Y 
 kk]!$LL
l3
4S
9CJr:   c                 z  ^ ^^ [         R                  " T R                  TR                  :H  UU 4S j5        T T/n[        T[        5      (       aT  TR
                  S:w  a3  [         R                  " T R                  TR                  :H  U U4S j5        UR                  T5        [        US[        R                  06$ )Nc                  <   > STR                    ST R                    3$ )Nr  z for `end`, but got dtype r   )r   r   s   r8   rg   lerp.<locals>.<lambda>!  s    /%++.HTr:   r   c                  <   > ST R                    STR                    3$ )Nr  z for `weight`, but got dtype r   )r   rF  s   r8   rg   r
  !  s    /%++6STZT`T`Sabr:   rN   )
rX   rj   r`   rt   r   r   r   rV   r   rR   )r   r   rF  rS   s   ``` r8   lerpr  !  s     
LLsyy T 3<D&*%%;;!LLv||+b 	F	=EE r:   )r  c                4    [        XU[        R                  S9$ rn  ro  r   tensor1tensor2r  s       r8   addcmulr  !  s     0O0W0W r:   c                    [         R                  " [        R                  " UR                  5      =(       a     [        R                  " UR                  5      (       + S 5        [        XU[        R                  S9$ )Nc                      g)N)zFInteger division with addcdiv is no longer supported, and in a future zErelease addcdiv will perform a true division of tensor1 and tensor2. z4The historic addcdiv behavior can be implemented as zA(input + value * torch.trunc(tensor1 / tensor2)).to(input.dtype) zfor integer inputs and as z6(input + value * tensor1 / tensor2) for float inputs. z?The future addcdiv behavior is just the latter implementation: z4(input + value * tensor1 / tensor2), for all dtypes.r4   r4   r:   r8   rg   addcdiv.<locals>.<lambda>!  s     	
r:   r  )rX   rj   rQ   rW  r`   rV   r   rR   r  s       r8   addcdivr
  !  s`     
LL""7==1 6&&w}}5	
		
  0O0W0W r:   c                  ,   0 n S H"  n[         U   nU H  nX0;  d  M
  X#   X'   M     M$     U R                  5        GH  u  pE[        U[        R                  R
                  5      (       a  M1  [        U[        5      (       d  [        S[        U5       35      eUR                  [        R                  R                  R                  5      " U5        [        R                  R                  UR                  5       S5      (       a  U[         S   ;   a  [        U S35      eM  UR                   (       a  M  UR                  5       S;   a  GM  SUR                  5       ;   a  ["        R%                  XE5        GM<  SUR                  5       ;   a  [&        R%                  XE5        GMh  S	UR                  5       ;   a  [(        R%                  XE5        GM  S
UR                  5       ;   a  [*        R%                  XE5        GM  [,        R%                  XE5        GM     g )N)r   post_autogradpre_autogradz$op_overload must be OpOverload, got CompositeImplicitAutogradr   z is a CompositeImplicitAutograd op, we shouldn't register meta function for it. Instead, we should let the decomposition run and write meta kernels for the base operators.>   aten::cloneaten::copy_aten::rot90aten::_to_copyaten::empty_stridedaten::constant_pad_ndaten::as_strided_scatterzmkldnn::zmkl::zonednn::zquantized::)r   itemsrt   rX   _opsHigherOrderOperatorr   r   r   py_impl_CDispatchKeyr/   %_dispatch_has_kernel_for_dispatch_keyr  r3  is_view2_meta_lib_dont_use_me_use_register_meta_for_mkldnnimpl/_meta_lib_dont_use_me_use_register_meta_for_mkl2_meta_lib_dont_use_me_use_register_meta_for_onednn5_meta_lib_dont_use_me_use_register_meta_for_quantized'_meta_lib_dont_use_me_use_register_meta)activate_meta_tabletypregistryopoop_overloadrF   s         r8   activate_metar)  "  s    9-c2C-+3=#(  9 /446
 k5::#A#ABB+z22 6tK7H6IJ  	EHH00556r:8899 ;
 
 8@@""m $; ;  A    	 [--//BGGXK,,..?DD[U{//11BGGX+"2"2"44EJJ 8<<[Ms 7r:   )Fr	  rD   )NNNFr   r3   r   r:  )Tr  )r  )r  T)FF)TT)r|  )FTN)TFF)TF)r   )g      ?N)r@   str)r4   r  r;  F)r4   r  FTN)Fr   FNFr   )NF)r   F)g      ?gUUUUUU?FN)NNNNN)r   NNr3   )NNF)        FFN)Nr+  FFN)r+  FNN)Nr+  FNN)r+  FN)FN)FNNNN)NNNF)NNNNF)Nr   FNN)NNNN)r   TT)NNr   N)d   r   r   )r   )r   N)r   FF)r+  )NTTN(  r  collections.abcr   r   enumr   	functoolsr   typingr   typing_extensionsr   rX   torch._prims_commonr  rQ   r	   r
   r   torch._decompr   r   r   r   
torch._opsr   torch._primsr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   torch._prims_common.wrappersr    r!   r"   r#   r$   r%  r%   r&   torch.fx.experimentalr'   r  torch.nn.functionalr(   r)   torch.utilsr*   rI   r+   r,   opsr-   libraryLibraryr#  r   r7  r9  r8  r9   r?   rM   rV   rb   rl   linspacelogspacer  r   taker6  r   r   r   r   cummaxcumminr   r   r   r   r   r  r   _fft_c2cr   r   r   _fft_r2cr  randpermgenerator_outr  r   r  randintr#  r  low_outr'  randr)  _fft_c2rr.  r  r:  rA  
unsqueeze_rE  _sparse_semi_structured_linearr*  r`   rT  _sparse_semi_structured_mmra  _sparse_semi_structured_addmmre  _cslt_sparse_mmru  index_reducer|  index_reduce_r~  index_selectr  segment_reducer  r  	unary_outr  r   r  r  r  r  r  r  r  _assert_asyncr  msgr  _printr  _make_dep_tokenr  r  _functional_sym_constrain_ranger  r  (_functional_sym_constrain_range_for_sizer  _functional_assert_asyncr  r   r  r   r  r  r  r  _linalg_eighr  r  _linalg_eigvalslinalg_eigvalsr  
linalg_eigr  r  r  r  r$  r(  r.  rB  linalg_inv_exrE  linalg_ldl_factor_exri   rK  linalg_ldl_solver[  	linalg_lurc  linalg_lu_factor_exrh  linalg_lu_solverr  	lu_unpackry  r  	linalg_qrr  r  r  _linalg_svdr  rX  r   r  r  linalg_solve_triangularr  r  r  _linalg_detr  r  r  r  reflection_pad1dr  replication_pad1dr  r'  reflection_pad1d_backwardr,  replication_pad1d_backwardr.  r?  reflection_pad2drA  replication_pad2drE  _weight_norm_interface_backwardrN  reflection_pad2d_backwardr(  replication_pad2d_backwardrV  re  reflection_pad3drg  replication_pad3drk  reflection_pad3d_backwardreplication_pad3d_backwardrt  _pdist_forwardr\   ry  _pdist_backwardr  baddbmmr  	bernoullir  
bernoulli_r  ru  r  poissonr  _fused_moving_avg_obs_fq_helperr  mmr  r  r  r  r  miopen_batch_normr  convolutionr  r  _has_mkldnnr  r  _convolution_pointwiser  _linear_pointwiser  has_mklr   r  _mkl_linearr  r!  r  qconv2d_pointwiseqconv_pointwiser=  r"  binarybinary_tensorr-  qlinear_pointwiser2  r7  linear_dynamic_fp16linear_relu_dynamic_fp16r9  r"  r:  
max_pool2drE  int4mm_packed_weight_cpurV  r\  
avg_pool2dr  r  avg_pool2d_backwardr  
avg_pool3dr  avg_pool3d_backwardr  _adaptive_avg_pool2dr  _adaptive_avg_pool3dr  _adaptive_avg_pool2d_backwardr  _adaptive_avg_pool3d_backwardr  r  adaptive_max_pool2dr  r  r  adaptive_max_pool3dr  r  r  repeat_interleaver   ru   r  r
  r  r   _unsafe_indexr1  convolution_backwardrC  addbmmrL  randint_likerP  _fused_adam__fused_adamw_rh  _fused_adamrp  _int_mmrz  _convert_weight_to_int4packr  #_convert_weight_to_int4pack_for_cpur  _weight_int4pack_mmr  _weight_int4pack_mm_for_cpur  r  r  r  _dyn_quant_pack_4bit_weightr  _dyn_quant_matmul_4bitr  _weight_int8pack_mmr  _cdist_forwardr  _cdist_backwardr  _embedding_bagrC  _embedding_bag_forward_onlyrE  rH  nansumrJ  median	nanmedianrL  
dim_valuesrz  r   rO  logical_not_rQ  repeatrZ  zero_r\  mul_Scalardiv_logical_and_logical_or_logical_xor_r_  add_sub_rl  r  subrq  rounddecimalsrs  rz  
__rshift__r~  
__lshift__r  zeror  rn  r  fillr  relu_r  	_add_relur  rrelu_with_noiser  rrelu_with_noise_functionalr  rrelu_with_noise_r  	index_put_unsafe_index_putr  masked_fill_r  _masked_scaler  masked_scatter_r  masked_scatterr  masked_scatter_backwardr  
index_put_r  r  bmmr  r  r  r  rz  r{  r  r  r  r?   max_pool2d_with_indices_backwardr  max_pool2d_with_indicesr  fractional_max_pool2dr  max_pool3d_with_indicesr/   max_pool3d_with_indices_backwardr7  rB  rD  rQ  grid_sampler_2d_backwardrY  r_  ra  rd  r  onesrs  zerosrv  select_scatterrz  slice_scatterr~  r   r  r  gatherr  r  r  r  r  r  scatter_addr  scatter_add_r  r  r7  r  rx  value_reducer  scatter_r  #_scaled_dot_product_flash_attentionr  r  r  #_scaled_dot_product_cudnn_attentionr  0_scaled_dot_product_fused_attention_overrideabler  ,_scaled_dot_product_flash_attention_backwardr  +_scaled_dot_product_flash_attention_for_cpur  4_scaled_dot_product_flash_attention_for_cpu_backwardr  *_scaled_dot_product_attention_math_for_mpsr&  '_scaled_dot_product_efficient_attentionr*  0_scaled_dot_product_efficient_attention_backwardr2  ,_scaled_dot_product_cudnn_attention_backwardr4  _flash_attention_forwardr<  r>  _flash_attention_backwardrC  _efficient_attention_forwardrO  _efficient_attention_backwardSymIntrY  r  
_scaled_mmr  r	  _scaled_mm_v2r	  scatter_reducetwotwo_outr	  scatter_reduce_r	  multinomialr	  r	  r'	  r0	  _upsample_nearest_exact1dr:	  _upsample_nearest_exact2drC	  "_upsample_nearest_exact2d_backwardrH	  _upsample_nearest_exact3dr   rJ	  values_stablerM	  re	  _thnn_fused_lstm_cellrk	  r	  r	  r	  r	  r	  r	  argminr	  r	  topkr	  _segment_reduce_backwardr	  kthvaluer	  r   r	  r	  r	  r	  pixel_shuffler	  r	  	bucketize
Tensor_outr	  
Scalar_outr	  histcr	  _upsample_bilinear2d_aa_upsample_bicubic2d_aar
   _upsample_bilinear2d_aa_backwardr
  r
  r
  r  r
  searchsortedr'
  r-
  embedding_dense_backwardr0
  _embedding_bag_backwardr5
  _embedding_bag_dense_backwardr3
  *_embedding_bag_per_sample_weights_backwardr?
  isinrG
  	polygammarK
  _local_scalar_denserM
  rO
  rQ
  r_
  r
  _grouped_mmr
  _scaled_grouped_mmr
  _foreach_normr
  _softmaxr
  constant_pad_ndr
  r
  _jagged_to_padded_dense_forwardr
  r
  _native_multi_head_attentionr
  r
  special_airy_aispecial_bessel_y0special_bessel_y1special_modified_bessel_i0special_modified_bessel_i1special_modified_bessel_k0special_modified_bessel_k1!special_scaled_modified_bessel_k0!special_scaled_modified_bessel_k1special_chebyshev_polynomial_tspecial_chebyshev_polynomial_uspecial_chebyshev_polynomial_vspecial_chebyshev_polynomial_w&special_shifted_chebyshev_polynomial_t&special_shifted_chebyshev_polynomial_u&special_shifted_chebyshev_polynomial_v&special_shifted_chebyshev_polynomial_wspecial_hermite_polynomial_hspecial_hermite_polynomial_hespecial_laguerre_polynomial_lspecial_legendre_polynomial_pr
  r  r  r
  lerp_addcmul_addcdiv_torch._refs.nn.functionaltorch._refs.specialr)  r4   r:   r8   <module>r1     sM    .    '  # + +  " U     < 7 8 ) T]t_yy~~*/--*?*?PV*W ' %a )X"

8BF#3"4hr2v6F"FG 
3(* t}}-.
 

==7  /7t 		!!499==12'  3' !!))4+<+<+@+@AB%' %  C%$ t%%&I  'I 	[[$++//4;;+>+>P Xy! " !!))4+<+<+@+@ABI  CI3lV $s) 4  %%t}}'8'89:K  ;K $s)  %%t}}'8'89:8
  ;8
v t}}**+"& 3 ,3 t}}$$% **
 &
 $$dll&6&678
 **  9&   $,,"6"678 **  9& 		!!499==12%)$tPT   3 %%t}}'8'89:$Dv $DDI $Dc $DC $D  ;$DN tzz!!" #0	( t&&' ( t223
 "&$(((( ( 4-	(
 4Z( {{T!( 4(V t../
 %)	
  {{T!	 0> t112 	
$())
) ) 	) {{T!) 3)X t##$ $(";<,,;<\\;< 4-;< D=	;<
 {{T!;< ;< ;< ;< ;< %;<| t  (() 	I
	I		I 	I LL		I
 	I 	I 	I *	I t!!))* 	
			 	 LL		
 	 	 	 +	 t  (()' * ' t""**+
 "!! W
 W W d]	 W
 d] W d] W  W  W  W , WF   $(("4"456  7 txx||    $(("4"456  7 txx||  tzz!!"6 #6 tzz~~( (
 t!!))* + t!!%%& ' t{{""# $ t##++, ) -) t''//0, 1, t33;;< =
 t008896 :6& t<<DDE F
 t,,001 2

F 
C 
    F  #  N (,


 !%
$V S C 
 
F 
$ 
 
"  	  	C  !!))4+<+<+H+HIJ]N+ s T  , K" $$,,d.A.A.E.EFGB B6 B  HB  !]N+6  , " Q QF Q t**+) )F )4 )F )  ,) t""#J JF J4 JF J  $J t}})6 )$ )6 )  ) t$$%)6 )$ )6 )  &) t&&../&  T  0" 	$$,,d.M.M.Q.QR .f .6 .f . .d t!!))*&   + ))1143L3L3P3PQRT8V$ 	
  	
 666!" % S& %%--t/D/D/H/HIJ ''' '
 ' '  K'T &&(:(:;<S#s/3 f  fff>T8U   =6 ((00$2J2J2N2NOPT8V$ 	     	 
 666!"  % Q F $$,,d.B.B.F.FGH 444 4
 4 4 4  I4n t~~S#s 	$$$ $ 	$
 666!"$  $P tTz!2 * &&(:(:;<S#f C ffn8M   =4 $$,,d.B.B.G.GHIV[$1'v '%(F"G ' 2 J'$ t''(  	""" " $J	" )"J.
.
. 49d3i .".
.
. *. 66>	.(f V   t$$%
   777 	7
 7 TM7 	7 TM7 4-7 6666)*7 &7t ,,44d6R6R6V6VWX  	
   
$  Y4 t$$%S#4( +(
+(+( +( 	+(
 +( 66>+( ) &+(^ t''(
 )
 tzz
 WW	W W 	W
 W W  Wt>#;L t$$%=  &= t%%&>  '>(< t--.\S  /S t../\T  0T2Ej t$$%=  &= t%%&>  '> ((00 &&..&&11''//''22	 \& &:<G~ t$$%=  &= t%%&>  '> &&..&&11''//''22	 \(( ((V t""#

f 

 

v 

  $

 t##$Pv PV P Pf PQW P  %P $$dll&6&678/0 '  9': &&(:(:;<&* I  =I
 t$$% & t~~ I !I
 $$dll&6&678"  9" t33;;< * =*. tww3U[[4/ 3  3,B
* .2B,,BLLB IOB #Y_	B
 3i#oB B B IOd*BJQ t%%--."$,,"$LL"$ ,,
"$ ,,%	"$
 $"$ "$ !&"$ "$ /"$J t''(,,LL ,, I	
 #Y 3i  I  )> 	889>9N9N&&:6 599##::BBC D, 599##55==>S ?S
 xx:?--:O:O66;
7 
uyy}}00	1	 
2	
 :?9N9N&&:6 599##55==>599##33;;<599##33::;4 < = ?4l 599##55<<=599##55CCD E >< 599##55==>599##55<<= > ?> 599##55<<=599##55CCD$ E >$L 599##77??@599##<<DDE	 F A	 =BMM<Q<QVV=9 599&&112 
 3
8 599&&??@@ A@, t&&' M (Mb(<X t''//0E 1EP t UJ   UJp t''(\K(  )K(\ t((001 2" t((001@ 2@ t1199:F ;F, t112\P  3P
	
6 	
S 	
 t''(UI+  )+\ t001\H  2H$ t''(UI'  )'T t001\(  2(
 t%%,,-* .* $$dll&6&678
  9
 ##++T-@-@-D-DEF46 

c 

  G

 		&&..		0F0F0J0JKL  M" 

!!4#5#5#<#<=>A ?AH ))11234H 44Hn ##T[[__56./q '  7'0 !!(()*' +' !!))4+=+=+E+EFG  !
 H
2   (()*  !! +!H ~B  B* 0012 3& 889: ; (()*8 +8  0012< 3< >>?@< A<"3 "3 "3 "@F 0012D!'$D 3D4 ++,-; .;( (()*< +< t""**+ & , &F t##$G  %G* t""**+
 	
`5 ,`5F t//7785 95
 ##T[[__56=$ =  7= ##T^^%;%;<=) >) !!					 Xy! "	 t  (() * t{{""#' $'& tzz!!" # 								!!  !!

 									**Z  

""DJJ$7$789 :
" &&(>(>?@ A &&(>(>?@ A tyy  !& "& 

!!4::#4#456 7 		  $))"2"234" 5" tzz!!" # t~~$$%F   & %%&'RV"  (" 0012RV; 3; &&'(KO ) &&(>(>(F(FGH" I" t  ''( )
 t!!))* + t##$	 %	 t""#6  $6 t++,! -! t&&' ((V txx 5 !5 txx~~J J6;h #-YYY 	Y 		Y
 	Y 	Y 	Y 	Y 	Y 	Y 	Y Y Y Y Y  !Y" #Y$ %Y& 'Y( )Y* +Y, -Yx;4|383838 38 		38
 	38 	38 	38 	38 	38 	38 	38 	38 38 38 38  !38" #38$ %38& '38lI2X t44<<=( >(V t++334 # 5#L t))112Q 3Qh t++,UI _  -_D t445\\  6\~%
V %
6 %
Pt  v 3 $ t,,445# 6#$ t##$8  %8" t,,-\;'! ( .!, 		!!"#. $. t&&' ) ()X 		!!499==12   3( 

""DJJNN34   5( t""**+. ,. t!!))*. +.
 C d  /
  t{{""#' $'6
 
%R4 t''(& )&
 t  ! "
 !!	 & & ""	 88@@AB
 #<<	< < 	<
 < < 4<< C<~ 88BBCD #	  }	
 } }    4< E4S#X& 889: #((	( ( }	(
 ( ( ( ( 4<( ;(V EEFG
  $#''	' ' }	'
 ' ' ' 4<' H'T 99( """ 
" 	"
 
" " " " " " " " " " 4<"
". 88 #	  	
  } 4<
: AA  $!"!"!" 
!" 	!"
 
!" !" !" !" }!" 4<!"
!"H ??@A
  $"&3&3&	3& 3& }	3&
 3& 3& 4-3& 4<3& 66>3& B3&l <<=> ))))	)) )) }	))
 )) )) 4<)) ?))X ==" 4-4-4- 
4- 	4-
 }4- 
4- 4- 4- 4- 4- $Z4- 4- 4<4-
4-n 99* !""" 
" 	"
 
" " " " " " " " " " "  4<!"
"0 %%--  #'$(#"&HH	H H }	H
 }H H H H H H 4<H DjH TzH }H 4-H
HV --7789 #'$(#"&%''	' ' }	'
 }' ' ' ' ' ' }' }' }' 4<' Dj'  Tz!'" }#'$ 4-%' :'T &&( #'$(#,,, 
, 	,
 
, , , , , , , , , , 4<,  Dj!," Tz#,
,4 ))   %%)""/S/S	/S /S 4-	/S
 4-/S 4-/S */S */S /S /S /S 4</S d]/S tm/S t/S
/Sd *** !%"'%474747 
47 	47
 4-47 4-47 4-47 ,,47 ,,47 47 47 47 47 47 47  4<!47" $J#47$  %47
47x !%(,$( zY
,,zY
,,zY \\zY \\	zY
 ,,
zY ,,%zY {{T!zY zYz ''() !%(,$( 
,,
,, \\ \\	
 ,,
 ,,% {{T!  *, !%$(*.*. dC
,,dC
,,dC %,,dC %	dC
 %,,dC %dC ,,
dC {{T!dC K 4'dC K 4'dC dCN ""**+, !%'+)- 
,,
,, %,, %	
 K  %,, % K  ,,
 ++$ 3i$&  -: ##'')<)<)D)DEF&  G&
 t##''( )
   (($*:*:*>*>?@	 	  A	,* 	$$d&D&D&L&LM

 	$$d&D&D&L&LM. ((00//77 "!#,- u||+, dl	
 dl: 	$$d&D&D&L&LM

 									*$N t))112
  3 t&&'4/ (4/n t&&') ()X t$$,,-$% .$%N


 ##T[[%8%89:4 ;4 t!!))* + tyy  !
Q "
Q t,,-LP  ." %%t}}';';<=Xy!K " >K  #("9"9 Q t77??@	* A	* t##++,	0 -	0 t!!))* +@ t--556F 7FD %%t~~'@'@AB27u   C %%t~~'@'@AB
 



 	

 
 C
 

|E  E4 	!!))4+F+F+N+NO & 55==>?  @8 t>>FFG H$ '')<)<=>"  ?" uyy~~(() *: uyy~~  !6 "6( t  !
 
	-K  "-K` t,,- . t++, '
 -'
T t112  3, t>>?  @. tyy8=e 8  8  t~~6c 6 6F 6  6 t''(I& I )I tyy"v "& "  " t||6& 6V 6  6'^ (,$( VKVKVK \\D VK \\D 	VK
 4-VK 4-VK ,,%VK {{T!VK VKr t  $( 4- 4-	
 {{T!   !& ''() !% $(,$( <<<< \\ \\	
 ,,
 ,,
 ,,% {{T!  *6 t!!(() *  t}}v C       t##$3  %3l t~~ $888 8 	8
 8 8  8. t33;;<
 	**&\* c* 	* =*, t00889 
-" :-"` d22 3 d44 5 d44 5 d== > d== > d== > d== > dDD E dDD E tBB C tBB C tBB C tBB C tJJ K tJJ K tJJ K tJJ K t@@ A tAA B tAA B tAA B tyy  $ t||./    t||./   , 	tyy)!$,,/!$,,/
    ENP r:   