
    N j*                        S SK r S SKJr  S SKrS SKJr  S SKJrJrJ	r	J
r
  S SKJr  SS jrS rSS jrS	 r\ R"                  S
\S   4S j5       r\ R"                  S
\S   4S j5       r\ R"                  S
\S   4S j5       rg)    N)	Generator)global_decomposition_table)_rnn_helpergather_paramsgru_cell	lstm_cell)
while_loopc                 (  ^^^^ US   nUS   mU(       a  US   OSnU(       a  US   OSm[        U5      S:X  a  US   O[        U5      S:X  a  US   OSmUS   R                  S5      nUS   R                  S5      n[        R                  R                  R                  XU5      mU(       a  TR                  S5      OTm[        R                  " TR                  S5      /[        UR                  SS 5      Q7UR                  UR                  S.6n	U4S	 jn
UUUU4S
 jn[        R                  " S[        R                  S9n[        XXXx/5      u  pnnU(       a  UR                  S5      nXR!                  S5      UR!                  S5      44$ )aQ  
1 layer fn for while loop LSTM

Args:
    inp: Input tensor of shape (seq_len, batch, input_size)
    hidden: Tuple of (hx, cx) hidden states
    params: List of weight and bias tensors
    has_biases: Whether biases are included
    reverse: Whether to process sequence in reverse

Returns:
    Tuple of (output, (final_hx, final_cx))
r         N         dtypedevicec                 ,   > U TR                  S5      :  $ Nr   size)iouthxcxprecomputed_inputs       f/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/torch/export/_patches.pycond_fn*one_layer_while_loop_lstm.<locals>.cond_fn.       $))!,,,    c           
        > U R                  5       n[        R                  " U5        [        R                  " UTR                  S5      S-
  S9  [	        TU   X#TTTSS9u  p#UR                  5       nUR                  S5      X'   U S-   XU4$ )Nr   r   maxr   )	chunk_dim)itemtorch_check_is_sizer   r   clonesqueeze)	idxr   r   r   r   hh_bias	hh_weight	hr_weightr   s	        r   body_fn*one_layer_while_loop_lstm.<locals>.body_fn1   s    HHJQQ$5$:$:1$=$ABa ")WiST
 iikAQw##r    r   )len	unsqueezer&   nn
functionallinearflipemptyr   tupleshaper   r   tensorint64r	   r)   )inphiddenparams
has_biasesreverse	ih_weightih_biasr   r   step_outputr   r.   cnt_r   final_hxfinal_cxr+   r,   r-   r   s                    @@@@r   one_layer_while_loop_lstmrH   
   s{    q	Iq	I%fQi4G%fQi4G[A%q	Fq8H6!9d  
		Q	B			Q	B++2237K5<)..q1BS ++q!	rxx|	 hhyy	K-$ $ ,,q
,C!+3R4"AHh hhqk !!!$h&6&6q&9:::r    c	                    [        U5      S:w  a  [        S5      e[        X#US   R                  S5      US   R                  S5      :g  5      n[	        [        US   US   5      5      n	[        n
[        U U	UUUUUUUU
5
      u  p[	        [        U6 5      nU[        R                  " US   S5      [        R                  " US   S5      4$ )aj  
LSTM implementation using while_loop for export compatibility.

This is a drop-in replacement for the default LSTM decomposition that uses
while_loop instead of Python loops, making it more suitable for torch.export.

Args:
    input: Input tensor
    hx: Tuple of (h0, c0) hidden states
    params: List of weight and bias tensors
    has_biases: Whether biases are included
    num_layers: Number of LSTM layers
    dropout: Dropout probability
    train: Training mode
    bidirectional: Whether to use bidirectional LSTM
    batch_first: Whether batch dimension is first

Returns:
    Tuple of (output, h_n, c_n)
r   zlstm expects two hidden statesr   r   )
r1   AssertionErrorr   r   listziprH   r   r&   stackinputr   r>   r?   
num_layersdropouttrainbidirectionalbatch_firstr=   layer_fnr   final_hiddenss                r   lstm_while_loop_implrW   I   s    > 2w!|=>>6r!uzz!}1

1/MNF#beRU#$F(H$C m,-MM!,a0%++mA>NPQ2RRRr    c                   ^^^^^ US   mUS   mU(       a  US   OSmU(       a  US   OSm[         R                  R                  R                  U TT5      mU(       a  TR	                  S5      OTmUR                  S5      n[         R                  " TR                  S5      /[        UR                  SS 5      Q7UR                  UR                  S.6nU4S jnUUUUU4S jn[         R                  " S[         R                  S	9n	[        XxXU/5      u  pnU(       a  UR	                  S5      nXR                  S5      4$ )
a<  
1 layer fn for while loop GRU

Args:
    inp: Input tensor of shape (seq_len, batch, input_size)
    hidden: Hidden state tensor
    params: List of weight and bias tensors
    has_biases: Whether biases are included
    reverse: Whether to process sequence in reverse

Returns:
    Tuple of (output, final_hidden)
r   r   r   Nr   r   c                 ,   > U TR                  S5      :  $ r   r   )r   r   
cur_hiddenr   s      r   r   )one_layer_while_loop_gru.<locals>.cond_fn   r   r    c                   > U R                  5       n[        R                  " U5        [        R                  " UTR                  S5      S-
  S9  [	        TU   UTTTT5      nUR                  5       nUR                  S5      X'   U S-   X4$ )Nr   r   r"   )r%   r&   r'   r   r   r(   r)   )	r*   r   rZ   r   r+   r,   rB   rA   r   s	       r   r.   )one_layer_while_loop_gru.<locals>.body_fn   s    HHJQQ$5$:$:1$=$ABa *i)W

 iik##A&Qw''r    r0   )r&   r3   r4   r5   r6   r2   r7   r   r8   r9   r   r   r:   r;   r	   r)   )r<   r=   r>   r?   r@   rZ   rC   r   r.   rD   rE   r   final_hiddenr+   r,   rB   rA   r   s                @@@@@r   one_layer_while_loop_grur_   }   s'    q	Iq	I%fQi4G%fQi4G++223	7K5<)..q1BS!!!$J ++q!	z#	$   	K-
( 
( ,,q
,C%g:8VWALhhqk$$Q'''r    c	                     [        X#S5      n[        UR                  S5      5      n	[        n
[	        U U	UUUUUUUU
5
      u  pU[
        R                  " US5      4$ )aU  
GRU implementation using while_loop for export compatibility.

This is a drop-in replacement for the default GRU decomposition that uses
while_loop instead of Python loops, making it more suitable for torch.export.

Args:
    input: Input tensor
    hx: Hidden state tensor
    params: List of weight and bias tensors
    has_biases: Whether biases are included
    num_layers: Number of GRU layers
    dropout: Dropout probability
    train: Training mode
    bidirectional: Whether to use bidirectional GRU
    batch_first: Whether batch dimension is first

Returns:
    Tuple of (output, h_n)
Fr   )r   rK   unbindr_   r   r&   rM   rN   s                r   gru_while_loop_implrb      sj    > 6u5F"))A,F'H$C M1---r    return)NNNc              #   n  #    [         S   nUR                  U S5      nU R                  R                  [        R                  R
                  R                  S5      n XU '   XR                  [        R                  R
                  R                  '   Sv   Ub  X2U '   OUR                  U S5        Ub2  UU R                  [        R                  R
                  R                  '   gU R                  R                  [        R                  R
                  R                  S5        g! Ub  X2U '   OUR                  U S5        Ub2  UU R                  [        R                  R
                  R                  '   f U R                  R                  [        R                  R
                  R                  S5        f = f7f)af  
Generic context manager for registering while_loop-based RNN decompositions.

Args:
    rnn_op: The aten operation to patch (e.g., torch.ops.aten.lstm.input)
    rnn_impl: The while_loop-based implementation function

Note:
    This is an internal helper. Use register_lstm_while_loop_decomposition()
    or register_gru_while_loop_decomposition() instead.
post_autogradN)r   get
py_kernelsr&   _CDispatchKeyCompositeImplicitAutogradpop)rnn_oprnn_implregistryoriginal_decomporiginal_py_kernels        r   &_register_rnn_while_loop_decompositionrq      si     */:H ll640O  **..66X#LT%((..HHI &.V LL& )" ehh22LLM
 !!%(("6"6"P"PRVW &.V LL& )" ehh22LLM
 !!%(("6"6"P"PRVWs    AF58D# BF5#BF22F5c               #      #    [        [        R                  R                  R                  R
                  [        5         Sv   SSS5        g! , (       d  f       g= f7f)aN  
Context manager that temporarily registers the while_loop-based LSTM decomposition.

The while_loop-based decomposition is more suitable for export and graph-based
execution, as it avoids Python control flow that cannot be captured in the graph.
This should support dynamic sequence lengths, however as while_loop does not
support Autograd yet, an ExportedProgram created with this will not be trainable.

Usage::

    from torch.export._patches import register_lstm_while_loop_decomposition
    from torch.export import export

    with register_lstm_while_loop_decomposition():
        # Export your model with LSTM
        ep = export(model, (x, h0, c0))

Note:
    This context manager temporarily modifies the global decomposition table
    and py_kernels registration. The original registrations are restored when
    exiting the context.
N)rq   r&   opsatenlstmrO   rW    r    r   &register_lstm_while_loop_decompositionrw     s=     0 
0		!!#7
 	
 
 
   =AA	A
AAc               #      #    [        [        R                  R                  R                  R
                  [        5         Sv   SSS5        g! , (       d  f       g= f7f)aF  
Context manager that temporarily registers the while_loop-based GRU decomposition.

The while_loop-based decomposition is more suitable for export and graph-based
execution, as it avoids Python control flow that cannot be captured in the graph.
This should support dynamic sequence lengths, however as while_loop does not
support Autograd yet, an ExportedProgram created with this will not be trainable.

Usage::

    from torch.export._patches import register_gru_while_loop_decomposition
    from torch.export import export

    with register_gru_while_loop_decomposition():
        # Export your model with GRU
        ep = export(model, (x, h0))

Note:
    This context manager temporarily modifies the global decomposition table
    and py_kernels registration. The original registrations are restored when
    exiting the context.
N)rq   r&   rs   rt   grurO   rb   rv   r    r   %register_gru_while_loop_decompositionr{   2  s=     0 
0		  "5
 	
 
 
rx   )F)
contextlibcollections.abcr   r&   torch._decompr   torch._decomp.decompositionsr   r   r   r   "torch._higher_order_ops.while_loopr	   rH   rW   r_   rb   contextmanagerrq   rw   r{   rv   r    r   <module>r      s     %  4 X X 9<;~1Sh3(l..b ,X ,X ,X^ 	:J0K  : y9I/J  r    