
    Z j2n                        S SK r S SKrS SKJr  S SKrS SKJr  S SKJs  Jr	  SSK
Jr  SSKJrJr  SSKJr  SSKJr  SSKJrJr  SS	KJr  SS
KJr  SSKJrJrJr  SSKJ r   SSK!J"r"  SSK#J$r$J%r%   " S S\RL                  5      r' " S S\RL                  5      r( " S S\RL                  5      r) " S S\5      r* " S S\RL                  5      r+ " S S\5      r,\" SS9\ " S S\5      5       5       r- " S  S!\RL                  5      r. " S" S#\R^                  5      r0 " S$ S%\RL                  5      r1 " S& S'\*5      r2 " S( S)\*5      r3 " S* S+\*5      r4\\ " S, S-\5      5       5       r5\" S.S9 " S/ S0\*5      5       r6/ S1Qr7g)2    N)	dataclass   )initialization)ACT2CLSACT2FN)filter_output_hidden_states)GradientCheckpointingLayer)BaseModelOutputModelOutput)PreTrainedModel)Unpack)TransformersKwargsauto_docstringcan_return_tuple)merge_with_config_defaults)capture_outputs   )SLANeXtConfigSLANeXtVisionConfigc                   P  ^  \ rS rSrSrU 4S jrS\S\S\R                  S\R                  4S jr	S	\R                  S
\R                  S\R                  S\
\\4   S\
\\4   S\R                  4S jrSS\R                  S\
\R                  \R                  4   4S jjrSrU =r$ )SLANeXtVisionAttention+   z=Multi-head Attention block with relative position embeddings.c                 @  > [         TU ]  5         US:X  a2  UR                  UR                  -  UR                  UR                  -  4OX"4nUR                  U l        UR
                  UR                  -  nUS-  U l        UR                  U l        [        R                  " UR
                  UR
                  S-  UR                  S9U l        [        R                  " UR
                  UR
                  5      U l        UR                  U l        U R                  (       a  Uc  [        S5      e[        R                   " ["        R$                  " SUS   -  S-
  U5      5      U l        [        R                   " ["        R$                  " SUS   -  S-
  U5      5      U l        g g )Nr   g      r   biaszBInput size must be provided if using relative positional encoding.   r   )super__init__
image_size
patch_sizenum_attention_headshidden_sizescaleattention_dropoutdropoutnnLinearqkv_biasqkvprojuse_rel_pos
ValueError	Parametertorchzeros	rel_pos_h	rel_pos_w)selfconfigwindow_size
input_sizehead_dim	__class__s        }/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/slanext/modeling_slanext.pyr   SLANeXtVisionAttention.__init__.   sV    a &"3"33V5F5F&J[J[5[\+ 	 $*#=#= %%)C)CCt^
//99V//1C1Ca1Gfoo^IIf00&2D2DE	!--! !eff  \\%++a*Q-6G!6KX*VWDN\\%++a*Q-6G!6KX*VWDN     q_sizek_sizerel_posreturnc                 
   [        S[        X5      -  S-
  5      n[        R                  " UR	                  SUR
                  S   S5      R                  SSS5      USS9nUR	                  SU5      R                  SS5      n[        R                  " U5      SS2S4   [        X!-  S5      -  n[        R                  " U5      SSS24   [        X-  S5      -  nXg-
  US-
  [        X-  S5      -  -   nXXR                  5          $ )	aa  
Get relative positional embeddings according to the relative positions of
    query and key sizes.

Args:
    q_size (int):
        size of the query.
    k_size (int):
        size of key k.
    rel_pos (`torch.Tensor`):
        relative position embeddings (L, channel).

Returns:
    Extracted positional embeddings according to relative positions.
r   r   r   linear)sizemodeN      ?)
intmaxFinterpolatereshapeshapepermuter.   arangelong)	r2   r;   r<   r=   max_rel_distrel_pos_resizedq_coordsk_coordsrelative_coordss	            r8   get_rel_pos"SLANeXtVisionAttention.get_rel_posG   s      1s622Q67--OOAw}}Q/4<<Q1E

 *11"lCKKAqQ <<'403v3LL<<'a03v3LL#.6A:V_VYAZ2ZZ33566r:   queryr0   r1   c                 F   Uu  pgUu  pU R                  XhU5      n
U R                  XyU5      nUR                  u  pnUR                  XX~5      n[        R                  " SX5      n[        R                  " SX5      nUSS2SS2SS2SS2S4   USS2SS2SS2SSS24   -   nU$ )a&  
Calculate decomposed Relative Positional Embeddings from :paper:`mvitv2`.
https://github.com/facebookresearch/mvit/blob/19786631e330df9f3622e5402b4a419a263a2c80/mvit/models/attention.py

Args:
    query (`torch.Tensor`):
        query q in the attention layer with shape (batch_size, query_height * query_width, channel).
    rel_pos_h (`torch.Tensor`):
        relative position embeddings (Lh, channel) for height axis.
    rel_pos_w (`torch.Tensor`):
        relative position embeddings (Lw, channel) for width axis.
    q_size (tuple):
        spatial sequence size of query q with (query_height, query_width).
    k_size (tuple):
        spatial sequence size of key k with (key_height, key_width).

Returns:
    decomposed_rel_pos (`torch.Tensor`):
        decomposed relative position embeddings.
zbhwc,hkc->bhwkzbhwc,wkc->bhwkN)rS   rJ   rI   r.   einsum)r2   rU   r0   r1   r;   r<   query_heightquery_width
key_height	key_widthrelative_position_heightrelative_position_width
batch_size_dimreshaped_queryrel_hrel_wdecomposed_rel_poss                      r8   get_decomposed_rel_pos-SLANeXtVisionAttention.get_decomposed_rel_posg   s    8 %+! &
#'#3#3Li#X "&"2"2;9"U"[[
szR-~X-~W"1aAt#34uQ1dA=M7NN!!r:   hidden_statesc                    UR                   u  p4pVU R                  U5      R                  X4U-  SU R                  S5      R	                  SSSSS5      nUR                  SX0R                  -  XE-  S5      R                  S5      u  pn
XR                  -  U	R                  SS5      -  nU R                  (       a?  U R                  XR                  U R                  XE4XE45      nUR                  U5      nX-   n[        R                  R                  R!                  U[        R"                  SS9R%                  UR&                  5      n[        R                  R)                  XR(                  U R*                  S	9nX-  R                  X0R                  XES5      nUR	                  SSSSS5      R                  X4US5      nU R-                  U5      nX4$ )
Nr   r@   r   r   r      )dtyper`   )ptraining)rJ   r)   rI   r!   rK   unbindr#   	transposer+   re   r0   r1   
reshape_asr.   r&   
functionalsoftmaxfloat32tork   r%   rm   r*   )r2   rg   output_attentionsr^   heightwidthr_   r)   rU   keyvalueattn_weightsrd   
attn_probsattn_outputs                  r8   forwardSLANeXtVisionAttention.forward   s   '4':':$
E HH]#WZ%D4L4LbQWQ1a# 	  KK:8P8P+PRXR`bdellmnoE

*cmmB.CC!%!<!<~~t~~" "4!>!>|!L'<Lxx**22<u}}Z\2]``afalalm]]**<<<RVR_R_*`
!)22:?W?WY_hjk!))!Q1a8@@UZ\^_ii,((r:   )r%   r!   r*   r)   r0   r1   r#   r+   N)__name__
__module____qualname____firstlineno____doc__r   rE   r.   TensorrS   tuplere   r}   __static_attributes____classcell__r7   s   @r8   r   r   +   s    GX27# 7s 7U\\ 7ell 7@("||(" <<(" <<	("
 c3h(" c3h(" 
("T)U\\ )eTYT`T`bgbnbnTnNo ) )r:   r   c            	          ^  \ rS rSrU 4S jrS\R                  S\R                  S\R                  S\\   4S jr	Sr
U =r$ )	SLANeXtAttentionGRUCell   c                    > [         TU ]  5         [        R                  " XSS9U l        [        R                  " X"5      U l        [        R                  " USSS9U l        [        R                  " X-   U5      U l        g )NFr   r   )	r   r   r&   r'   input_to_hiddenhidden_to_hiddenscoreGRUCellrnn)r2   r5   r"   num_embeddingsr7   s       r8   r    SLANeXtAttentionGRUCell.__init__   s[    !yyuM "		+ CYY{AE:
::j9;Gr:   prev_hiddenbatch_hiddenchar_onehotskwargsc                    U R                  U5      nU R                  U5      R                  S5      nXV-   n[        R                  " U5      nU R                  U5      n[        R                  " US[        R                  S9R                  UR                  5      nUR                  SS5      n[        R                  " X5      R                  S5      n	[        R                  " X/S5      n
U R                  X5      nX4$ )Nr   r`   rk   r   )r   r   	unsqueezer.   tanhr   rG   rr   rs   rt   rk   ro   matmulsqueezecatr   )r2   r   r   r   r   batch_hidden_projprev_hidden_projattention_scoresrz   contextconcat_contextrg   s               r8   r}   SLANeXtAttentionGRUCell.forward   s     !00>00=GGJ,? ::&67::&67yy!1qNQQRbRhRhi#--a3,,|:BB1EG#:A>=**r:   )r   r   r   r   )r   r   r   r   r   r.   FloatTensorr   r   r}   r   r   r   s   @r8   r   r      sQ    H+&&+ ''+ ''	+
 +,+ +r:   r   c                   2   ^  \ rS rSrSU 4S jjrS rSrU =r$ )
SLANeXtMLP   c                    > [         TU ]  5         [        R                  " X5      U l        [        R                  " X5      U l        Uc  [        R                  " 5       U l        g [        U   " 5       U l        g r   )	r   r   r&   r'   fc1fc2Identityr   act_fn)r2   r"   out_channels
activationr7   s       r8   r   SLANeXtMLP.__init__   sN    99[699[7'1'9bkkmwz?R?Tr:   c                 l    U R                  U5      nU R                  U5      nU R                  U5      nU$ r   )r   r   r   r2   rg   s     r8   r}   SLANeXtMLP.forward   s2    //M2r:   )r   r   r   r   )r   r   r   r   r   r}   r   r   r   s   @r8   r   r      s    U r:   r   c                   t   ^  \ rS rSr% \\S'   SrSrSrSr	SS/r
\R                  " 5       U 4S	 j5       rS
rU =r$ )SLANeXtPreTrainedModel   r3   backbonepixel_valuesimageTstructure_attention_cellstructure_generatorc                 <  > [         TU ]  U5        [        U[        5      (       a.  UR                  b!  [
        R                  " UR                  S5        [        U[        5      (       aS  UR                  (       aB  [
        R                  " UR                  S5        [
        R                  " UR                  S5        [        U[        R                  5      (       a  UR                  S:  a#  S[        R                  " UR                  5      -  OSn[
        R                   " UR"                  U* U5        [
        R                   " UR$                  U* U5        UR&                  b#  [
        R                   " UR&                  U* U5        UR(                  b#  [
        R                   " UR(                  U* U5        [        U[*        5      (       a  S[        R                  " U R,                  R                  S-  5      -  nUR.                  4 H  nUR1                  5        Hy  n[        U[        R2                  5      (       d  M$  [
        R                   " UR4                  U* U5        UR6                  c  MV  [
        R                   " UR6                  U* U5        M{     M     gg)zInitialize the weightsNg        r   rD   )r   _init_weights
isinstanceSLANeXtVisionEncoder	pos_embedinit	constant_r   r+   r0   r1   r&   r   r"   mathsqrtuniform_	weight_ih	weight_hhbias_ihbias_hhSLANeXtSLAHeadr3   r   childrenr'   weightr   )r2   modulestd	generatorlayerr7   s        r8   r   $SLANeXtPreTrainedModel._init_weights   s    	f% f233+v//5 f455!!v//5v//5 fbjj))9?9K9Ka9O#		&"4"455UVCMM&**SD#6MM&**SD#6~~)fnnsdC8~~)fnnsdC8 fn--		$++"9"9C"?@@C$88:	&//1E!%33ellSD#> ::1 MM%**sdC@	 2 ; .r:    )r   r   r   r   r   __annotations__base_model_prefixmain_input_nameinput_modalitiessupports_gradient_checkpointing_keep_in_fp32_modules_strictr.   no_gradr   r   r   r   s   @r8   r   r      sF    "$O!&*#$>@U#V 
]]_"A "Ar:   r   c                   b   ^  \ rS rSrU 4S jrS\R                  S\R                  4S jrSrU =r	$ )SLANeXtMLPBlocki  c                   > [         TU ]  5         [        R                  " UR                  UR
                  5      U l        [        R                  " UR
                  UR                  5      U l        [        UR                     U l
        g r   )r   r   r&   r'   r"   mlp_dimlin1lin2r   
hidden_actactr2   r3   r7   s     r8   r   SLANeXtMLPBlock.__init__  sX    IIf00&..A	IIfnnf.@.@A	&++,r:   rg   r>   c                 l    U R                  U5      nU R                  U5      nU R                  U5      nU$ r   )r   r   r   r   s     r8   r}   SLANeXtMLPBlock.forward  s2    		-0/		-0r:   )r   r   r   )
r   r   r   r   r   r.   r   r}   r   r   r   s   @r8   r   r     s(    -U\\ ell  r:   r   c            
         ^  \ rS rSrU 4S jrS\R                  S\S\\R                  \\\4   4   4S jr	S\R                  S\S\\\4   S	\\\4   S\R                  4
S
 jr
S\R                  S\\R                     4S jrSrU =r$ )SLANeXtVisionLayeri  c                 &  > [         TU ]  5         [        R                  " UR                  UR
                  S9U l        [        X5      U l        [        R                  " UR                  UR
                  S9U l	        [        U5      U l        X l        g )N)eps)r   r   r&   	LayerNormr"   layer_norm_epslayer_norm1r   attnlayer_norm2r   mlpr4   )r2   r3   r4   r7   s      r8   r   SLANeXtVisionLayer.__init__  sj    <<(:(:@U@UV*6?	<<(:(:@U@UV"6*&r:   rg   r4   r>   c           	      6   UR                   u  p4pVX$U-  -
  U-  nX%U-  -
  U-  n[        R                  " USSSUSU45      nXG-   XX-   pUR                  X9U-  X*U-  X&5      nUR	                  SSSSSS5      R                  5       R                  SX"U5      nXU
44$ )aw  
Args:
Partition into non-overlapping windows with padding if needed.
    hidden_states (tensor): input tokens with [batch_size, height, width, channel]. window_size (int): window
    size.

Returns:
    windows: windows after partition with [batch_size * num_windows, window_size, window_size, channel].
    (pad_height, pad_width): padded height and width before partition
r   r   r   r   ri      r@   )rJ   rG   padrI   rK   
contiguous)r2   rg   r4   r^   rv   rw   channelpad_hpad_w
pad_height	pad_widthwindowss               r8   window_partition#SLANeXtVisionLayer.window_partition$  s     .;-@-@*
E33{B{22kAmaAua-GH &I%--k1;[@XZe
  ''1aAq9DDFNNrS^mtuY///r:   r   padding_shapeoriginal_shapec                 $   Uu  pVUu  pxUR                   S   XV-  U-  U-  -  n	UR                  XU-  Xb-  X"S5      n
U
R                  SSSSSS5      R                  5       R                  XUS5      n
U
SS2SU2SU2SS24   R                  5       n
U
$ )	a  
Args:
Window unpartition into original sequences and removing padding.
    hidden_states (tensor):
        input tokens with [batch_size * num_windows, window_size, window_size, channel].
    window_size (int):
        window size.
    padding_shape (Tuple):
        padded height and width (pad_height, pad_width).
    original_shape (Tuple): original height and width (height, width) before padding.

Returns:
    hidden_states: unpartitioned sequences with [batch_size, height, width, channel].
r   r@   r   r   r   ri   r   N)rJ   rI   rK   r   )r2   r   r4   r   r   r   r   rv   rw   r^   rg   s              r8   window_unpartition%SLANeXtVisionLayer.window_unpartition<  s    " !.
&]]1%**@K*OS^*^_
k193K[gi
 !!!Q1a3>>@HHajlno 	 &a&&5&!&;<GGIr:   c                    UnU R                  U5      nU R                  S:  a:  UR                  S   UR                  S   pCU R                  XR                  5      u  pU R	                  US9u  pU R                  S:  a  U R                  XR                  WWW45      nX!-   nU R                  U5      nXR                  U5      -   nU$ )Nr   r   r   )rg   )r   r4   rJ   r   r   r  r   r   )r2   rg   residualrv   rw   r   rz   layernorm_outputs           r8   r}   SLANeXtVisionLayer.forwardZ  s     ((7a)//2M4G4G4JE+/+@+@P`P`+a(M&*ii' '0 '
# a 33MCSCSUbekmrdstM 0++M:%1A(BBr:   )r   r   r   r   r4   )r   r   r   r   r   r.   r   rE   r   r   r  r   r}   r   r   r   s   @r8   r   r     s    '0ell 0 0QVW\WcWcejknpsksetWtQu 00||25FKCQTHoglmprumugv	<U\\ eE<M<M6N  r:   r   z
    Base class for slanext vision model's outputs that also contains image embeddings obtained by applying the projection
    layer to the pooler_output.
    )custom_introc                       \ rS rSr% SrSr\R                  S-  \S'   Sr	\R                  S-  \S'   Sr
\\R                  S4   S-  \S'   Sr\\R                  S4   S-  \S'   S	rg)
SLANeXtVisionEncoderOutputio  z
image_embeds (`torch.FloatTensor` of shape `(batch_size, output_dim)` *optional* returned when model is initialized with `with_projection=True`):
    The image embeddings obtained by applying the projection layer to the pooler_output.
Nimage_embedslast_hidden_state.rg   
attentionsr   )r   r   r   r   r   r  r.   r   r   r  rg   r   r  r   r   r:   r8   r
  r
  o  sr    
 .2L%##d*126u((4/6:>M5**C/047>7;Je'',-4;r:   r
  c                   2   ^  \ rS rSrSrU 4S jrS rSrU =r$ )SLANeXtPatchEmbeddingsi  z
This class turns `pixel_values` of shape `(batch_size, num_channels, height, width)` into the initial
`hidden_states` (patch embeddings) of shape `(batch_size, seq_length, hidden_size)` to be consumed by a
Transformer.
c                   > [         TU ]  5         UR                  UR                  p2UR                  UR
                  pT[        U[        R                  R                  5      (       a  UOX"4n[        U[        R                  R                  5      (       a  UOX34nUS   US   -  US   US   -  -  nX l        X0l        X@l        X`l
        [        R                  " XEX3S9U l        g )Nr   r   )kernel_sizestride)r   r   r   r    num_channelsr"   r   collectionsabcIterablenum_patchesr&   Conv2d
projection)r2   r3   r   r    r  r"   r  r7   s          r8   r   SLANeXtPatchEmbeddings.__init__  s    !'!2!2F4E4EJ$*$7$79K9Kk#-j+//:R:R#S#SZZdYq
#-j+//:R:R#S#SZZdYq
!!}
15*Q-:VW=:XY$$(&))L:ir:   c                 J   UR                   u  p#pEX0R                  :w  a  [        S5      eX@R                  S   :w  d  XPR                  S   :w  a2  [        SU SU SU R                  S    SU R                  S    S3	5      eU R	                  U5      R                  SSS	S5      nU$ )
NzeMake sure that the channel dimension of the pixel values match with the one set in the configuration.r   r   zInput image size (*z) doesn't match model (z).r   r   )rJ   r  r,   r   r  rK   )r2   r   r^   r  rv   rw   
embeddingss          r8   r}   SLANeXtPatchEmbeddings.forward  s    2>2D2D/
&,,,w  __Q''5OOA4F+F$VHAeW4KDOO\]L^K__`aeapapqras`ttvw  __\2::1aAF
r:   )r   r  r  r    r  )	r   r   r   r   r   r   r}   r   r   r   s   @r8   r  r    s    j r:   r  c                   v   ^  \ rS rSrSrSSS.U 4S jjrS\R                  S\R                  4U 4S	 jjrS
r	U =r
$ )SLANeXtLayerNormi  a5  LayerNorm that supports two data formats: channels_last (default) or channels_first.
The ordering of the dimensions in the inputs. channels_last corresponds to inputs with shape (batch_size, height,
width, channels) while channels_first corresponds to inputs with shape (batch_size, channels, height, width).
gư>channels_last)r   data_formatc                `   > [         TU ]  " U4SU0UD6  US;  a  [        SU 35      eX0l        g )Nr   )r!  channels_firstzUnsupported data format: )r   r   NotImplementedErrorr"  )r2   normalized_shaper   r"  r   r7   s        r8   r   SLANeXtLayerNorm.__init__  s=    )=s=f=AA%(A+&OPP&r:   featuresr>   c                    > U R                   S:X  a9  UR                  SSSS5      n[        TU ]  U5      nUR                  SSSS5      nU$ [        TU ]  U5      nU$ )zt
Args:
    features: Tensor of shape (batch_size, channels, height, width) OR (batch_size, height, width, channels)
r$  r   r   r   r   )r"  rK   r   r}   )r2   r(  r7   s     r8   r}   SLANeXtLayerNorm.forward  sj    
 //''1a3Hwx0H''1a3H  wx0Hr:   r"  )r   r   r   r   r   r   r.   r   r}   r   r   r   s   @r8   r   r     s9    
 15/ ' '   r:   r   c                   6   ^  \ rS rSrS\4U 4S jjrS rSrU =r$ )SLANeXtVisionNecki  r3   c                 T  > [         TU ]  5         Xl        [        R                  " UR
                  UR                  SSS9U l        [        UR                  SS9U l	        [        R                  " UR                  UR                  SSSS9U l
        [        UR                  SS9U l        g )Nr   F)r  r   r$  r+  r   )r  paddingr   )r   r   r3   r&   r  r"   output_channelsconv1r   r   conv2r   r   s     r8   r   SLANeXtVisionNeck.__init__  s    YYv1163I3IWX_de
+F,B,BP`aYYv55v7M7M[\fgnst
+F,B,BP`ar:   c                     UR                  SSSS5      nU R                  U5      nU R                  U5      nU R                  U5      nU R	                  U5      nU$ )Nr   r   r   r   )rK   r1  r   r2  r   r   s     r8   r}   SLANeXtVisionNeck.forward  sZ    %--aAq9

=1((7

=1((7r:   )r3   r1  r2  r   r   )	r   r   r   r   r   r   r}   r   r   r   s   @r8   r-  r-    s    b2 b r:   r-  c            
          ^  \ rS rSr\\S.rSrS\4U 4S jjr	S r
\\" SS9 SS
\R                  S	-  S\\   S\\-  4S jj5       5       rSrU =r$ )r   i  )rg   r  r   r3   c                   > [         TU ]  U5        Xl        UR                  U l        [	        U5      U l        S U l        UR                  (       aj  [        R                  " [        R                  " SUR                  UR                  -  UR                  UR                  -  UR                  5      5      U l        [        R                  " 5       U l        [!        UR"                  5       HC  n[%        UX!R&                  ;  a  UR(                  OSS9nU R                  R+                  U5        ME     [-        U5      U l        SU l        U R3                  5         g )Nr   r   )r4   F)r   r   r3   r   r  patch_embedr   use_abs_posr&   r-   r.   r/   r    r"   
ModuleListlayersrangenum_hidden_layersr   global_attn_indexesr4   appendr-  neckgradient_checkpointing	post_init)r2   r3   ir   r7   s       r8   r   SLANeXtVisionEncoder.__init__  s     ++1&9\\%%):)::%%):)::&&	DN mmov//0A&23;U;U2UF..[\E KKu% 1 &f-	&+#r:   c                     U R                   $ r   )r8  )r2   s    r8   get_input_embeddings)SLANeXtVisionEncoder.get_input_embeddings  s    r:   F)tie_last_hidden_statesNr   r   r>   c                     Uc  [        S5      eU R                  U5      nU R                  b  X0R                  -   nU R                   H  nU" U5      nM     U R	                  U5      n[        US9$ )Nz You have to specify pixel_values)r  )r,   r8  r   r;  r@  r
  )r2   r   r   rg   layer_modules        r8   r}   SLANeXtVisionEncoder.forward  sr    
 ?@@((6>>%)NN:M KKL(7M (		-0)+
 	
r:   )r3   rA  r   r;  r@  r8  r   r   )r   r   r   r   r   r   _can_record_outputsr   r   r   rF  r   r   r.   r   r   r   r   r
  r}   r   r   r   s   @r8   r   r     sz    ,>Nde!2 >   E27;
!--4
GMN`Ga
	+	+
 3  
r:   r   c                   h   ^  \ rS rSr S	S\S-  4U 4S jjjrS\R                  S\\	   4S jr
SrU =r$ )
SLANeXtBackbonei  Nr3   c           	         > [         TU ]  U5        [        UR                  5      U l        [
        R                  " UR                  UR                  SSSSS9U l	        U R                  5         g )Nr   r   r   F)r  r  r/  r   )r   r   r   vision_configvision_towerr&   r  post_conv_in_channelspost_conv_out_channels	post_convrB  r2   r3   r   r7   s      r8   r   SLANeXtBackbone.__init__  s^    
 	 01E1EF((&*G*GUV_`jkrw
 	r:   rg   r   c                     U R                   " U40 UD6nU R                  UR                  5      nUR                  S5      R	                  SS5      n[        UUR                  UR                  S9$ )Nr   r   )r  rg   r  )rQ  rT  r  flattenro   r
   rg   r  )r2   rg   r   vision_outputs       r8   r}   SLANeXtBackbone.forward  sj    ))-B6B}'F'FG%--a0::1a@+'55$//
 	
r:   )rT  rQ  r   )r   r   r   r   dictr   r.   r   r   r   r}   r   r   r   s   @r8   rN  rN    s@     #
t
 

U\\ 
VDV=W 
 
r:   rN  c                      ^  \ rS rSrS\0r SS\S-  4U 4S jjjr\\	\
 SS\R                  S\R                  S-  S\\   4S	 jj5       5       5       rS
rU =r$ )r   i$  r  Nr3   c                    > [         TU ]  U5        [        UR                  UR                  UR
                  5      U l        [        UR                  UR
                  5      U l        U R                  5         g r   )
r   r   r   rS  r"   r   r   r   r   rB  rU  s      r8   r   SLANeXtSLAHead.__init__)  s_    
 	 (?))6+=+=v?R?R)
% $.f.@.@&BUBU#V r:   rg   targetsr   c                    [         R                  " UR                  S   U R                  R                  4[         R
                  UR                  S9n[         R                  " UR                  S   /[         R                  UR                  S9n/ n/ n[        U R                  R                  S-   5       H  n[        R                  " XPR                  R                  5      R                  5       n	U R                  XAR                  5       U	5      u  pHU R                  U5      n
U
R!                  SS9nUR#                  U
5        UR#                  U5        [         R$                  " USS9R'                  U R                  R                  S-
  5      R)                  S5      R+                  5       (       d  M    O   [        R,                  " [         R$                  " USS9S[         R
                  S9R/                  UR0                  5      n[3        XS9$ )	Nr   )rk   device)rB   rk   ra  r   )r`   r@   r   )r  rg   )r.   r/   rJ   r3   r"   rs   ra  rM   r<  max_text_lengthrG   one_hotr   floatr   r   argmaxr?  stackeqanyallrr   rt   rk   r
   )r2   rg   r_  r   r(  predicted_charsstructure_preds_liststructure_ids_listr_   embedding_featurestructure_stepstructure_predss               r8   r}   SLANeXtSLAHead.forward7  s    ;;  #T[[%<%<=U]][h[o[o
  ++M,?,?,B+C5::^k^r^rs!t{{22Q67A !		/;;;S;S T Z Z \77BUBUBWYjkKH!55h?N,333:O ''7%%o6{{-15889Q9QTU9UVZZ[]^bbdd 8 ))EKK0D!$LRT\a\i\ijmm
 eer:   )r   r   r   )r   r   r   r   r   rL  r[  r   r   r   r   r.   r   r   r   r   r}   r   r   r   s   @r8   r   r   $  s    - #t     (,f((f $f +,	f !   fr:   r   c                   j    \ rS rSr% SrSr\R                  S-  \S'   Sr	\R                  S-  \S'   Sr
g) SLANeXtForTableRecognitionOutputiX  aY  
head_hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
    Hidden-states of the SLANeXtSLAHead at each prediction step, varies up to max `self.config.max_text_length` states (depending on early exits).
head_attentions (`tuple(torch.FloatTensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
    Attentions of the SLANeXtSLAHead at each prediction step, varies up to max `self.config.max_text_length` attentions (depending on early exits).
Nhead_hidden_stateshead_attentionsr   )r   r   r   r   r   rs  r.   r   r   rt  r   r   r:   r8   rr  rr  X  s4     48))D0704OU&&-4r:   rr  z
    SLANeXt Table Recognition model for table recognition tasks. Wraps the core SLANeXtPreTrainedModel
    and returns outputs compatible with the Transformers table recognition API.
    c            	          ^  \ rS rSrS\4U 4S jjr\\S\R                  S\
\   S\\R                     \-  4S j5       5       rSrU =r$ )	SLANeXtForTableRecognitionif  r3   c                 |   > [         TU ]  U5        [        US9U l        [	        US9U l        U R                  5         g )N)r3   )r   r   rN  r   r   headrB  r   s     r8   r   #SLANeXtForTableRecognition.__init__m  s2     'v6"&1	r:   r   r   r>   c                     U R                   " U40 UD6nU R                  " UR                  40 UD6n[        UR                  UR                  UR
                  UR                  UR
                  S9$ )N)r  rg   r  rs  rt  )r   rx  r  rr  rg   r  )r2   r   r   backbone_outputshead_outputss        r8   r}   "SLANeXtForTableRecognition.forwards  sl    
  ==@@yy!1!C!CNvN/*<<*88'22+99(33
 	
r:   )r   rx  )r   r   r   r   r   r   r   r   r.   r   r   r   r   rr  r}   r   r   r   s   @r8   rv  rv  f  s`    }  
!--
9?@R9S
	u  	!$D	D
  
r:   rv  )r   rN  rv  r   )8r  r   dataclassesr   r.   torch.nnr&   torch.nn.functionalrq   rG    r   r   activationsr   r   backbone_utilsr   modeling_layersr	   modeling_outputsr
   r   modeling_utilsr   processing_utilsr   utilsr   r   r   utils.genericr   utils.output_capturingr   configuration_slanextr   r   Moduler   r   r   r   r   r   r
  r  r   r   r-  r   rN  r   rr  rv  __all__r   r:   r8   <module>r     s  ,   !     & * 9 9 < - & I I 7 5 EB)RYY B)J+bii +B +A_ +A\bii Q3 Qh  	< 	< 	< RYY  Fr|| 4		 (6
1 6
r
, 
01f+ 1fh 
	5 	5  	5 
!7 

. hr:   