
    Z jz              	       d   S r SSKrSSKJr  SSKrSSKJr  SSKJr  SSKJ	r	J
r
  SSKJr  SS	KJr  SS
KJrJrJrJrJrJr  SSKJr  SSKJr  \" 5       (       a	  SSKJrJr  OS rS r\R:                  " \5      r\" SS9\ " S S\5      5       5       r \" SS9\ " S S\5      5       5       r!\" SS9\ " S S\5      5       5       r" " S S\RF                  5      r$ " S S\RF                  5      r% " S S \RF                  5      r&SCS!\RN                  S"\(S#\)S$\RN                  4S% jjr* " S& S'\RF                  5      r+ " S( S)\RF                  5      r, " S* S+\RF                  5      r- " S, S-\RF                  5      r. " S. S/\RF                  5      r/ " S0 S1\RF                  5      r0 " S2 S3\RF                  5      r1 " S4 S5\RF                  5      r2 " S6 S7\RF                  5      r3\ " S8 S9\5      5       r4\ " S: S;\45      5       r5\" S<S9 " S= S>\45      5       r6\" S?S9 " S@ SA\	\45      5       r7/ SBQr8g)Dz9PyTorch Dilated Neighborhood Attention Transformer model.    N)	dataclass)nn   )ACT2FN)BackboneMixinfilter_output_hidden_states)BackboneOutput)PreTrainedModel)ModelOutputOptionalDependencyNotAvailableauto_docstringis_natten_availableloggingrequires_backends)can_return_tuple   )DinatConfig)
natten2davnatten2dqkrpbc                      [        5       eNr   argskwargss     y/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/dinat/modeling_dinat.pyr   r   *       ,..    c                      [        5       er   r   r   s     r   r   r   -   r   r   zO
    Dinat encoder's outputs, with potential hidden states and attentions.
    )custom_introc                       \ rS rSr% SrSr\R                  S-  \S'   Sr	\
\R                  S4   S-  \S'   Sr\
\R                  S4   S-  \S'   Sr\
\R                  S4   S-  \S'   S	rg)
DinatEncoderOutput7   a  
reshaped_hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
    Tuple of `torch.FloatTensor` (one for the output of the embeddings + one for the output of each stage) of
    shape `(batch_size, hidden_size, height, width)`.

    Hidden-states of the model at the output of each layer plus the initial embedding outputs reshaped to
    include the spatial dimensions.
Nlast_hidden_state.hidden_states
attentionsreshaped_hidden_states )__name__
__module____qualname____firstlineno____doc__r$   torchFloatTensor__annotations__r%   tupler&   r'   __static_attributes__r(   r   r   r"   r"   7   s}     37u((4/6:>M5**C/047>7;Je'',-4;CGE%"3"3S"89D@Gr   r"   zW
    Dinat model's outputs that also contains a pooling of the last hidden states.
    c                       \ rS rSr% SrSr\R                  S-  \S'   Sr	\R                  S-  \S'   Sr
\\R                  S4   S-  \S'   Sr\\R                  S4   S-  \S'   Sr\\R                  S4   S-  \S	'   S
rg)DinatModelOutputM   a  
pooler_output (`torch.FloatTensor` of shape `(batch_size, hidden_size)`, *optional*, returned when `add_pooling_layer=True` is passed):
    Average pooling of the last layer hidden-state.
reshaped_hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
    Tuple of `torch.FloatTensor` (one for the output of the embeddings + one for the output of each stage) of
    shape `(batch_size, hidden_size, height, width)`.

    Hidden-states of the model at the output of each layer plus the initial embedding outputs reshaped to
    include the spatial dimensions.
Nr$   pooler_output.r%   r&   r'   r(   )r)   r*   r+   r,   r-   r$   r.   r/   r0   r6   r%   r1   r&   r'   r2   r(   r   r   r4   r4   M   s    	 37u((4/6.2M5$$t+2:>M5**C/047>7;Je'',-4;CGE%"3"3S"89D@Gr   r4   z1
    Dinat outputs for image classification.
    c                       \ rS rSr% SrSr\R                  S-  \S'   Sr	\R                  S-  \S'   Sr
\\R                  S4   S-  \S'   Sr\\R                  S4   S-  \S'   Sr\\R                  S4   S-  \S	'   S
rg)DinatImageClassifierOutputf   a  
loss (`torch.FloatTensor` of shape `(1,)`, *optional*, returned when `labels` is provided):
    Classification (or regression if config.num_labels==1) loss.
logits (`torch.FloatTensor` of shape `(batch_size, config.num_labels)`):
    Classification (or regression if config.num_labels==1) scores (before SoftMax).
reshaped_hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
    Tuple of `torch.FloatTensor` (one for the output of the embeddings + one for the output of each stage) of
    shape `(batch_size, hidden_size, height, width)`.

    Hidden-states of the model at the output of each layer plus the initial embedding outputs reshaped to
    include the spatial dimensions.
Nlosslogits.r%   r&   r'   r(   )r)   r*   r+   r,   r-   r:   r.   r/   r0   r;   r%   r1   r&   r'   r2   r(   r   r   r8   r8   f   s     &*D%

d
")'+FE$+:>M5**C/047>7;Je'',-4;CGE%"3"3S"89D@Gr   r8   c                   r   ^  \ rS rSrSrU 4S jrS\R                  S-  S\\R                     4S jr
SrU =r$ )	DinatEmbeddings   z.
Construct the patch and position embeddings.
c                    > [         TU ]  5         [        U5      U l        [        R
                  " UR                  5      U l        [        R                  " UR                  5      U l
        g r   )super__init__DinatPatchEmbeddingspatch_embeddingsr   	LayerNorm	embed_dimnormDropouthidden_dropout_probdropoutselfconfig	__class__s     r   rA   DinatEmbeddings.__init__   sG     4V <LL!1!12	zz&"<"<=r   pixel_valuesNreturnc                 l    U R                  U5      nU R                  U5      nU R                  U5      nU$ r   )rC   rF   rI   )rK   rO   
embeddingss      r   forwardDinatEmbeddings.forward   s4    **<8
YYz*
\\*-
r   )rI   rF   rC   )r)   r*   r+   r,   r-   rA   r.   r/   r1   TensorrS   r2   __classcell__rM   s   @r   r=   r=      s9    >E$5$5$< u||AT  r   r=   c                   l   ^  \ rS rSrSrU 4S jrS\R                  S-  S\R                  4S jr	Sr
U =r$ )	rB      z
This class turns `pixel_values` of shape `(batch_size, num_channels, height, width)` into the initial
`hidden_states` (patch embeddings) of shape `(batch_size, height, width, hidden_size)` to be consumed by a
Transformer.
c                 H  > [         TU ]  5         UR                  nUR                  UR                  pCX0l        US:X  a  O[        S5      e[        R                  " [        R                  " U R                  US-  SSSS9[        R                  " US-  USSSS95      U l	        g )N   z2Dinat only supports patch size of 4 at the moment.   r   r   r\   r\   r   r   )kernel_sizestridepadding)
r@   rA   
patch_sizenum_channelsrE   
ValueErrorr   
SequentialConv2d
projection)rK   rL   rc   rd   hidden_sizerM   s        r   rA   DinatPatchEmbeddings.__init__   s    &&
$*$7$79I9Ik(? QRR--IId'')9vV\flmIIkQ&PV`fg
r   rO   NrP   c                     UR                   u  p#pEX0R                  :w  a  [        S5      eU R                  U5      nUR	                  SSSS5      nU$ )NzeMake sure that the channel dimension of the pixel values match with the one set in the configuration.r   r\   r   r   )shaperd   re   rh   permute)rK   rO   _rd   heightwidthrR   s          r   rS   DinatPatchEmbeddings.forward   sZ    )5););&,,,w  __\2
''1a3
r   )rd   rh   )r)   r*   r+   r,   r-   rA   r.   r/   rU   rS   r2   rV   rW   s   @r   rB   rB      s4    
"	E$5$5$< 	 	 	r   rB   c                      ^  \ rS rSrSr\R                  4S\S\R                  SS4U 4S jjjr	S\
R                  S\
R                  4S	 jrS
rU =r$ )DinatDownsampler   z
Convolutional Downsampling Layer.

Args:
    dim (`int`):
        Number of input channels.
    norm_layer (`nn.Module`, *optional*, defaults to `nn.LayerNorm`):
        Normalization layer class.
dim
norm_layerrP   Nc           	         > [         TU ]  5         Xl        [        R                  " USU-  SSSSS9U l        U" SU-  5      U l        g )Nr\   r]   r^   r_   F)r`   ra   rb   bias)r@   rA   ru   r   rg   	reductionrF   )rK   ru   rv   rM   s      r   rA   DinatDownsampler.__init__   sC    3CVF\binoq3w'	r   input_featurec                     U R                  UR                  SSSS5      5      R                  SSSS5      nU R                  U5      nU$ )Nr   r   r   r\   )ry   rm   rF   )rK   r{   s     r   rS   DinatDownsampler.forward   sJ    }'<'<Q1a'HIQQRSUVXY[\]		-0r   )ru   rF   ry   )r)   r*   r+   r,   r-   r   rD   intModulerA   r.   rU   rS   r2   rV   rW   s   @r   rs   rs      sT     :< (C (RYY ($ ( (U\\ ell  r   rs   input	drop_probtrainingrP   c                    US:X  d  U(       d  U $ SU-
  nU R                   S   4SU R                  S-
  -  -   nU[        R                  " X@R                  U R
                  S9-   nUR                  5         U R                  U5      U-  nU$ )z[
Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).

        r   r   )r   )dtypedevice)rl   ndimr.   randr   r   floor_div)r   r   r   	keep_probrl   random_tensoroutputs          r   	drop_pathr      s    
 CxII[[^

Q 77E

5ELL YYMYYy!M1FMr   c                      ^  \ rS rSrSrSS\S-  SS4U 4S jjjrS\R                  S\R                  4S jr	S\
4S	 jrS
rU =r$ )DinatDropPath   zXDrop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).Nr   rP   c                 .   > [         TU ]  5         Xl        g r   )r@   rA   r   )rK   r   rM   s     r   rA   DinatDropPath.__init__   s    "r   r%   c                 B    [        XR                  U R                  5      $ r   )r   r   r   rK   r%   s     r   rS   DinatDropPath.forward   s    FFr   c                      SU R                    3$ )Nzp=r   rK   s    r   
extra_reprDinatDropPath.extra_repr   s    DNN#$$r   r   r   )r)   r*   r+   r,   r-   floatrA   r.   rU   rS   strr   r2   rV   rW   s   @r   r   r      sQ    b#%$, #$ # #GU\\ Gell G%C % %r   r   c                   x   ^  \ rS rSrU 4S jr S	S\R                  S\S-  S\\R                     4S jjr	Sr
U =r$ )
NeighborhoodAttention   c                   > [         TU ]  5         X#-  S:w  a  [        SU SU S35      eX0l        [	        X#-  5      U l        U R                  U R
                  -  U l        X@l        XPl        [        R                  " [        R                  " USU R                  -  S-
  SU R                  -  S-
  5      5      U l        [        R                  " U R                  U R                  UR                  S9U l        [        R                  " U R                  U R                  UR                  S9U l        [        R                  " U R                  U R                  UR                  S9U l        [        R&                  " UR(                  5      U l        g )Nr   zThe hidden size (z6) is not a multiple of the number of attention heads ()r\   r   )rx   )r@   rA   re   num_attention_headsr~   attention_head_sizeall_head_sizer`   dilationr   	Parameterr.   zerosrpbLinearqkv_biasquerykeyvaluerG   attention_probs_dropout_probrI   rK   rL   ru   	num_headsr`   r   rM   s         r   rA   NeighborhoodAttention.__init__   s:   ?a#C5(^_h^iijk  $- #&s#7 !558P8PP&  <<ID<L<L8Lq8PTUX\XhXhThklTl noYYt1143E3EFOO\
99T//1C1C&//ZYYt1143E3EFOO\
zz&"E"EFr   r%   output_attentionsNrP   c                 p   UR                   S S n/ UQSPU R                  P7nU R                  U5      R                  U5      R	                  SS5      nU R                  U5      R                  U5      R	                  SS5      nU R                  U5      R                  U5      R	                  SS5      nU[        R                  " U R                  5      -  n[        XVU R                  U R                  U R                  5      n[        R                  R                  USS9n	U R!                  U	5      n	[#        XU R                  U R                  5      n
U
R%                  SSSSS5      R'                  5       n
U
R)                  5       S S U R*                  4-   nU
R                  U5      n
U(       a  X4nU$ U
4nU$ )	Nr   r\   )ru   r   r   r[   )rl   r   r   view	transposer   r   mathsqrtr   r   r`   r   r   
functionalsoftmaxrI   r   rm   
contiguoussizer   )rK   r%   r   input_shapehidden_shapequery_layer	key_layervalue_layerattention_scoresattention_probscontext_layernew_context_layer_shapeoutputss                r   rS   NeighborhoodAttention.forward  s   
 $))#2.CCbC$*B*BCjj/44\BLLQPQRHH]+00>HHAN	jj/44\BLLQPQR
 "DIId.F.F$GG )4K[K[]a]j]jk --//0@b/I ,,7"?AQAQSWS`S`a%--aAq!<GGI"/"4"4"6s";t?Q?Q>S"S%**+BC6G=2 O\M]r   )
r   r   r   rI   r`   r   r   r   r   r   Fr)   r*   r+   r,   rA   r.   rU   boolr1   rS   r2   rV   rW   s   @r   r   r      sE    G2 */!||!  $;! 
u||		! !r   r   c                   z   ^  \ rS rSrU 4S jrS\R                  S\R                  S\R                  4S jrSrU =r	$ )NeighborhoodAttentionOutputi,  c                    > [         TU ]  5         [        R                  " X"5      U l        [        R
                  " UR                  5      U l        g r   )r@   rA   r   r   denserG   r   rI   rK   rL   ru   rM   s      r   rA   $NeighborhoodAttentionOutput.__init__-  s4    YYs(
zz&"E"EFr   r%   input_tensorrP   c                 J    U R                  U5      nU R                  U5      nU$ r   r   rI   )rK   r%   r   s      r   rS   #NeighborhoodAttentionOutput.forward2  s$    

=1]3r   r   
r)   r*   r+   r,   rA   r.   rU   rS   r2   rV   rW   s   @r   r   r   ,  s7    G
U\\  RWR^R^  r   r   c                   x   ^  \ rS rSrU 4S jr S	S\R                  S\S-  S\\R                     4S jjr	Sr
U =r$ )
NeighborhoodAttentionModulei9  c                 f   > [         TU ]  5         [        XX4U5      U l        [	        X5      U l        g r   )r@   rA   r   rK   r   r   r   s         r   rA   $NeighborhoodAttentionModule.__init__:  s*    )&yxX	1&>r   r%   r   NrP   c                 d    U R                  X5      nU R                  US   U5      nU4USS  -   nU$ Nr   r   )rK   r   )rK   r%   r   self_outputsattention_outputr   s         r   rS   #NeighborhoodAttentionModule.forward?  s@    
 yyB;;|AF#%QR(88r   )r   rK   r   r   rW   s   @r   r   r   9  sD    ? */||  $; 
u||		 r   r   c                   b   ^  \ rS rSrU 4S jrS\R                  S\R                  4S jrSrU =r	$ )DinatIntermediateiJ  c                   > [         TU ]  5         [        R                  " U[	        UR
                  U-  5      5      U l        [        UR                  [        5      (       a  [        UR                     U l        g UR                  U l        g r   )r@   rA   r   r   r~   	mlp_ratior   
isinstance
hidden_actr   r   intermediate_act_fnr   s      r   rA   DinatIntermediate.__init__K  sd    YYsC(8(83(>$?@
f''--'-f.?.?'@D$'-'8'8D$r   r%   rP   c                 J    U R                  U5      nU R                  U5      nU$ r   r   r   r   s     r   rS   DinatIntermediate.forwardS  s&    

=100?r   r   r   rW   s   @r   r   r   J  s(    9U\\ ell  r   r   c                   b   ^  \ rS rSrU 4S jrS\R                  S\R                  4S jrSrU =r	$ )DinatOutputiY  c                    > [         TU ]  5         [        R                  " [	        UR
                  U-  5      U5      U l        [        R                  " UR                  5      U l	        g r   )
r@   rA   r   r   r~   r   r   rG   rH   rI   r   s      r   rA   DinatOutput.__init__Z  sF    YYs6#3#3c#9:C@
zz&"<"<=r   r%   rP   c                 J    U R                  U5      nU R                  U5      nU$ r   r   r   s     r   rS   DinatOutput.forward_  s$    

=1]3r   r   r   rW   s   @r   r   r   Y  s(    >
U\\ ell  r   r   c            	          ^  \ rS rSrS
U 4S jjrS r SS\R                  S\S-  S\	\R                  \R                  4   4S jjr
S	rU =r$ )
DinatLayerie  c                   > [         TU ]  5         UR                  U l        UR                  U l        X@l        U R                  U R                  -  U l        [        R                  " X!R                  S9U l	        [        XX0R                  U R                  S9U l        US:  a  [        U5      O[        R                  " 5       U l        [        R                  " X!R                  S9U l        [!        X5      U l        [%        X5      U l        UR(                  S:  a>  [        R*                  " UR(                  [,        R.                  " SU45      -  SS9U l        g S U l        g )Neps)r`   r   r   r   r\   T)requires_grad)r@   rA   chunk_size_feed_forwardr`   r   window_sizer   rD   layer_norm_epslayernorm_beforer   	attentionr   Identityr   layernorm_afterr   intermediater   r   layer_scale_init_valuer   r.   oneslayer_scale_parameters)rK   rL   ru   r   r   drop_path_raterM   s         r   rA   DinatLayer.__init__f  s   '-'E'E$!-- ++dmm; "S6K6K L40@0@4==
 ;I3:N~6TVT_T_Ta!||C5J5JK-f:!&. ,,q0 LL66QH9MM]ab 	#  	#r   c                     U R                   nSnX$:  d  X4:  aD  S=pg[        SXC-
  5      n[        SXB-
  5      n	SSXhXy4n[        R                  R	                  X5      nX4$ )N)r   r   r   r   r   r   r   )r   maxr   r   pad)
rK   r%   ro   rp   r   
pad_valuespad_lpad_tpad_rpad_bs
             r   	maybe_padDinatLayer.maybe_padz  sn    &&'
5#6E;./E;/0EQe;JMM--mHM((r   r%   r   NrP   c                    UR                  5       u  p4pVUnU R                  U5      nU R                  XU5      u  pUR                  u  ppU R	                  XS9nUS   nUS   S:  =(       d    US   S:  nU(       a  US S 2S U2S U2S S 24   R                  5       nU R                  b  U R                  S   U-  nXpR                  U5      -   nU R                  U5      nU R                  U R                  U5      5      nU R                  b  U R                  S   U-  nXR                  U5      -   nU(       a  XS   4nU$ U4nU$ )N)r   r   r      r   )r   r   r  rl   r   r   r  r   r   r   r   )rK   r%   r   
batch_sizero   rp   channelsshortcutr  rn   
height_pad	width_padattention_outputsr   
was_paddedlayer_outputlayer_outputss                    r   rS   DinatLayer.forward  sf   
 /<.@.@.B+
E --m<$(NN=%$P!&3&9&9#y NN=N^,Q/]Q&;*Q-!*;
/7F7FUFA0EFQQS&&2#::1=@PP >>2B#CC++M:{{4#4#4\#BC&&266q9LHL$~~l'CC@Q';< YeWfr   )r   r   r   r   r   r`   r  r   r   r   r   )r   r   )r)   r*   r+   r,   rA   r  r.   rU   r   r1   rS   r2   rV   rW   s   @r   r   r   e  sR    
(	) */$||$  $;$ 
u||U\\)	*	$ $r   r   c                   x   ^  \ rS rSrU 4S jr S	S\R                  S\S-  S\\R                     4S jjr	Sr
U =r$ )

DinatStagei  c                 $  > [         T	U ]  5         Xl        X l        [        R
                  " [        U5       Vs/ s H  n[        UUUXX   Xh   S9PM     sn5      U l        Ub  U" U[        R                  S9U l
        OS U l
        SU l        g s  snf )N)rL   ru   r   r   r  )ru   rv   F)r@   rA   rL   ru   r   
ModuleListranger   layersrD   
downsamplepointing)
rK   rL   ru   depthr   	dilationsr  r!  irM   s
            r   rA   DinatStage.__init__  s    mm u	 &A !'&\#1#4 &	
 !(SR\\JDO"DO%	s   Br%   r   NrP   c                     UR                  5       u  p4pS[        U R                  5       H  u  pgU" X5      nUS   nM     Un	U R                  b  U R                  U	5      nX4n
U(       a  U
WSS  -  n
U
$ r   )r   	enumerater   r!  )rK   r%   r   rn   ro   rp   r%  layer_moduler  !hidden_states_before_downsamplingstage_outputss              r   rS   DinatStage.forward  s    
 ,0025(5OA(JM)!,M  6 -:)??& OO,MNM&J]12..Mr   )rL   ru   r!  r   r"  r   r   rW   s   @r   r  r    sD    8 */||  $; 
u||		 r   r  c                      ^  \ rS rSrU 4S jr    SS\R                  S\S-  S\S-  S\S-  S\S-  S	\\	-  4S
 jjr
SrU =r$ )DinatEncoderi  c                   > [         TU ]  5         [        UR                  5      U l        Xl        [        R                  " SUR                  [        UR                  5      SS9 Vs/ s H  o"R                  5       PM     nn[        R                  " [        U R                  5       Vs/ s H  n[        U[        UR                   SU-  -  5      UR                  U   UR"                  U   UR$                  U   U[        UR                  S U 5      [        UR                  S US-    5       X@R                  S-
  :  a  [&        OS S9PM     sn5      U l        g s  snf s  snf )Nr   cpu)r   r\   r   )rL   ru   r#  r   r$  r  r!  )r@   rA   lendepths
num_levelsrL   r.   linspacer  sumitemr   r  r  r  r~   rE   r   r$  rs   levels)rK   rL   xdpri_layerrM   s        r   rA   DinatEncoder.__init__  s0   fmm,!&63H3H#fmmJ\ej!kl!kAvvx!klmm  %T__5  6G !F,,q'z9: --0$..w7$..w7#&s6=='+B'Cc&--XeZadeZeJfFg#h4;ooPQ>Q4Q/X\  6
 ms   &E(B#Er%   r   Noutput_hidden_states(output_hidden_states_before_downsamplingreturn_dictrP   c                    U(       a  SOS nU(       a  SOS nU(       a  SOS nU(       a  UR                  SSSS5      n	Xa4-  nXy4-  n[        U R                  5       H  u  pU" X5      nUS   nUS   nU(       a&  U(       a  UR                  SSSS5      n	Xm4-  nXy4-  nO,U(       a%  U(       d  UR                  SSSS5      n	Xa4-  nXy4-  nU(       d  My  XSS  -  nM     U(       d  [        S XU4 5       5      $ [	        UUUUS9$ )Nr(   r   r   r   r\   c              3   .   #    U  H  oc  M  Uv   M     g 7fr   r(   ).0vs     r   	<genexpr>'DinatEncoder.forward.<locals>.<genexpr>  s     m$[q$[s   	)r$   r%   r&   r'   )rm   r(  r7  r1   r"   )rK   r%   r   r<  r=  r>  all_hidden_statesall_reshaped_hidden_statesall_self_attentionsreshaped_hidden_stater%  r)  r  r*  s                 r   rS   DinatEncoder.forward  sA    #7BD+?RT"$5b4$1$9$9!Q1$E!!11&*BB&(5OA(JM)!,M0=a0@-#(P(I(Q(QRSUVXY[\(]%!%II!*.FF*%.V(5(=(=aAq(I%!%55!*.FF*  #QR'88#%  6( m]GZ$[mmm!++*#=	
 	
r   )rL   r7  r3  )FFFT)r)   r*   r+   r,   rA   r.   rU   r   r1   r"   rS   r2   rV   rW   s   @r   r.  r.    st    
. */,1@E#'.
||.
  $;.
 #Tk	.

 37+.
 D[.
 
#	#.
 .
r   r.  c                   ,    \ rS rSr% \\S'   SrSrSrSr	g)DinatPreTrainedModeli!  rL   dinatrO   )imager(   N)
r)   r*   r+   r,   r   r0   base_model_prefixmain_input_nameinput_modalitiesr2   r(   r   r   rK  rK  !  s    $O!r   rK  c                      ^  \ rS rSrSU 4S jjrS r\    SS\R                  S-  S\	S-  S\	S-  S\	S-  S	\
\-  4
S
 jj5       rSrU =r$ )
DinatModeli)  c                   > [         TU ]  U5        [        U S/5        Xl        [	        UR
                  5      U l        [        UR                  SU R                  S-
  -  -  5      U l	        [        U5      U l        [        U5      U l        [        R                  " U R                  UR                   S9U l        U(       a  [        R$                  " S5      OSU l        U R)                  5         g)z^
add_pooling_layer (bool, *optional*, defaults to `True`):
    Whether to add a pooling layer
nattenr\   r   r   N)r@   rA   r   rL   r1  r2  r3  r~   rE   num_featuresr=   rR   r.  encoderr   rD   r   	layernormAdaptiveAvgPool1dpooler	post_init)rK   rL   add_pooling_layerrM   s      r   rA   DinatModel.__init__+  s    
 	 $
+fmm, 0 0119L3M MN)&1#F+d&7&7V=R=RS1Bb**1- 	r   c                 .    U R                   R                  $ r   rR   rC   r   s    r   get_input_embeddingsDinatModel.get_input_embeddingsA      ///r   NrO   r   r<  r>  rP   c                 Z   Ub  UOU R                   R                  nUb  UOU R                   R                  nUb  UOU R                   R                  nUc  [	        S5      eU R                  U5      nU R                  UUUUS9nUS   nU R                  U5      nS n	U R                  bH  U R                  UR                  SS5      R                  SS5      5      n	[        R                  " U	S5      n	U(       d  X4USS  -   n
U
$ [        UU	UR                  UR                  UR                  S9$ )Nz You have to specify pixel_valuesr   r<  r>  r   r   r\   )r$   r6   r%   r&   r'   )rL   r   r<  r>  re   rR   rV  rW  rY  flattenr   r.   r4   r%   r&   r'   )rK   rO   r   r<  r>  r   embedding_outputencoder_outputssequence_outputpooled_outputr   s              r   rS   DinatModel.forwardD  s?    2C1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++BYBY?@@??<8,,/!5#	 ' 
 *!,..9;;" KK(?(?1(E(O(OPQST(UVM!MM-;M%58KKFM-')77&11#2#I#I
 	
r   )rL   rR   rV  rW  rU  r3  rY  )T)NNNN)r)   r*   r+   r,   rA   r_  r   r.   r/   r   r1   r4   rS   r2   rV   rW   s   @r   rR  rR  )  s|    ,0  26)-,0#'-
''$.-
  $;-
 #Tk	-

 D[-
 
!	!-
 -
r   rR  z
    Dinat Model transformer with an image classification head on top (a linear layer on top of the final hidden state
    of the [CLS] token) e.g. for ImageNet.
    c                      ^  \ rS rSrU 4S jr\     SS\R                  S-  S\R                  S-  S\	S-  S\	S-  S\	S-  S	\
\-  4S
 jj5       rSrU =r$ )DinatForImageClassificationiu  c                 ^  > [         TU ]  U5        [        U S/5        UR                  U l        [	        U5      U l        UR                  S:  a5  [        R                  " U R
                  R                  UR                  5      O[        R                  " 5       U l
        U R                  5         g )NrT  r   )r@   rA   r   
num_labelsrR  rL  r   r   rU  r   
classifierrZ  rJ   s     r   rA   $DinatForImageClassification.__init__|  s     $
+ ++'
 FLEVEVYZEZBIIdjj--v/@/@A`b`k`k`m 	
 	r   NrO   labelsr   r<  r>  rP   c                 V   Ub  UOU R                   R                  nU R                  UUUUS9nUS   nU R                  U5      n	Sn
Ub  U R	                  X)U R                   5      n
U(       d  U	4USS -   nU
b  U
4U-   $ U$ [        U
U	UR                  UR                  UR                  S9$ )ab  
labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
    Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
    config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
    `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
Nrc  r   r\   )r:   r;   r%   r&   r'   )	rL   r>  rL  rn  loss_functionr8   r%   r&   r'   )rK   rO   rp  r   r<  r>  r   r   rh  r;   r:   r   s               r   rS   #DinatForImageClassification.forward  s      &1%<k$++BYBY**/!5#	  
  
/%%fdkkBDY,F)-)9TGf$EvE)!//))#*#A#A
 	
r   )rn  rL  rm  )NNNNN)r)   r*   r+   r,   rA   r   r.   r/   
LongTensorr   r1   r8   rS   r2   rV   rW   s   @r   rk  rk  u  s       26*.)-,0#'*
''$.*
   4'*
  $;	*

 #Tk*
 D[*
 
+	+*
 *
r   rk  zL
    NAT backbone, to be used with frameworks like DETR and MaskFormer.
    c                      ^  \ rS rSrU 4S jrS r\\\   SS\	R                  S\S-  S\S-  S\S-  S	\4
S
 jj5       5       5       rSrU =r$ )DinatBackbonei  c           	        > [         TU ]  U5        [        U S/5        [        U5      U l        [        U5      U l        UR                  /[        [        UR                  5      5       Vs/ s H  n[        UR                  SU-  -  5      PM      sn-   U l        0 n[        U R                  U R                  5       H  u  pE[         R"                  " U5      X4'   M     [         R$                  " U5      U l        U R)                  5         g s  snf )NrT  r\   )r@   rA   r   r=   rR   r.  rV  rE   r  r1  r2  r~   rU  zipout_featuresr  r   rD   
ModuleDicthidden_states_normsrZ  )rK   rL   r%  r{  stagerd   rM   s         r   rA   DinatBackbone.__init__  s     $
+)&1#F+#--.X]^abhbobo^pXq1rXqST#f6F6FA6M2NXq1rr !#&t'8'8$--#HE)+l)C& $I#%==1D#E  	 2ss   *%Dc                 .    U R                   R                  $ r   r^  r   s    r   r_  "DinatBackbone.get_input_embeddings  ra  r   NrO   r<  r   r>  rP   c                 &   Ub  UOU R                   R                  nUb  UOU R                   R                  nUb  UOU R                   R                  nU R	                  U5      nU R                  UUSSSS9nUR                  nSn	[        U R                  U5       H  u  pXR                  ;   d  M  UR                  u  ppUR                  SSSS5      R                  5       nUR                  XU-  U5      nU R                  U
   " U5      nUR                  XX5      nUR                  SSSS5      R                  5       nX4-  n	M     U(       d  U	4nU(       a  UUR                  4-  nU$ [!        U	U(       a  UR                  OSUR"                  S	9$ )
a  
Examples:

```python
>>> from transformers import AutoImageProcessor, AutoBackbone
>>> import torch
>>> from PIL import Image
>>> import httpx
>>> from io import BytesIO

>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> with httpx.stream("GET", url) as response:
...     image = Image.open(BytesIO(response.read()))

>>> processor = AutoImageProcessor.from_pretrained("shi-labs/nat-mini-in1k-224")
>>> model = AutoBackbone.from_pretrained(
...     "shi-labs/nat-mini-in1k-224", out_features=["stage1", "stage2", "stage3", "stage4"]
... )

>>> inputs = processor(image, return_tensors="pt")

>>> outputs = model(**inputs)

>>> feature_maps = outputs.feature_maps
>>> list(feature_maps[-1].shape)
[1, 512, 7, 7]
```NT)r   r<  r=  r>  r(   r   r\   r   r   )feature_mapsr%   r&   )rL   r>  r<  r   rR   rV  r'   rx  stage_namesry  rl   rm   r   r   r{  r%   r	   r&   )rK   rO   r<  r   r>  r   re  r   r%   r  r|  hidden_stater  rd   ro   rp   r   s                    r   rS   DinatBackbone.forward  s   L &1%<k$++BYBY$8$D $++JjJj 	 2C1N-TXT_T_TqTq??<8,,/!%59  
  66#&t'7'7#GE))):F:L:L7
&+33Aq!Q?JJL+00e^\Z#77>|L+00UY+33Aq!Q?JJL/ $H "_F#70022M%3G'//T))
 	
r   )rR   rV  r{  rU  )NNN)r)   r*   r+   r,   rA   r_  r   r   r   r.   rU   r   r	   rS   r2   rV   rW   s   @r   rv  rv    s    $0   -1)-#'J
llJ
 #TkJ
  $;	J

 D[J
 
J
  ! J
r   rv  )rk  rR  rK  rv  )r   F)9r-   r   dataclassesr   r.   r   activationsr   backbone_utilsr   r   modeling_outputsr	   modeling_utilsr
   utilsr   r   r   r   r   r   utils.genericr   configuration_dinatr   natten.functionalr   r   
get_loggerr)   loggerr"   r4   r8   r   r=   rB   rs   rU   r   r   r   r   r   r   r   r   r   r   r  r.  rK  rR  rk  rv  __all__r(   r   r   <module>r     s   @  !   ! H . -  . , ;;// 
		H	% 
 H H H  
 H{ H H& 
 H H H*bii ,!299 !Hryy 0U\\ e T V[VbVb  %BII %8BII 8v
")) 
")) "		 	")) 	D DN, ,^C
299 C
L "? " " H
% H
 H
V <
"6 <
<
~ 
c
M#7 c

c
L ar   