
    Z jt              	          S r SSKrSSKrSSKrSSKJr  SSKJr  SSK	J
r
  SSKJrJr  SSKJr  SS	KJrJr  SS
KJr  SSKJrJr  SSKJr  SSKJr  \R8                  " \5      r " S S\R>                  5      r \RB                  RD                  S 5       r#S r$ " S S\R>                  5      r%S1S\RL                  S\'S\(S\RL                  4S jjr) " S S\R>                  5      r* " S S\R>                  5      r+ " S S\R>                  5      r, " S  S!\R>                  5      r-S" r.S# r/ " S$ S%\5      r0 " S& S'\R>                  5      r1\ " S( S)\5      5       r2\ " S* S+\25      5       r3\" S,S-9 " S. S/\\25      5       r4/ S0Qr5g)2zPyTorch ViTDet backbone.    N)nn   )initialization)ACT2FN)BackboneMixinfilter_output_hidden_states)GradientCheckpointingLayer)BackboneOutputBaseModelOutput)PreTrainedModel)auto_docstringlogging)can_return_tuple   )VitDetConfigc                   l   ^  \ rS rSrSrU 4S jrS rS\R                  S\R                  4S jr	Sr
U =r$ )	VitDetEmbeddings$   z
class VitDetEmbeddings(nn.Module):
    """
    This class turns `pixel_values` of shape `(batch_size, num_channels, height, width)` into the initial
    `hidden_states` (patch embeddings) to be consumed by a Transformer.
    """

    def __init__(self, config):
        super().__init__()
        image_size, patch_size = config.pretrain_image_size, config.patch_size
        num_channels, hidden_size = config.num_channels, config.hidden_size

        image_size = image_size if isinstance(image_size, collections.abc.Iterable) else (image_size, image_size)
        patch_size = patch_size if isinstance(patch_size, collections.abc.Iterable) else (patch_size, patch_size)
        num_patches = (image_size[1] // patch_size[1]) * (image_size[0] // patch_size[0])
        self.image_size = image_size
        self.patch_size = patch_size
        self.num_channels = num_channels
        self.num_patches = num_patches

        if config.use_absolute_position_embeddings:
            # initialize the absolute positional embedding with the pretrain image size
            num_positions = num_patches + 1
            self.position_embeddings = nn.Parameter(torch.zeros(1, num_positions, config.hidden_size))
        else:
            self.position_embeddings = None

        self.projection = nn.Conv2d(num_channels, hidden_size, kernel_size=patch_size, stride=patch_size)

    def get_absolute_positions(self, abs_pos_embeddings, has_cls_token, height, width):
        """
        Calculate absolute positional embeddings. If needed, resize embeddings and remove cls_token dimension for the
        original embeddings.

        Args:
            abs_pos_embeddings (`torch.Tensor`):
                Absolute positional embeddings with (1, num_position, num_channels).
            has_cls_token (`bool`):
                If true, has 1 embedding in abs_pos_embeddings for cls token.
            height (`int`):
                Height of input image tokens.
            width (`int`):
                Width of input image tokens.

        Returns:
            Absolute positional embeddings after processing with shape (1, height, width, num_channels)
        """
        if has_cls_token:
            abs_pos_embeddings = abs_pos_embeddings[:, 1:]
        num_position = abs_pos_embeddings.shape[1]
        size = int(math.sqrt(num_position))
        if size * size != num_position:
            raise ValueError("Absolute position embeddings must be a square number.")

        if torch.jit.is_tracing() or size != height or size != width:
            new_abs_pos_embeddings = nn.functional.interpolate(
                abs_pos_embeddings.reshape(1, size, size, -1).permute(0, 3, 1, 2),
                size=(height, width),
                mode="bicubic",
                align_corners=False,
            )
            return new_abs_pos_embeddings.permute(0, 2, 3, 1)
        else:
            return abs_pos_embeddings.reshape(1, height, width, -1)

    def forward(self, pixel_values: torch.Tensor) -> torch.Tensor:
        num_channels = pixel_values.shape[1]
        if num_channels != self.num_channels:
            raise ValueError(
                "Make sure that the channel dimension of the pixel values match with the one set in the configuration."
                f" Expected {self.num_channels} but got {num_channels}."
            )
        embeddings = self.projection(pixel_values)

        if self.position_embeddings is not None:
            # (batch_size, num_channels, height, width) -> (batch_size, height, width, num_channels)
            embeddings = embeddings.permute(0, 2, 3, 1)
            # add the absolute position embeddings, interpolated to the current token grid
            embeddings = embeddings + self.get_absolute_positions(
                self.position_embeddings, True, embeddings.shape[1], embeddings.shape[2]
            )
            # (batch_size, height, width, num_channels) -> (batch_size, num_channels, height, width)
            embeddings = embeddings.permute(0, 3, 1, 2)

        return embeddings
@torch.jit.script_if_tracing
def get_rel_pos(q_size, k_size, rel_pos):
    """
    Get relative positional embeddings according to the relative positions of query and key sizes.

    Args:
        q_size (`int`):
            Size of query q.
        k_size (`int`):
            Size of key k.
        rel_pos (`torch.Tensor`):
            Relative position embeddings (num_embeddings, num_channels).

    Returns:
        Extracted positional embeddings according to relative positions.
    """
    max_rel_dist = int(2 * max(q_size, k_size) - 1)
    if rel_pos.shape[0] != max_rel_dist:
        # interpolate the stored table to the required number of relative offsets
        rel_pos_resized = nn.functional.interpolate(
            rel_pos.reshape(1, rel_pos.shape[0], -1).permute(0, 2, 1),
            size=max_rel_dist,
            mode="linear",
        )
        rel_pos_resized = rel_pos_resized.reshape(-1, max_rel_dist).permute(1, 0)
    else:
        rel_pos_resized = rel_pos

    # scale the coords with short length if shapes for q and k are different
    q_coords = torch.arange(q_size)[:, None] * max(k_size / q_size, 1.0)
    k_coords = torch.arange(k_size)[None, :] * max(q_size / k_size, 1.0)
    relative_coords = (q_coords - k_coords) + (k_size - 1) * max(q_size / k_size, 1.0)

    return rel_pos_resized[relative_coords.long()]
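# Illustrative sketch (values below are hypothetical, not taken from any checkpoint):
# with q_size == k_size == 4 there are 2 * 4 - 1 = 7 possible relative offsets, so a
# (7, num_channels) table is gathered into a per-pair (4, 4, num_channels) lookup.
#
#     rel_pos = torch.randn(7, 16)
#     table = get_rel_pos(4, 4, rel_pos)  # shape (4, 4, 16); row 0 gathers offsets [3, 2, 1, 0]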
def add_decomposed_relative_positions(attn, queries, rel_pos_h, rel_pos_w, q_size, k_size):
    """
    Calculate decomposed Relative Positional Embeddings as introduced in
    [MViT2](https://github.com/facebookresearch/mvit/blob/19786631e330df9f3622e5402b4a419a263a2c80/mvit/models/attention.py).

    Args:
        attn (`torch.Tensor`):
            Attention map.
        queries (`torch.Tensor`):
            Query q in the attention layer with shape (batch_size, queries_height * queries_width, num_channels).
        rel_pos_h (`torch.Tensor`):
            Relative position embeddings (Lh, num_channels) for height axis.
        rel_pos_w (`torch.Tensor`):
            Relative position embeddings (Lw, num_channels) for width axis.
        q_size (`tuple[int]`):
            Spatial sequence size of query q with (queries_height, queries_width).
        k_size (`tuple[int]`):
            Spatial sequence size of key k with (keys_height, keys_width).

    Returns:
        attn (Tensor): attention map with added relative positional embeddings.
    """
    queries_height, queries_width = q_size
    keys_height, keys_width = k_size
    relative_height = get_rel_pos(queries_height, keys_height, rel_pos_h)
    relative_width = get_rel_pos(queries_width, keys_width, rel_pos_w)

    batch_size, _, dim = queries.shape
    r_q = queries.reshape(batch_size, queries_height, queries_width, dim)
    relative_height = torch.einsum("bhwc,hkc->bhwk", r_q, relative_height)
    relative_weight = torch.einsum("bhwc,wkc->bhwk", r_q, relative_width)

    attn = (
        attn.view(batch_size, queries_height, queries_width, keys_height, keys_width)
        + relative_height[:, :, :, :, None]
        + relative_weight[:, :, :, None, :]
    ).view(batch_size, queries_height * queries_width, keys_height * keys_width)

    return attn
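# Shape sketch (illustrative only, hypothetical sizes): for a 14x14 query/key grid and a
# per-head dimension of 64, `attn` is (batch_size * num_heads, 196, 196); the height term
# broadcasts over key width and the width term over key height before flattening back.
#
#     attn = torch.zeros(1, 196, 196)
#     queries = torch.randn(1, 196, 64)
#     rel = torch.randn(2 * 14 - 1, 64)
#     out = add_decomposed_relative_positions(attn, queries, rel, rel, (14, 14), (14, 14))
#     # out.shape == (1, 196, 196)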
class VitDetAttention(nn.Module):
    """Multi-head Attention block with relative position embeddings."""

    def __init__(self, config, input_size=None):
        """
        Args:
            config (`VitDetConfig`):
                Model configuration.
            input_size (`tuple[int]`, *optional*):
                Input resolution, only required in case relative position embeddings are added.
        """
        super().__init__()

        dim = config.hidden_size
        num_heads = config.num_attention_heads

        self.num_heads = num_heads
        head_dim = dim // num_heads
        self.scale = head_dim**-0.5

        self.qkv = nn.Linear(dim, dim * 3, bias=config.qkv_bias)
        self.proj = nn.Linear(dim, dim)

        self.use_relative_position_embeddings = config.use_relative_position_embeddings
        if self.use_relative_position_embeddings:
            # initialize relative positional embeddings
            self.rel_pos_h = nn.Parameter(torch.zeros(2 * input_size[0] - 1, head_dim))
            self.rel_pos_w = nn.Parameter(torch.zeros(2 * input_size[1] - 1, head_dim))

    def forward(self, hidden_state, output_attentions=False):
        batch_size, height, width, _ = hidden_state.shape
        # qkv with shape (3, batch_size, num_heads, height * width, num_channels)
        qkv = self.qkv(hidden_state).reshape(batch_size, height * width, 3, self.num_heads, -1).permute(2, 0, 3, 1, 4)
        # queries, keys and values have shape (batch_size * num_heads, height * width, num_channels)
        queries, keys, values = qkv.reshape(3, batch_size * self.num_heads, height * width, -1).unbind(0)

        attention_scores = (queries * self.scale) @ keys.transpose(-2, -1)

        if self.use_relative_position_embeddings:
            attention_scores = add_decomposed_relative_positions(
                attention_scores, queries, self.rel_pos_h, self.rel_pos_w, (height, width), (height, width)
            )

        attention_probs = attention_scores.softmax(dim=-1)

        hidden_state = attention_probs @ values
        hidden_state = hidden_state.reshape(batch_size, self.num_heads, height, width, -1)
        hidden_state = hidden_state.permute(0, 2, 3, 1, 4)
        hidden_state = hidden_state.reshape(batch_size, height, width, -1)
        hidden_state = self.proj(hidden_state)

        if output_attentions:
            attention_probs = attention_probs.reshape(
                batch_size, self.num_heads, attention_probs.shape[-2], attention_probs.shape[-1]
            )
            outputs = (hidden_state, attention_probs)
        else:
            outputs = (hidden_state,)

        return outputs
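# Minimal usage sketch, assuming the default VitDetConfig (hidden_size=768, 12 heads):
# the block operates on channels-last feature maps rather than flattened token sequences.
#
#     attention = VitDetAttention(VitDetConfig(), input_size=(14, 14))
#     (output,) = attention(torch.randn(1, 14, 14, 768))  # output: (1, 14, 14, 768)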
def drop_path(input: torch.Tensor, drop_prob: float = 0.0, training: bool = False) -> torch.Tensor:
    """
    Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
    """
    if drop_prob == 0.0 or not training:
        return input
    keep_prob = 1 - drop_prob
    shape = (input.shape[0],) + (1,) * (input.ndim - 1)  # work with tensors of any rank, not just 2D ConvNets
    random_tensor = keep_prob + torch.rand(shape, dtype=input.dtype, device=input.device)
    random_tensor.floor_()  # binarize
    output = input.div(keep_prob) * random_tensor
    return output


class VitDetDropPath(nn.Module):
    """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks)."""

    def __init__(self, drop_prob: float | None = None) -> None:
        super().__init__()
        self.drop_prob = drop_prob

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        return drop_path(hidden_states, self.drop_prob, self.training)

    def extra_repr(self) -> str:
        return f"p={self.drop_prob}"
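# Behaviour sketch (illustrative): in eval mode the input passes through unchanged; in
# training mode each sample is zeroed with probability `drop_prob` and the survivors are
# rescaled by 1 / (1 - drop_prob) so the expected value is preserved.
#
#     x = torch.ones(8, 3, 4, 4)
#     drop_path(x, drop_prob=0.5, training=False)  # returns x unchanged
#     out = drop_path(x, drop_prob=0.5, training=True)
#     # each of the 8 samples is now either all zeros or all 2.0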
class VitDetLayerNorm(nn.Module):
    """
    A LayerNorm variant, popularized by Transformers, that performs point-wise mean and variance normalization over the
    channel dimension for inputs that have shape (batch_size, channels, height, width).
    https://github.com/facebookresearch/ConvNeXt/blob/d1fa8f6fef0a165b27399986cc2bdacc92777e40/models/convnext.py#L119
    """

    def __init__(self, normalized_shape, eps=1e-6):
        super().__init__()
        self.weight = nn.Parameter(torch.ones(normalized_shape))
        self.bias = nn.Parameter(torch.zeros(normalized_shape))
        self.eps = eps
        self.normalized_shape = (normalized_shape,)

    def forward(self, x):
        u = x.mean(1, keepdim=True)
        s = (x - u).pow(2).mean(1, keepdim=True)
        x = (x - u) / torch.sqrt(s + self.eps)
        x = self.weight[:, None, None] * x + self.bias[:, None, None]
        return x
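# Minimal usage sketch: unlike `nn.LayerNorm`, this module normalizes dim=1 of
# channels-first (NCHW) tensors, computing per-pixel statistics over the channels.
#
#     norm = VitDetLayerNorm(64)
#     out = norm(torch.randn(2, 64, 14, 14))  # same shape, normalized over the 64 channels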
class VitDetResBottleneckBlock(nn.Module):
    """
    The standard bottleneck residual block without the last activation layer. It contains 3 conv layers with kernels
    1x1, 3x3, 1x1.
    """

    def __init__(self, config, in_channels, out_channels, bottleneck_channels):
        """
        Args:
            config (`VitDetConfig`):
                Model configuration.
            in_channels (`int`):
                Number of input channels.
            out_channels (`int`):
                Number of output channels.
            bottleneck_channels (`int`):
                Number of output channels for the 3x3 "bottleneck" conv layers.
        """
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels, bottleneck_channels, 1, bias=False)
        self.norm1 = VitDetLayerNorm(bottleneck_channels)
        self.act1 = ACT2FN[config.hidden_act]

        self.conv2 = nn.Conv2d(bottleneck_channels, bottleneck_channels, 3, padding=1, bias=False)
        self.norm2 = VitDetLayerNorm(bottleneck_channels)
        self.act2 = ACT2FN[config.hidden_act]

        self.conv3 = nn.Conv2d(bottleneck_channels, out_channels, 1, bias=False)
        self.norm3 = VitDetLayerNorm(out_channels)

    def forward(self, x):
        out = x
        for layer in self.children():
            out = layer(out)

        out = x + out
        return out


class VitDetMlp(nn.Module):
    def __init__(self, config, in_features: int, hidden_features: int) -> None:
        super().__init__()
        self.fc1 = nn.Linear(in_features, hidden_features)
        self.act = ACT2FN[config.hidden_act]
        self.fc2 = nn.Linear(hidden_features, in_features)
        self.drop = nn.Dropout(config.dropout_prob)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = self.fc1(x)
        x = self.act(x)
        x = self.drop(x)
        x = self.fc2(x)
        x = self.drop(x)
        return x
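# Shape sketch (hypothetical sizes, illustrative only): the bottleneck block is
# shape-preserving, so it can sit wherever an extra convolutional residual is configured.
#
#     block = VitDetResBottleneckBlock(VitDetConfig(), in_channels=768, out_channels=768, bottleneck_channels=384)
#     out = block(torch.randn(1, 768, 14, 14))  # (1, 768, 14, 14)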
def window_partition(hidden_state, window_size):
    """
    Partition into non-overlapping windows with padding if needed.

    Args:
        hidden_state (`torch.Tensor`):
            Input tokens with [batch_size, height, width, num_channels].
        window_size (`int`):
            Window size.

    Returns:
        `tuple(torch.FloatTensor)` comprising various elements:
        - windows: windows after partition with [batch_size * num_windows, window_size, window_size, num_channels].
        - (padded_height, padded_width): padded height and width before partition
    """
    batch_size, height, width, num_channels = hidden_state.shape

    pad_height = (window_size - height % window_size) % window_size
    pad_width = (window_size - width % window_size) % window_size
    hidden_state = nn.functional.pad(hidden_state, (0, 0, 0, pad_width, 0, pad_height))
    padded_height, padded_width = height + pad_height, width + pad_width

    hidden_state = hidden_state.view(
        batch_size, padded_height // window_size, window_size, padded_width // window_size, window_size, num_channels
    )
    windows = hidden_state.permute(0, 1, 3, 2, 4, 5).contiguous().view(-1, window_size, window_size, num_channels)
    return windows, (padded_height, padded_width)
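# Round-trip sketch (illustrative): `window_partition` and `window_unpartition` (defined
# below) are exact inverses once the padding is stripped. A (1, 7, 9, 8) map with
# window_size=4 is padded to 8x12, split into 2 * 3 = 6 windows, and restored losslessly:
#
#     x = torch.randn(1, 7, 9, 8)
#     windows, pad_hw = window_partition(x, 4)  # windows: (6, 4, 4, 8), pad_hw: (8, 12)
#     y = window_unpartition(windows, 4, pad_hw, (7, 9))
#     # torch.equal(x, y) -> True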
def window_unpartition(windows, window_size, pad_height_width, height_width):
    """
    Window unpartition into original sequences and removing padding.

    Args:
        windows (`torch.Tensor`):
            Input tokens with [batch_size * num_windows, window_size, window_size, num_channels].
        window_size (`int`):
            Window size.
        pad_height_width (`tuple[int]`):
            Padded height and width (padded_height, padded_width).
        height_width (`tuple[int]`):
            Original height and width before padding.

    Returns:
        hidden_state: unpartitioned sequences with [batch_size, height, width, num_channels].
    """
    padded_height, padded_width = pad_height_width
    height, width = height_width
    batch_size = windows.shape[0] // (padded_height * padded_width // window_size // window_size)
    hidden_state = windows.view(
        batch_size, padded_height // window_size, padded_width // window_size, window_size, window_size, -1
    )
    hidden_state = hidden_state.permute(0, 1, 3, 2, 4, 5).contiguous()
    hidden_state = hidden_state.view(batch_size, padded_height, padded_width, -1)

    hidden_state = hidden_state[:, :height, :width, :].contiguous()
    return hidden_state


class VitDetLayer(GradientCheckpointingLayer):
    """This corresponds to the Block class in the original implementation."""

    def __init__(
        self, config: VitDetConfig, drop_path_rate: float = 0, window_size: int = 0, use_residual_block: bool = False
    ) -> None:
        super().__init__()

        dim = config.hidden_size

        image_size = config.image_size
        image_size = image_size if isinstance(image_size, (list, tuple)) else (image_size, image_size)
        patch_size = config.patch_size
        patch_size = patch_size if isinstance(patch_size, (list, tuple)) else (patch_size, patch_size)
        input_size = (image_size[0] // patch_size[0], image_size[1] // patch_size[1])

        self.norm1 = nn.LayerNorm(dim, eps=config.layer_norm_eps)
        self.attention = VitDetAttention(
            config, input_size=input_size if window_size == 0 else (window_size, window_size)
        )

        self.drop_path = VitDetDropPath(drop_path_rate) if drop_path_rate > 0.0 else nn.Identity()
        self.norm2 = nn.LayerNorm(dim, eps=config.layer_norm_eps)
        self.mlp = VitDetMlp(config=config, in_features=dim, hidden_features=int(dim * config.mlp_ratio))

        self.window_size = window_size
        self.use_residual_block = use_residual_block
        if self.use_residual_block:
            # use a residual block with bottleneck channels equal to dim // 2
            self.residual = VitDetResBottleneckBlock(
                config=config,
                in_channels=dim,
                out_channels=dim,
                bottleneck_channels=dim // 2,
            )

    def forward(
        self,
        hidden_states: torch.Tensor,
        output_attentions: bool = False,
    ) -> tuple[torch.Tensor, torch.Tensor] | tuple[torch.Tensor]:
        # (batch_size, num_channels, height, width) -> (batch_size, height, width, num_channels)
        hidden_states = hidden_states.permute(0, 2, 3, 1)

        shortcut = hidden_states

        hidden_states = self.norm1(hidden_states)

        # window partition
        if self.window_size > 0:
            height, width = hidden_states.shape[1], hidden_states.shape[2]
            hidden_states, pad_height_width = window_partition(hidden_states, self.window_size)

        self_attention_outputs = self.attention(
            hidden_states,
            output_attentions=output_attentions,
        )
        hidden_states = self_attention_outputs[0]
        outputs = self_attention_outputs[1:]

        # reverse window partition
        if self.window_size > 0:
            hidden_states = window_unpartition(hidden_states, self.window_size, pad_height_width, (height, width))

        # first residual connection
        hidden_states = shortcut + self.drop_path(hidden_states)

        hidden_states = hidden_states + self.drop_path(self.mlp(self.norm2(hidden_states)))

        # (batch_size, height, width, num_channels) -> (batch_size, num_channels, height, width)
        hidden_states = hidden_states.permute(0, 3, 1, 2)

        if self.use_residual_block:
            hidden_states = self.residual(hidden_states)

        outputs = (hidden_states,) + outputs

        return outputs


class VitDetEncoder(nn.Module):
    def __init__(self, config: VitDetConfig) -> None:
        super().__init__()
        self.config = config
        depth = config.num_hidden_layers

        # stochastic depth decay rule
        drop_path_rate = [rate.item() for rate in torch.linspace(0, config.drop_path_rate, depth, device="cpu")]

        layers = []
        for i in range(depth):
            layers.append(
                VitDetLayer(
                    config,
                    drop_path_rate=drop_path_rate[i],
                    window_size=config.window_size if i in config.window_block_indices else 0,
                    use_residual_block=i in config.residual_block_indices,
                )
            )

        self.layer = nn.ModuleList(layers)
        self.gradient_checkpointing = False

    def forward(
        self,
        hidden_states: torch.Tensor,
        output_attentions: bool = False,
        output_hidden_states: bool = False,
        return_dict: bool = True,
    ) -> tuple | BaseModelOutput:
        all_hidden_states = () if output_hidden_states else None
        all_self_attentions = () if output_attentions else None

        for i, layer_module in enumerate(self.layer):
            if output_hidden_states:
                all_hidden_states = all_hidden_states + (hidden_states,)

            layer_outputs = layer_module(hidden_states, output_attentions)
            hidden_states = layer_outputs[0]

            if output_attentions:
                all_self_attentions = all_self_attentions + (layer_outputs[1],)

        if output_hidden_states:
            all_hidden_states = all_hidden_states + (hidden_states,)

        if not return_dict:
            return tuple(v for v in [hidden_states, all_hidden_states, all_self_attentions] if v is not None)
        return BaseModelOutput(
            last_hidden_state=hidden_states,
            hidden_states=all_hidden_states,
            attentions=all_self_attentions,
        )


@auto_docstring
class VitDetPreTrainedModel(PreTrainedModel):
    config: VitDetConfig
    base_model_prefix = "vitdet"
    main_input_name = "pixel_values"
    input_modalities = ("image",)
    supports_gradient_checkpointing = True
    _no_split_modules = []

    @torch.no_grad()
    def _init_weights(self, module: nn.Linear | nn.Conv2d | nn.LayerNorm) -> None:
        """Initialize the weights"""
        if isinstance(module, (nn.Linear, nn.Conv2d)):
            init.trunc_normal_(module.weight, mean=0.0, std=self.config.initializer_range)
            if module.bias is not None:
                init.zeros_(module.bias)
        elif isinstance(module, nn.LayerNorm):
            init.zeros_(module.bias)
            init.ones_(module.weight)
        elif isinstance(module, VitDetEmbeddings):
            init.trunc_normal_(module.position_embeddings, mean=0.0, std=self.config.initializer_range)
        elif isinstance(module, VitDetAttention) and self.config.use_relative_position_embeddings:
            init.trunc_normal_(module.rel_pos_h, mean=0.0, std=self.config.initializer_range)
            init.trunc_normal_(module.rel_pos_w, mean=0.0, std=self.config.initializer_range)
        elif isinstance(module, VitDetResBottleneckBlock):
            for conv in (module.conv1, module.conv2, module.conv3):
                init.kaiming_normal_(conv.weight, mode="fan_out", nonlinearity="relu")
                if conv.bias is not None:
                    init.constant_(conv.bias, 0)
            for norm in (module.norm1, module.norm2):
                init.ones_(norm.weight)
                init.zeros_(norm.bias)
            # zero-init the last norm layer
            init.zeros_(module.norm3.weight)
            init.zeros_(module.norm3.bias)
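# Note on initialization (see `_init_weights` above): because `norm3` of every residual
# block is zero-initialized, a freshly initialized `VitDetResBottleneckBlock` contributes
# nothing to its residual sum, so the block starts out as an identity mapping.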
@auto_docstring
class VitDetModel(VitDetPreTrainedModel):
    def __init__(self, config: VitDetConfig):
        super().__init__(config)
        self.config = config

        self.embeddings = VitDetEmbeddings(config)
        self.encoder = VitDetEncoder(config)

        # initialize weights and apply final processing
        self.post_init()

    def get_input_embeddings(self) -> VitDetEmbeddings:
        return self.embeddings.projection

    @auto_docstring
    def forward(
        self,
        pixel_values: torch.Tensor | None = None,
        output_attentions: bool | None = None,
        output_hidden_states: bool | None = None,
        return_dict: bool | None = None,
    ) -> tuple | BaseModelOutput:
        r"""
        Examples:

        ```python
        >>> from transformers import VitDetConfig, VitDetModel
        >>> import torch

        >>> config = VitDetConfig()
        >>> model = VitDetModel(config)

        >>> pixel_values = torch.randn(1, 3, 224, 224)

        >>> with torch.no_grad():
        ...     outputs = model(pixel_values)

        >>> last_hidden_states = outputs.last_hidden_state
        >>> list(last_hidden_states.shape)
        [1, 768, 14, 14]
        ```"""
        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
        output_hidden_states = (
            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
        )
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        if pixel_values is None:
            raise ValueError("You have to specify pixel_values")

        embedding_output = self.embeddings(pixel_values)

        encoder_outputs = self.encoder(
            embedding_output,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )
        sequence_output = encoder_outputs[0]

        if not return_dict:
            return (sequence_output,) + encoder_outputs[1:]

        return BaseModelOutput(
            last_hidden_state=sequence_output,
            hidden_states=encoder_outputs.hidden_states,
            attentions=encoder_outputs.attentions,
        )


@auto_docstring(
    custom_intro="""
    ViTDet backbone, to be used with frameworks like Mask R-CNN.
    """
)
class VitDetBackbone(VitDetPreTrainedModel, BackboneMixin):
    def __init__(self, config):
        super().__init__(config)
        super()._init_backbone(config)

        self.embeddings = VitDetEmbeddings(config)
        self.encoder = VitDetEncoder(config)
        self.num_features = [config.hidden_size for _ in range(config.num_hidden_layers + 1)]

        # initialize weights and apply final processing
        self.post_init()

    def get_input_embeddings(self) -> VitDetEmbeddings:
        return self.embeddings.projection

    @can_return_tuple
    @auto_docstring
    def forward(
        self,
        pixel_values: torch.Tensor,
        output_hidden_states: bool | None = None,
        output_attentions: bool | None = None,
        return_dict: bool | None = None,
    ) -> BackboneOutput:
        r"""
        Examples:

        ```python
        >>> from transformers import VitDetConfig, VitDetBackbone
        >>> import torch

        >>> config = VitDetConfig()
        >>> model = VitDetBackbone(config)

        >>> pixel_values = torch.randn(1, 3, 224, 224)

        >>> with torch.no_grad():
        ...     outputs = model(pixel_values)

        >>> feature_maps = outputs.feature_maps
        >>> list(feature_maps[-1].shape)
        [1, 768, 14, 14]
        ```"""
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict
        output_hidden_states = (
            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
        )
        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions

        embedding_output = self.embeddings(pixel_values)

        outputs = self.encoder(
            embedding_output,
            output_hidden_states=True,
            output_attentions=output_attentions,
            return_dict=return_dict,
        )

        hidden_states = outputs.hidden_states if return_dict else outputs[1]

        feature_maps = ()
        for stage, hidden_state in zip(self.stage_names, hidden_states):
            if stage in self.out_features:
                feature_maps += (hidden_state,)

        if not return_dict:
            if output_hidden_states:
                output = (feature_maps,) + outputs[1:]
            else:
                output = (feature_maps,) + outputs[2:]
            return output

        return BackboneOutput(
            feature_maps=feature_maps,
            hidden_states=outputs.hidden_states if output_hidden_states else None,
            attentions=outputs.attentions,
        )


__all__ = ["VitDetModel", "VitDetPreTrainedModel", "VitDetBackbone"]