
    Z jw                     &   S r SSKrSSKJr  SSKJr  SSKrSSKJr  SSKJ	r
  SSKJr  SS	KJr  SS
KJrJrJrJr  SSKJrJr  SSKJr  SSKJrJrJrJrJr  SSKJ r J!r!  SSK"J#r#  SSK$J%r%  \RL                  " \'5      r( " S S\RR                  5      r* " S S\RR                  5      r+  SAS\RR                  S\RX                  S\RX                  S\RX                  S\RX                  S-  S\-S-  S\-S\\   4S jjr. " S S \RR                  5      r/ " S! S"\RR                  5      r0 " S# S$\RR                  5      r1 " S% S&\RR                  5      r2 " S' S(\RR                  5      r3 " S) S*\5      r4 " S+ S,\RR                  5      r5\ " S- S.\5      5       r6\ " S/ S0\65      5       r7 " S1 S2\RR                  5      r8\" S3S49 " S5 S6\65      5       r9\" S7S49 " S8 S9\65      5       r:\" S:S49\ " S; S<\5      5       5       r;\" S=S49 " S> S?\65      5       r</ S@Qr=g)BzPyTorch DeiT model.    N)Callable)	dataclass)nn   )initialization)ACT2FN)GradientCheckpointingLayer)BaseModelOutputBaseModelOutputWithPoolingImageClassifierOutputMaskedImageModelingOutput)ALL_ATTENTION_FUNCTIONSPreTrainedModel)Unpack)ModelOutputTransformersKwargsauto_docstringlogging	torch_int)can_return_tuplemerge_with_config_defaults)capture_outputs   )
DeiTConfigc            	          ^  \ rS rSrSrSS\S\SS4U 4S jjjrS\R                  S	\
S
\
S\R                  4S jr  SS\R                  S\R                  S-  S\S\R                  4S jjrSrU =r$ )DeiTEmbeddings+   zn
Construct the CLS token, distillation token, position and patch embeddings. Optionally, also the mask token.
configuse_mask_tokenreturnNc                   > [         TU ]  5         [        R                  " [        R
                  " SSUR                  5      5      U l        [        R                  " [        R
                  " SSUR                  5      5      U l        U(       a6  [        R                  " [        R
                  " SSUR                  5      5      OS U l	        [        U5      U l        U R                  R                  n[        R                  " [        R
                  " SUS-   UR                  5      5      U l        [        R                  " UR                  5      U l        UR"                  U l        g )Nr      )super__init__r   	Parametertorchzeroshidden_size	cls_tokendistillation_token
mask_tokenDeiTPatchEmbeddingspatch_embeddingsnum_patchesposition_embeddingsDropouthidden_dropout_probdropout
patch_size)selfr   r   r.   	__class__s       w/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/deit/modeling_deit.pyr$   DeiTEmbeddings.__init__0   s    ekk!Q8J8J&KL"$,,u{{1aASAS/T"UQ_",,u{{1a9K9K'LMei 3F ;++77#%<<A{QPVPbPb0c#d zz&"<"<= ++    
embeddingsheightwidthc                    UR                   S   S-
  nU R                  R                   S   S-
  n[        R                  R	                  5       (       d  XE:X  a  X#:X  a  U R                  $ U R                  SS2SS24   nU R                  SS2SS24   nUR                   S   nX R
                  -  n	X0R
                  -  n
[        US-  5      nUR                  SXU5      nUR                  SSSS5      n[        R                  R                  UX4SS	S
9nUR                  SSSS5      R                  SSU5      n[        R                  " Xg4SS9$ )a  
This method allows to interpolate the pre-trained position encodings, to be able to use the model on higher resolution
images. This method is also adapted to support torch.jit tracing and 2 class embeddings.

Adapted from:
- https://github.com/facebookresearch/dino/blob/de9ee3df6cf39fac952ab558447af1fa1365362a/vision_transformer.py#L174-L194, and
- https://github.com/facebookresearch/dinov2/blob/e1277af2ba9496fbadf7aec6eba56e8d882d1e35/dinov2/models/vision_transformer.py#L179-L211
r   r"   N      ?r   r   bicubicF)sizemodealign_cornersdim)shaper/   r&   jit
is_tracingr3   r   reshapepermuter   
functionalinterpolateviewcat)r4   r9   r:   r;   r.   num_positionsclass_and_dist_pos_embedpatch_pos_embedrD   
new_height	new_widthsqrt_num_positionss               r6   interpolate_pos_encoding'DeiTEmbeddings.interpolate_pos_encoding<   sU    !&&q)A-0066q9A= yy##%%+*F6?+++#'#;#;ArrE#B 221ab59r".
__,	&}c'9:)11!5G]`a)11!Q1=--33(	 4 
 *11!Q1=BB1b#Nyy2D!LLr8   pixel_valuesbool_masked_posrT   c                    UR                   u    pEnU R                  U5      nUR                  5       u  pnUbI  U R                  R	                  XS5      n
UR                  S5      R                  U
5      nUSU-
  -  X-  -   nU R                  R	                  USS5      nU R                  R	                  USS5      n[        R                  " XU4SS9nU R                  nU(       a  U R                  XuU5      nX~-   nU R                  U5      nU$ )Nr=   g      ?r   rC   )rE   r-   r@   r+   expand	unsqueezetype_asr)   r*   r&   rM   r/   rT   r2   )r4   rV   rW   rT   _r:   r;   r9   
batch_size
seq_lengthmask_tokensmask
cls_tokensdistillation_tokensposition_embeddings                  r6   forwardDeiTEmbeddings.forwardd   s    +001e**<8
$.OO$5!
&//00LK",,R088ED#sTz2[5GGJ^^**:r2>
"55<<ZRPYY
LRST
!55#!%!>!>zSX!Y4
\\*-
r8   )r)   r*   r2   r+   r-   r3   r/   )FNF)__name__
__module____qualname____firstlineno____doc__r   boolr$   r&   TensorintrT   
BoolTensorrd   __static_attributes____classcell__r5   s   @r6   r   r   +   s    
,z 
,4 
,D 
, 
,&M5<< &M &MUX &M]b]i]i &MV 48).	ll ))D0 #'	
 
 r8   r   c                   f   ^  \ rS rSrSrU 4S jrS\R                  S\R                  4S jrSr	U =r
$ )r,      z
This class turns `pixel_values` of shape `(batch_size, num_channels, height, width)` into the initial
`hidden_states` (patch embeddings) of shape `(batch_size, seq_length, hidden_size)` to be consumed by a
Transformer.
c                   > [         TU ]  5         UR                  UR                  p2UR                  UR
                  pT[        U[        R                  R                  5      (       a  UOX"4n[        U[        R                  R                  5      (       a  UOX34nUS   US   -  US   US   -  -  nX l        X0l        X@l        X`l
        [        R                  " XEX3S9U l        g )Nr   r   )kernel_sizestride)r#   r$   
image_sizer3   num_channelsr(   
isinstancecollectionsabcIterabler.   r   Conv2d
projection)r4   r   rx   r3   ry   r(   r.   r5   s          r6   r$   DeiTPatchEmbeddings.__init__   s    !'!2!2F4E4EJ$*$7$79K9Kk#-j+//:R:R#S#SZZdYq
#-j+//:R:R#S#SZZdYq
!!}
15*Q-:VW=:XY$$(&))L:ir8   rV   r    c                     UR                   u  p#pEX0R                  :w  a  [        S5      eU R                  U5      R	                  S5      R                  SS5      nU$ )NzeMake sure that the channel dimension of the pixel values match with the one set in the configuration.r"   r   )rE   ry   
ValueErrorr   flatten	transpose)r4   rV   r]   ry   r:   r;   xs          r6   rd   DeiTPatchEmbeddings.forward   s[    2>2D2D/
&,,,w  OOL)11!4>>q!Dr8   )rx   ry   r.   r3   r   )rg   rh   ri   rj   rk   r$   r&   rm   rd   rp   rq   rr   s   @r6   r,   r,      s.    jELL U\\  r8   r,   modulequerykeyvalueattention_maskscalingr2   kwargsc                    Uc  UR                  S5      S-  n[        R                  " XR                  SS5      5      U-  nUb  X-   n[        R
                  R                  USS9n[        R
                  R                  XU R                  S9n[        R                  " X5      n	U	R                  SS5      R                  5       n	X4$ )Nr=         r"   r   rC   )ptrainingr   )
r@   r&   matmulr   r   rJ   softmaxr2   r   
contiguous)
r   r   r   r   r   r   r2   r   attn_weightsattn_outputs
             r6   eager_attention_forwardr      s     **R.D( <<}}Q':;gEL!#4==((2(>L==((6??([L,,|3K''1-88:K$$r8   c                      ^  \ rS rSrS\4U 4S jjrS\R                  S\\	   S\
\R                  \R                  4   4S jrSrU =r$ )	DeiTSelfAttention   r   c                 0  > [         TU ]  5         UR                  UR                  -  S:w  a7  [	        US5      (       d&  [        SUR                   SUR                   S35      eXl        UR                  U l        [        UR                  UR                  -  5      U l        U R                  U R                  -  U l	        UR                  U l        U R                  S-  U l        SU l        [        R                  " UR                  U R                  UR                   S9U l        [        R                  " UR                  U R                  UR                   S9U l        [        R                  " UR                  U R                  UR                   S9U l        g )	Nr   embedding_sizezThe hidden size z4 is not a multiple of the number of attention heads .r   F)bias)r#   r$   r(   num_attention_headshasattrr   r   rn   attention_head_sizeall_head_sizeattention_probs_dropout_probdropout_probr   	is_causalr   Linearqkv_biasr   r   r   r4   r   r5   s     r6   r$   DeiTSelfAttention.__init__   sG    : ::a?PVXhHiHi"6#5#5"6 7334A7 
 #)#=#= #&v'9'9F<V<V'V#W !558P8PP"??//5YYv1143E3EFOO\
99V//1C1C&//ZYYv1143E3EFOO\
r8   hidden_statesr   r    c                    UR                   S   nUSU R                  U R                  4nU R                  U5      R                  " U6 R                  SS5      nU R                  U5      R                  " U6 R                  SS5      nU R                  U5      R                  " U6 R                  SS5      n[        R                  " U R                  R                  [        5      nU" U UUUS 4U R                  U R                  U R                  (       d  SOU R                   S.UD6u  pU	R#                  5       S S U R$                  4-   nU	R'                  U5      n	X4$ )Nr   r=   r   r"           )r   r   r2   )rE   r   r   r   rL   r   r   r   r   get_interfacer   _attn_implementationr   r   r   r   r   r@   r   rH   )r4   r   r   r]   	new_shape	key_layervalue_layerquery_layerattention_interfacecontext_layerattention_probsnew_context_layer_shapes               r6   rd   DeiTSelfAttention.forward   sO   
 #((+
D$<$<d>V>VV	HH]+00)<FFq!L	jj/44i@JJ1aPjj/44i@JJ1aP(?(M(MKK,,.E)
 *=
*
 nnLL#}}C$2C2C
*
 
*
& #0"4"4"6s";t?Q?Q>S"S%--.EF--r8   )
r   r   r   r   r   r   r   r   r   r   )rg   rh   ri   rj   r   r$   r&   rm   r   r   tuplerd   rp   rq   rr   s   @r6   r   r      sS    ]z ](.||. +,. 
u||U\\)	*	. .r8   r   c                      ^  \ rS rSrSrS\4U 4S jjrS\R                  S\R                  S\R                  4S jr	S	r
U =r$ )
DeiTSelfOutput   z
The residual connection is defined in DeiTLayer instead of here (as is the case with other models), due to the
layernorm applied before each block.
r   c                    > [         TU ]  5         [        R                  " UR                  UR                  5      U l        [        R                  " UR                  5      U l        g N)	r#   r$   r   r   r(   denser0   r1   r2   r   s     r6   r$   DeiTSelfOutput.__init__   sB    YYv1163E3EF
zz&"<"<=r8   r   input_tensorr    c                 J    U R                  U5      nU R                  U5      nU$ r   r   r2   r4   r   r   s      r6   rd   DeiTSelfOutput.forward  s$    

=1]3r8   r   )rg   rh   ri   rj   rk   r   r$   r&   rm   rd   rp   rq   rr   s   @r6   r   r      sB    
>z >
U\\  RWR^R^  r8   r   c                   t   ^  \ rS rSrS\4U 4S jjrS\R                  S\\	   S\R                  4S jr
SrU =r$ )	DeiTAttentioni  r   c                 b   > [         TU ]  5         [        U5      U l        [	        U5      U l        g r   )r#   r$   r   	attentionr   outputr   s     r6   r$   DeiTAttention.__init__  s&    *62$V,r8   r   r   r    c                 R    U R                   " U40 UD6u  p4U R                  X15      nU$ r   r   r   )r4   r   r   self_attn_outputr\   r   s         r6   rd   DeiTAttention.forward  s/    
 #nn]EfE-=r8   r   )rg   rh   ri   rj   r   r$   r&   rm   r   r   rd   rp   rq   rr   s   @r6   r   r     sC    -z -
|| +, 
	 r8   r   c                   j   ^  \ rS rSrS\4U 4S jjrS\R                  S\R                  4S jrSr	U =r
$ )DeiTIntermediatei  r   c                   > [         TU ]  5         [        R                  " UR                  UR
                  5      U l        [        UR                  [        5      (       a  [        UR                     U l        g UR                  U l        g r   )r#   r$   r   r   r(   intermediate_sizer   rz   
hidden_actstrr   intermediate_act_fnr   s     r6   r$   DeiTIntermediate.__init__  s`    YYv1163K3KL
f''--'-f.?.?'@D$'-'8'8D$r8   r   r    c                 J    U R                  U5      nU R                  U5      nU$ r   r   r   )r4   r   s     r6   rd   DeiTIntermediate.forward&  s&    

=100?r8   r   rg   rh   ri   rj   r   r$   r&   rm   rd   rp   rq   rr   s   @r6   r   r     s/    9z 9U\\ ell  r8   r   c                      ^  \ rS rSrS\4U 4S jjrS\R                  S\R                  S\R                  4S jrSr	U =r
$ )	
DeiTOutputi-  r   c                    > [         TU ]  5         [        R                  " UR                  UR
                  5      U l        [        R                  " UR                  5      U l	        g r   )
r#   r$   r   r   r   r(   r   r0   r1   r2   r   s     r6   r$   DeiTOutput.__init__.  sB    YYv779K9KL
zz&"<"<=r8   r   r   r    c                 R    U R                  U5      nU R                  U5      nX-   nU$ r   r   r   s      r6   rd   DeiTOutput.forward3  s,    

=1]3%4r8   r   r   rr   s   @r6   r   r   -  s=    >z >
U\\  RWR^R^  r8   r   c                   x   ^  \ rS rSrSrS\4U 4S jjrS\R                  S\	\
   S\R                  4S jrS	rU =r$ )
	DeiTLayeri;  z?This corresponds to the Block class in the timm implementation.r   c                 j  > [         TU ]  5         UR                  U l        SU l        [	        U5      U l        [        U5      U l        [        U5      U l	        [        R                  " UR                  UR                  S9U l        [        R                  " UR                  UR                  S9U l        g )Nr   eps)r#   r$   chunk_size_feed_forwardseq_len_dimr   r   r   intermediater   r   r   	LayerNormr(   layer_norm_epslayernorm_beforelayernorm_afterr   s     r6   r$   DeiTLayer.__init__>  s    '-'E'E$&v.,V4 ( "V-?-?VEZEZ [!||F,>,>FDYDYZr8   r   r   r    c                     U R                  U5      nU R                  " U40 UD6nXA-   nU R                  U5      nU R                  U5      nU R	                  XQ5      nU$ r   )r   r   r   r   r   )r4   r   r   hidden_states_normattention_outputlayer_outputs         r6   rd   DeiTLayer.forwardH  sl    
 "22=A>>*<GG )8 ++M:((6 {{<?r8   )r   r   r   r   r   r   r   )rg   rh   ri   rj   rk   r   r$   r&   rm   r   r   rd   rp   rq   rr   s   @r6   r   r   ;  sH    I[z [|| +, 
	 r8   r   c                   `   ^  \ rS rSrS\4U 4S jjrS\R                  S\\	   S\
4S jrSrU =r$ )	DeiTEncoderi^  r   c                    > [         TU ]  5         Xl        [        R                  " [        UR                  5       Vs/ s H  n[        U5      PM     sn5      U l        SU l	        g s  snf rf   )
r#   r$   r   r   
ModuleListrangenum_hidden_layersr   layergradient_checkpointing)r4   r   r\   r5   s      r6   r$   DeiTEncoder.__init___  sR    ]]uVE]E]?^#_?^!If$5?^#_`
&+# $`s   A&r   r   r    c                 L    U R                    H  nU" U40 UD6nM     [        US9$ )N)last_hidden_state)r   r
   )r4   r   r   layer_modules       r6   rd   DeiTEncoder.forwarde  s.    
 !JJL(A&AM ' ??r8   )r   r   r   )rg   rh   ri   rj   r   r$   r&   rm   r   r   r
   rd   rp   rq   rr   s   @r6   r   r   ^  sD    ,z ,@||@ +,@ 
	@ @r8   r   c                       \ rS rSr% \\S'   SrSrSrSr	S/r
SrSrSrSr\\S.r\R&                  " 5       S	\R*                  \R,                  -  \R.                  -  S
S4S j5       rSrg)DeiTPreTrainedModelip  r   deitrV   )imageTr   )r   
attentionsr   r    Nc                 2   [        U[        R                  [        R                  -  5      (       ac  [        R
                  " UR                  SU R                  R                  S9  UR                  b!  [        R                  " UR                  5        gg[        U[        R                  5      (       aA  [        R                  " UR                  5        [        R                  " UR                  5        g[        U[        5      (       a  [        R                  " UR                  5        [        R                  " UR                  5        [        R                  " UR                   5        UR"                  b!  [        R                  " UR"                  5        ggg)zInitialize the weightsr   )meanstdN)rz   r   r   r~   inittrunc_normal_weightr   initializer_ranger   zeros_r   ones_r   r)   r/   r*   r+   )r4   r   s     r6   _init_weights!DeiTPreTrainedModel._init_weights  s     fbii"))344v}}3DKK<Y<YZ{{&FKK( '--KK$JJv}}%//KK(()KK223KK112  ,F--. -	 0r8    )rg   rh   ri   rj   r   __annotations__base_model_prefixmain_input_nameinput_modalitiessupports_gradient_checkpointing_no_split_modules_supports_sdpa_supports_flash_attn_supports_flex_attn_supports_attention_backendr   r   _can_record_outputsr&   no_gradr   r   r~   r   r  rp   r  r8   r6   r  r  p  s    $O!&*#$N"&"'
 ]]_/BII		$9BLL$H /T / /r8   r  c                      ^  \ rS rSrSS\S\S\SS4U 4S jjjrS\4S	 jr\	\
" SS
9\   SS\R                  S-  S\R                  S-  S\S\\   S\4
S jj5       5       5       rSrU =r$ )	DeiTModeli  Fr   add_pooling_layerr   r    Nc                   > [         TU ]  U5        Xl        [        XS9U l        [        U5      U l        [        R                  " UR                  UR                  S9U l        U(       a  [        U5      OSU l        U R                  5         g)z
add_pooling_layer (bool, *optional*, defaults to `True`):
    Whether to add a pooling layer
use_mask_token (`bool`, *optional*, defaults to `False`):
    Whether to use a mask token for masked image modeling.
)r   r   N)r#   r$   r   r   r9   r   encoderr   r   r(   r   	layernorm
DeiTPoolerpooler	post_init)r4   r   r   r   r5   s       r6   r$   DeiTModel.__init__  si     	 (O"6*f&8&8f>S>ST,=j(4 	r8   c                 .    U R                   R                  $ r   )r9   r-   )r4   s    r6   get_input_embeddingsDeiTModel.get_input_embeddings  s    ///r8   )tie_last_hidden_statesrV   rW   rT   r   c                    Uc  [        S5      eU R                  R                  R                  R                  R
                  nUR
                  U:w  a  UR                  U5      nU R                  XUS9nU R                  U5      nUR                  nU R                  U5      nU R                  b  U R                  U5      OSn	[        UU	S9$ )z
bool_masked_pos (`torch.BoolTensor` of shape `(batch_size, num_patches)`, *optional*):
    Boolean masked positions. Indicates which patches are masked (1) and which aren't (0).
Nz You have to specify pixel_valuesrW   rT   )r   pooler_output)r   r9   r-   r   r  dtypetor"  r   r#  r%  r   )
r4   rV   rW   rT   r   expected_dtypeembedding_outputencoder_outputssequence_outputpooled_outputs
             r6   rd   DeiTModel.forward  s     ?@@ 99DDKKQQ/'??>:L??Tl + 
 ,0<<8H+I);;..98<8OO4UY)-'
 	
r8   )r   r9   r"  r#  r%  )TFNNF)rg   rh   ri   rj   r   rl   r$   r,   r)  r   r   r   r&   rm   ro   r   r   r   rd   rp   rq   rr   s   @r6   r  r    s    z d [_ lp  &0&9 0  E2 -137).	 
llT) 
 ))D0 
 #'	 

 +, 
 
$ 
  3   
r8   r  c                   j   ^  \ rS rSrS\4U 4S jjrS\R                  S\R                  4S jrSr	U =r
$ )r$  i  r   c                    > [         TU ]  5         [        R                  " UR                  UR
                  5      U l        [        UR                     U l	        g r   )
r#   r$   r   r   r(   pooler_output_sizer   r   
pooler_act
activationr   s     r6   r$   DeiTPooler.__init__  s>    YYv1163L3LM
 !2!23r8   r   r    c                 \    US S 2S4   nU R                  U5      nU R                  U5      nU$ )Nr   )r   r<  )r4   r   first_token_tensorr5  s       r6   rd   DeiTPooler.forward  s6     +1a40

#566r8   )r<  r   r   rr   s   @r6   r$  r$    s/    4z 4
U\\ ell  r8   r$  ad  
    DeiT Model with a decoder on top for masked image modeling, as proposed in [SimMIM](https://huggingface.co/papers/2111.09886).

    <Tip>

    Note that we provide a script to pre-train this model on custom data in our [examples
    directory](https://github.com/huggingface/transformers/tree/main/examples/pytorch/image-pretraining).

    </Tip>
    )custom_introc                      ^  \ rS rSrS\SS4U 4S jjr\\   SS\R                  S-  S\R                  S-  S\S	\\   S\4
S
 jj5       5       rSrU =r$ )DeiTForMaskedImageModelingi  r   r    Nc                 H  > [         TU ]  U5        [        USSS9U l        [        R
                  " [        R                  " UR                  UR                  S-  UR                  -  SS9[        R                  " UR                  5      5      U l        U R                  5         g )NFT)r   r   r"   r   )in_channelsout_channelsrv   )r#   r$   r  r  r   
Sequentialr~   r(   encoder_stridery   PixelShuffledecoderr&  r   s     r6   r$   #DeiTForMaskedImageModeling.__init__  s     fdS	}}II"..#22A58K8KK
 OOF112
 	r8   rV   rW   rT   r   c                 P   U R                   " U4UUS.UD6nUR                  nUSS2SS24   nUR                  u  pxn	[        US-  5      =pUR	                  SSS5      R                  XyX5      nU R                  U5      nSnUGb  U R                  R                  U R                  R                  -  nUR                  SX5      nUR                  U R                  R                  S5      R                  U R                  R                  S5      R                  S5      R                  5       n[        R                  R                  XSS	9nUU-  R!                  5       UR!                  5       S
-   -  U R                  R"                  -  n[%        UUUR&                  UR(                  S9$ )a  
bool_masked_pos (`torch.BoolTensor` of shape `(batch_size, num_patches)`):
    Boolean masked positions. Indicates which patches are masked (1) and which aren't (0).

Examples:
```python
>>> from transformers import AutoImageProcessor, DeiTForMaskedImageModeling
>>> import torch
>>> from PIL import Image
>>> import httpx
>>> from io import BytesIO

>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> with httpx.stream("GET", url) as response:
...     image = Image.open(BytesIO(response.read()))

>>> image_processor = AutoImageProcessor.from_pretrained("facebook/deit-base-distilled-patch16-224")
>>> model = DeiTForMaskedImageModeling.from_pretrained("facebook/deit-base-distilled-patch16-224")

>>> num_patches = (model.config.image_size // model.config.patch_size) ** 2
>>> pixel_values = image_processor(images=image, return_tensors="pt").pixel_values
>>> # create random boolean mask of shape (batch_size, num_patches)
>>> bool_masked_pos = torch.randint(low=0, high=2, size=(1, num_patches)).bool()

>>> outputs = model(pixel_values, bool_masked_pos=bool_masked_pos)
>>> loss, reconstructed_pixel_values = outputs.loss, outputs.reconstruction
>>> list(reconstructed_pixel_values.shape)
[1, 3, 224, 224]
```r-  Nr   r=   r>   r   r"   none)	reductiongh㈵>)lossreconstructionr   r  )r  r   rE   rn   rI   rH   rJ  r   rx   r3   repeat_interleaverZ   r   r   rJ   l1_losssumry   r   r   r  )r4   rV   rW   rT   r   outputsr4  r]   sequence_lengthry   r:   r;   reconstructed_pixel_valuesmasked_im_lossr@   r`   reconstruction_losss                    r6   rd   "DeiTForMaskedImageModeling.forward  s   N /3ii/
+%=/
 	/
 "33 *!QrT'24C4I4I1
\_c122)11!Q:BB:]ck &*\\/%B"&;;))T[[-C-CCD-55b$EO11$++2H2H!L""4;;#9#91=1	  #%--"7"7lr"7"s1D8==?488:PTCTUX\XcXcXpXppN(5!//))	
 	
r8   )rJ  r  r7  )rg   rh   ri   rj   r   r$   r   r   r&   rm   ro   rl   r   r   r   rd   rp   rq   rr   s   @r6   rC  rC    s    z d "  -137).	I
llT)I
 ))D0I
 #'	I

 +,I
 
#I
  I
r8   rC  z
    DeiT Model transformer with an image classification head on top (a linear layer on top of the final hidden state of
    the [CLS] token) e.g. for ImageNet.
    c                      ^  \ rS rSrS\SS4U 4S jjr\\   SS\R                  S-  S\R                  S-  S\
S	\\   S\4
S
 jj5       5       rSrU =r$ )DeiTForImageClassificationiM  r   r    Nc                 .  > [         TU ]  U5        UR                  U l        [        USS9U l        UR                  S:  a+  [
        R                  " UR                  UR                  5      O[
        R                  " 5       U l	        U R                  5         g NF)r   r   )r#   r$   
num_labelsr  r  r   r   r(   Identity
classifierr&  r   s     r6   r$   #DeiTForImageClassification.__init__T  ss      ++f>	 OUN_N_bcNc"))F$6$68I8IJikititiv 	r8   rV   labelsrT   r   c                     U R                   " U4SU0UD6nUR                  nU R                  USS2SSS24   5      nSnUb  U R                  " X'U R                  40 UD6n[        UUUR                  UR                  S9$ )a  
labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
    Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
    config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
    `config.num_labels > 1` a classification loss is computed (Cross-Entropy).

Examples:

```python
>>> from transformers import AutoImageProcessor, DeiTForImageClassification
>>> import torch
>>> from PIL import Image
>>> import httpx
>>> from io import BytesIO

>>> torch.manual_seed(3)  # doctest: +IGNORE_RESULT
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> with httpx.stream("GET", url) as response:
...     image = Image.open(BytesIO(response.read()))

>>> # note: we are loading a DeiTForImageClassificationWithTeacher from the hub here,
>>> # so the head will be randomly initialized, hence the predictions will be random
>>> image_processor = AutoImageProcessor.from_pretrained("facebook/deit-base-distilled-patch16-224")
>>> model = DeiTForImageClassification.from_pretrained("facebook/deit-base-distilled-patch16-224")

>>> inputs = image_processor(images=image, return_tensors="pt")
>>> outputs = model(**inputs)
>>> logits = outputs.logits
>>> # model predicts one of the 1000 ImageNet classes
>>> predicted_class_idx = logits.argmax(-1).item()
>>> print("Predicted class:", model.config.id2label[predicted_class_idx])
Predicted class: Polaroid camera, Polaroid Land camera
```rT   Nr   )rO  logitsr   r  )r  r   r`  loss_functionr   r   r   r  )	r4   rV   rb  rT   r   rT  r4  rd  rO  s	            r6   rd   "DeiTForImageClassification.forward`  s    V /3ii/
%=/
 /
 "33Aq!9: %%fdkkLVLD$!//))	
 	
r8   )r`  r  r^  r7  )rg   rh   ri   rj   r   r$   r   r   r&   rm   rl   r   r   r   rd   rp   rq   rr   s   @r6   r[  r[  M  s    
z 
d 
  -1&*).	=
llT)=
 t#=
 #'	=

 +,=
 
=
  =
r8   r[  zC
    Output type of [`DeiTForImageClassificationWithTeacher`].
    c                       \ rS rSr% SrSr\R                  S-  \S'   Sr	\R                  S-  \S'   Sr
\R                  S-  \S'   Sr\\R                     S-  \S'   Sr\\R                     S-  \S'   S	rg)
+DeiTForImageClassificationWithTeacherOutputi  aF  
logits (`torch.FloatTensor` of shape `(batch_size, config.num_labels)`):
    Prediction scores as the average of the cls_logits and distillation logits.
cls_logits (`torch.FloatTensor` of shape `(batch_size, config.num_labels)`):
    Prediction scores of the classification head (i.e. the linear layer on top of the final hidden state of the
    class token).
distillation_logits (`torch.FloatTensor` of shape `(batch_size, config.num_labels)`):
    Prediction scores of the distillation head (i.e. the linear layer on top of the final hidden state of the
    distillation token).
Nrd  
cls_logitsdistillation_logitsr   r  r  )rg   rh   ri   rj   rk   rd  r&   FloatTensorr  ri  rj  r   r   r  rp   r  r8   r6   rh  rh    s}    	 (,FE$++/J!!D(/48**T1859M5**+d2926Je''(4/6r8   rh  a  
    DeiT Model transformer with image classification heads on top (a linear layer on top of the final hidden state of
    the [CLS] token and a linear layer on top of the final hidden state of the distillation token) e.g. for ImageNet.

    .. warning::

           This model supports inference-only. Fine-tuning with distillation (i.e. with a teacher) is not yet
           supported.
    c                      ^  \ rS rSrS\SS4U 4S jjr\\  SS\R                  S-  S\
S\\   S\4S	 jj5       5       rS
rU =r$ )%DeiTForImageClassificationWithTeacheri  r   r    Nc                   > [         TU ]  U5        UR                  U l        [        USS9U l        UR                  S:  a+  [
        R                  " UR                  UR                  5      O[
        R                  " 5       U l	        UR                  S:  a+  [
        R                  " UR                  UR                  5      O[
        R                  " 5       U l
        U R                  5         g r]  )r#   r$   r^  r  r  r   r   r(   r_  cls_classifierdistillation_classifierr&  r   s     r6   r$   .DeiTForImageClassificationWithTeacher.__init__  s      ++f>	 AG@Q@QTU@UBIIf((&*;*;<[][f[f[h 	 AG@Q@QTU@UBIIf((&*;*;<[][f[f[h 	$
 	r8   rV   rT   r   c                     U R                   " U4SU0UD6nUR                  nU R                  US S 2SS S 24   5      nU R                  US S 2SS S 24   5      nXg-   S-  n[	        UUUUR
                  UR                  S9$ )NrT   r   r   r"   )rd  ri  rj  r   r  )r  r   ro  rp  rh  r   r  )	r4   rV   rT   r   rT  r4  ri  rj  rd  s	            r6   rd   -DeiTForImageClassificationWithTeacher.forward  s     /3ii/
%=/
 /
 "33((Aq)AB
"::?1aQR7;ST 2a7:! 3!//))
 	
r8   )ro  r  rp  r^  rf   )rg   rh   ri   rj   r   r$   r   r   r&   rm   rl   r   r   rh  rd   rp   rq   rr   s   @r6   rm  rm    so    z d "  -1).
llT)
 #'
 +,	

 
5
  
r8   rm  )r[  rm  rC  r  r  )Nr   )>rk   collections.abcr{   r   dataclassesr   r&   r    r   r	  activationsr   modeling_layersr	   modeling_outputsr
   r   r   r   modeling_utilsr   r   processing_utilsr   utilsr   r   r   r   r   utils.genericr   r   utils.output_capturingr   configuration_deitr   
get_loggerrg   loggerModuler   r,   rm   floatr   r   r   r   r   r   r   r   r  r  r$  rC  r[  rh  rm  __all__r  r8   r6   <module>r     sa     $ !   & ! 9  G & X X I 5 * 
		H	%VRYY Vr")) N !%II%<<% 
% <<	%
 LL4'% T\% % '(%:4.		 4.pRYY $BII "ryy  
 
* F@")) @$ // / /D :
# :
 :
|  	]
!4 ]
]
@ L
!4 L
L
^ 
 7+ 7 7& 
.
,? .

.
br8   