
    Z jb                     8   S r SSKrSSKJr  SSKrSSKJr  SSKJr  SSK	J
r
JrJrJr  SSKJr  SS	KJrJr  S
SKJr  \R*                  " \5      r\" SS9\ " S S\5      5       5       r " S S\R2                  5      r " S S\R2                  5      r " S S\R2                  5      r " S S\R2                  5      r " S S\R2                  5      r " S S\R2                  5      r " S S\R2                  5      r  " S S\R2                  5      r! " S  S!\R2                  5      r" " S" S#\R2                  5      r# " S$ S%\R2                  5      r$\ " S& S'\5      5       r%\ " S( S)\%5      5       r&\" S*S9 " S+ S,\%5      5       r'\" S-S9 " S. S/\%5      5       r(/ S0Qr)g)1zPyTorch LeViT model.    N)	dataclass)nn   )initialization)BaseModelOutputWithNoAttention(BaseModelOutputWithPoolingAndNoAttention$ImageClassifierOutputWithNoAttentionModelOutput)PreTrainedModel)auto_docstringlogging   )LevitConfigzD
    Output type of [`LevitForImageClassificationWithTeacher`].
    )custom_introc                       \ rS rSr% SrSr\R                  S-  \S'   Sr	\R                  S-  \S'   Sr
\R                  S-  \S'   Sr\\R                     S-  \S'   Srg)	,LevitForImageClassificationWithTeacherOutput%   aJ  
logits (`torch.FloatTensor` of shape `(batch_size, config.num_labels)`):
    Prediction scores as the average of the `cls_logits` and `distillation_logits`.
cls_logits (`torch.FloatTensor` of shape `(batch_size, config.num_labels)`):
    Prediction scores of the classification head (i.e. the linear layer on top of the final hidden state of the
    class token).
distillation_logits (`torch.FloatTensor` of shape `(batch_size, config.num_labels)`):
    Prediction scores of the distillation head (i.e. the linear layer on top of the final hidden state of the
    distillation token).
Nlogits
cls_logitsdistillation_logitshidden_states )__name__
__module____qualname____firstlineno____doc__r   torchFloatTensor__annotations__r   r   r   tuple__static_attributes__r       y/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/levit/modeling_levit.pyr   r   %   sc    	 (,FE$++/J!!D(/48**T1859M5**+d29r#   r   c                   8   ^  \ rS rSrSr SU 4S jjrS rSrU =r$ )LevitConvEmbeddings=   zS
LeViT Conv Embeddings with Batch Norm, used in the initial patch embedding layer.
c	                    > [         T	U ]  5         [        R                  " XX4XVUSS9U l        [        R
                  " U5      U l        g )NF)dilationgroupsbias)super__init__r   Conv2dconvolutionBatchNorm2d
batch_norm)
selfin_channelsout_channelskernel_sizestridepaddingr)   r*   bn_weight_init	__class__s
            r$   r-   LevitConvEmbeddings.__init__B   s@     	99{G_elq
 ..6r#   c                 J    U R                  U5      nU R                  U5      nU$ N)r/   r1   )r2   
embeddingss     r$   forwardLevitConvEmbeddings.forwardK   s&    %%j1
__Z0
r#   )r1   r/   )r   r   r   	r   r   r   r   r   r-   r>   r"   __classcell__r9   s   @r$   r&   r&   =   s    
 mn7 r#   r&   c                   2   ^  \ rS rSrSrU 4S jrS rSrU =r$ )LevitPatchEmbeddingsQ   z
LeViT patch embeddings, for final embeddings to be passed to transformer blocks. It consists of multiple
`LevitConvEmbeddings`.
c                 ^  > [         TU ]  5         [        UR                  UR                  S   S-  UR
                  UR                  UR                  5      U l        [        R                  " 5       U l        [        UR                  S   S-  UR                  S   S-  UR
                  UR                  UR                  5      U l        [        R                  " 5       U l        [        UR                  S   S-  UR                  S   S-  UR
                  UR                  UR                  5      U l        [        R                  " 5       U l        [        UR                  S   S-  UR                  S   UR
                  UR                  UR                  5      U l        UR                  U l        g )Nr            )r,   r-   r&   num_channelshidden_sizesr5   r6   r7   embedding_layer_1r   	Hardswishactivation_layer_1embedding_layer_2activation_layer_2embedding_layer_3activation_layer_3embedding_layer_4r2   configr9   s     r$   r-   LevitPatchEmbeddings.__init__W   so   !4!4!4Q!71!<f>P>PRXR_R_agaoao"
 #%,,.!4"a')<)<Q)?1)DfFXFXZ`ZgZgioiwiw"
 #%,,.!4"a')<)<Q)?1)DfFXFXZ`ZgZgioiwiw"
 #%,,.!4"a')<)<Q)?ASASU[UbUbdjdrdr"
 #//r#   c                    UR                   S   nX R                  :w  a  [        S5      eU R                  U5      nU R	                  U5      nU R                  U5      nU R                  U5      nU R                  U5      nU R                  U5      nU R                  U5      nUR                  S5      R                  SS5      $ )Nr   zeMake sure that the channel dimension of the pixel values match with the one set in the configuration.rI   )shaperJ   
ValueErrorrL   rN   rO   rP   rQ   rR   rS   flatten	transpose)r2   pixel_valuesrJ   r=   s       r$   r>   LevitPatchEmbeddings.forwardm   s    #))!,,,,w  ++L9
,,Z8
++J7
,,Z8
++J7
,,Z8
++J7
!!!$..q!44r#   )rN   rP   rR   rL   rO   rQ   rS   rJ   r@   rB   s   @r$   rD   rD   Q   s    
0,5 5r#   rD   c                   2   ^  \ rS rSrSU 4S jjrS rSrU =r$ )MLPLayerWithBN}   c                    > [         TU ]  5         [        R                  " XSS9U l        [        R
                  " U5      U l        g )NF)in_featuresout_featuresr+   )r,   r-   r   LinearlinearBatchNorm1dr1   )r2   	input_dim
output_dimr8   r9   s       r$   r-   MLPLayerWithBN.__init__~   s1    iiIUZ[..4r#   c                     U R                  U5      nU R                  UR                  SS5      5      R                  U5      nU$ )Nr   r   )re   r1   rZ   
reshape_asr2   hidden_states     r$   r>   MLPLayerWithBN.forward   s<    {{<0|';';Aq'ABMMl[r#   r1   re   )r   r   r   r   r   r-   r>   r"   rA   rB   s   @r$   r_   r_   }   s    5
 r#   r_   c                   .   ^  \ rS rSrU 4S jrS rSrU =r$ )LevitSubsample   c                 :   > [         TU ]  5         Xl        X l        g r<   )r,   r-   r6   
resolution)r2   r6   ru   r9   s      r$   r-   LevitSubsample.__init__   s    $r#   c                     UR                   u  p#nUR                  X R                  U R                  U5      S S 2S S U R                  2S S U R                  24   R	                  USU5      nU$ )N)rX   viewru   r6   reshape)r2   rm   
batch_size_channelss        r$   r>   LevitSubsample.forward   sg    "."4"4
x#((__dooW_`~$++~~$++~-

'*b(
+ 	 r#   )ru   r6   rp   rB   s   @r$   rr   rr      s    %
 r#   rr   c                   l   ^  \ rS rSrU 4S jr\R                  " 5       SU 4S jj5       rS rS r	Sr
U =r$ )LevitAttention   c                   > [         TU ]  5         X0l        US-  U l        X l        X@l        XB-  U-  X#-  S-  -   U l        XB-  U-  U l        [        XR                  5      U l	        [        R                  " 5       U l        [        U R                  USS9U l        [        [        R                   " [#        U5      [#        U5      5      5      n[%        U5      nXpl        0 / pU HY  n
U HP  n[)        U
S   US   -
  5      [)        U
S   US   -
  5      4nX;  a  [%        U5      X'   U	R+                  X   5        MR     M[     Xl        0 U l        [0        R                  R3                  [0        R4                  " U[%        U5      5      5      U l        U R9                  S[0        R:                  " U	5      R=                  Xw5      SS9  g )	N      rI   r   )r8   r   attention_bias_idxsF
persistent)r,   r-   num_attention_headsscalekey_dimattention_ratioout_dim_keys_valuesout_dim_projectionr_   queries_keys_valuesr   rM   
activation
projectionlist	itertoolsproductrangelen
len_pointsabsappendindicesattention_bias_cacher   	Parameterzerosattention_biasesregister_buffer
LongTensorry   )r2   rK   r   r   r   ru   pointsr   attention_offsetsr   p1p2offsetr9   s                r$   r-   LevitAttention.__init__   s   #6 d]
.#2#<?R#RU\UruvUv#v "1";>Q"Q#1,@X@X#Y ,,.()@)@,_`ai''j(95;LMN[
$%'7Bbebem,c"Q%"Q%-.@A2034E0F%-089	   $&! % 2 25;;?RTWXiTj3k l!5#3#3G#<#A#A*#Yfk 	 	
r#   c                 f   > [         TU ]  U5        U(       a  U R                  (       a  0 U l        g g g r<   r,   trainr   r2   moder9   s     r$   r   LevitAttention.train   )    dD--(*D% .4r#   c                    U R                   (       a  U R                  S S 2U R                  4   $ [        U5      nX R                  ;  a*  U R                  S S 2U R                  4   U R                  U'   U R                  U   $ r<   trainingr   r   strr   r2   device
device_keys      r$   get_attention_biases#LevitAttention.get_attention_biases   r    ==((D,D,D)DEEVJ!:!::8<8M8MaQUQiQiNi8j))*5,,Z88r#   c                    UR                   u  p#nU R                  U5      nUR                  X#U R                  S5      R	                  U R
                  U R
                  U R                  U R
                  -  /SS9u  pgnUR                  SSSS5      nUR                  SSSS5      nUR                  SSSS5      nXgR                  SS5      -  U R                  -  U R                  UR                  5      -   n	U	R                  SS9n	X-  R                  SS5      R                  X#U R                  5      nU R                  U R!                  U5      5      nU$ Nrx   r   dimr   rI   r   )rX   r   ry   r   splitr   r   permuter[   r   r   r   softmaxrz   r   r   r   )
r2   rm   r{   
seq_lengthr|   r   querykeyvalue	attentions
             r$   r>   LevitAttention.forward   sB   $0$6$6!
"66|D/44ZTMeMegijpp\\4<<)=)=)LMST q 
E aAq)kk!Q1%aAq)MM"b11DJJ>AZAZ[g[n[nAoo	%%"%-	!)44Q:BB:[_[r[rst|'DEr#   )r   r   r   r   r   r   r   r   r   r   r   r   r   Tr   r   r   r   r-   r   no_gradr   r   r>   r"   rA   rB   s   @r$   r   r      s1    
> ]]_+ +
9 r#   r   c                   l   ^  \ rS rSrU 4S jr\R                  " 5       SU 4S jj5       rS rS r	Sr
U =r$ )LevitAttentionSubsample   c	                   > [         TU ]  5         X@l        US-  U l        X0l        XPl        XS-  U-  X4-  -   U l        XS-  U-  U l        Xl        [        XR                  5      U l
        [        Xg5      U l        [        XU-  5      U l        [        R                  " 5       U l        [        U R                  U5      U l        0 U l        ['        [(        R*                  " [-        U5      [-        U5      5      5      n	['        [(        R*                  " [-        U5      [-        U5      5      5      n
[/        U	5      [/        U
5      pXl        Xl        0 / pU
 Hv  nU	 Hm  nSn[5        US   U-  US   -
  US-
  S-  -   5      [5        US   U-  US   -
  US-
  S-  -   5      4nUU;  a  [/        U5      UU'   UR7                  UU   5        Mo     Mx     Xl        [:        R                  R=                  [:        R>                  " U[/        U5      5      5      U l         U RC                  S[:        RD                  " U5      RG                  X5      SS9  g )Nr   r   r   rI   r   Fr   )$r,   r-   r   r   r   r   r   r   resolution_outr_   keys_valuesrr   queries_subsamplequeriesr   rM   r   r   r   r   r   r   r   r   len_points_r   r   r   r   r   r   r   r   r   r   ry   )r2   rg   rh   r   r   r   r6   resolution_inr   r   points_r   r   r   r   r   r   sizer   r9   s                      r$   r-    LevitAttentionSubsample.__init__   s    	#6 d]
.#2#<?R#RU\Ur#r "1";>Q"Q,))5M5MN!/!F%i;N1NO,,.()@)@*M$&!i''m(<eM>RSTy((~)>n@UVW"%f+s7|K&$%'7Bbefnr!u4qA~EFBqETZN]_`a]bLbfjmnfnrsesLsHtu!22034E0F%f-089    % 2 25;;?RTWXiTj3k l!5#3#3G#<#A#A+#Zgl 	 	
r#   c                 f   > [         TU ]  U5        U(       a  U R                  (       a  0 U l        g g g r<   r   r   s     r$   r   LevitAttentionSubsample.train  r   r#   c                    U R                   (       a  U R                  S S 2U R                  4   $ [        U5      nX R                  ;  a*  U R                  S S 2U R                  4   U R                  U'   U R                  U   $ r<   r   r   s      r$   r   ,LevitAttentionSubsample.get_attention_biases  r   r#   c                 4   UR                   u  p#nU R                  U5      R                  X#U R                  S5      R	                  U R
                  U R                  U R
                  -  /SS9u  pVUR                  SSSS5      nUR                  SSSS5      nU R                  U R                  U5      5      nUR                  X R                  S-  U R                  U R
                  5      R                  SSSS5      nXuR                  SS5      -  U R                  -  U R                  UR                  5      -   nUR                  SS9nX-  R                  SS5      R!                  USU R"                  5      nU R%                  U R'                  U5      5      nU$ r   )rX   r   ry   r   r   r   r   r   r   r   r   r[   r   r   r   r   rz   r   r   r   )	r2   rm   r{   r   r|   r   r   r   r   s	            r$   r>   LevitAttentionSubsample.forward  sr   $0$6$6!
\*T*$*B*BBGUDLL$"6"6"EFAUN 	
 kk!Q1%aAq)T33LAB

:':':A'=t?W?WY]YeYefnnq!Q
 MM"b11DJJ>AZAZ[g[n[nAoo	%%"%-	!)44Q:BB:rSWSjSjkt|'DEr#   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rB   s   @r$   r   r      s2    .
` ]]_+ +
9 r#   r   c                   2   ^  \ rS rSrSrU 4S jrS rSrU =r$ )LevitMLPLayeri-  z=
MLP Layer with `2X` expansion in contrast to ViT with `4X`.
c                    > [         TU ]  5         [        X5      U l        [        R
                  " 5       U l        [        X!5      U l        g r<   )r,   r-   r_   	linear_upr   rM   r   linear_down)r2   rg   
hidden_dimr9   s      r$   r-   LevitMLPLayer.__init__2  s4    '	>,,.)*@r#   c                 l    U R                  U5      nU R                  U5      nU R                  U5      nU$ r<   )r   r   r   rl   s     r$   r>   LevitMLPLayer.forward8  s4    ~~l3|4''5r#   )r   r   r   r@   rB   s   @r$   r   r   -  s    A r#   r   c                   2   ^  \ rS rSrSrU 4S jrS rSrU =r$ )LevitResidualLayeri?  z
Residual Block for LeViT
c                 :   > [         TU ]  5         Xl        X l        g r<   )r,   r-   module	drop_rate)r2   r   r   r9   s      r$   r-   LevitResidualLayer.__init__D  s    "r#   c                    U R                   (       a  U R                  S:  a  [        R                  " UR	                  S5      SSUR
                  S9nUR                  U R                  5      R                  SU R                  -
  5      R                  5       nXR                  U5      U-  -   nU$ XR                  U5      -   nU$ )Nr   r   )r   )
r   r   r   randr   r   ge_divdetachr   )r2   rm   rnds      r$   r>   LevitResidualLayer.forwardI  s    ==T^^a/**\..q11a@S@STC''$..)--a$...@AHHJC'++l*Cc*IIL'++l*CCLr#   )r   r   r@   rB   s   @r$   r   r   ?  s    #
   r#   r   c                   8   ^  \ rS rSrSrU 4S jrS rS rSrU =r	$ )
LevitStageiT  zH
LeViT Stage consisting of `LevitMLPLayer` and `LevitAttention` layers.
c                 
  > [         TU ]  5         / U l        Xl        Xl        [        U5       H  nU R                  R                  [        [        X4XgU
5      U R                  R                  5      5        US:  d  MO  X8-  nU R                  R                  [        [        X<5      U R                  R                  5      5        M     U	S   S:X  Ga  U R                  S-
  U	S   -  S-   U l        U R                  R                  [        U R                  R                  X"S-    U	S   U	S   U	S   U	S   U
U R                  S.65        U R                  U l        U	S   S:  a  U R                  R                  US-      U	S   -  nU R                  R                  [        [        U R                  R                  US-      U5      U R                  R                  5      5        [        R                  " U R                  5      U l        g )	Nr   	Subsampler      rI   r   )r   r   r   r6   r   r   rH   )r,   r-   layersrU   r   r   r   r   r   drop_path_rater   r   r   rK   r   
ModuleList)r2   rU   idxrK   r   depthsr   r   	mlp_ratiodown_opsr   r|   r   r9   s                r$   r-   LevitStage.__init__Y  s    	*vAKK""<:M`mnKK.. 1})5
""&}\'NPTP[P[PjPjk  A;+%#'#5#5#9hqk"IA"MDKK'[[--c!G<$QK(0$,QK#A;"/#'#6#6
 "&!4!4D{Q![[55cAg>!L
""&%dkk&>&>sQw&GTVZVaVaVpVp mmDKK0r#   c                     U R                   $ r<   )r   )r2   s    r$   get_resolutionLevitStage.get_resolution  s    !!!r#   c                 <    U R                    H  nU" U5      nM     U$ r<   )r   )r2   rm   layers      r$   r>   LevitStage.forward  s     [[E .L !r#   )rU   r   r   r   )
r   r   r   r   r   r-   r   r>   r"   rA   rB   s   @r$   r   r   T  s    51n" r#   r   c                   6   ^  \ rS rSrSrU 4S jrSS jrSrU =r$ )LevitEncoderi  z;
LeViT Encoder consisting of multiple `LevitStage` stages.
c                   > [         TU ]  5         Xl        U R                  R                  U R                  R                  -  n/ U l        U R                  R                  R                  S/5        [        [        UR                  5      5       H  n[        UUUR                  U   UR                  U   UR                  U   UR                  U   UR                  U   UR                   U   UR                  U   U5
      nUR#                  5       nU R
                  R                  U5        M     [$        R&                  " U R
                  5      U l        g )N )r,   r-   rU   
image_size
patch_sizestagesr   r   r   r   r   r   rK   r   r   r   r   r   r   r   )r2   rU   ru   	stage_idxstager9   s        r$   r-   LevitEncoder.__init__  s   [[++t{{/E/EE
##RD)s6==12I##I.y)i(**95&&y1  +	*E --/JKKu% 3  mmDKK0r#   c                     U(       a  SOS nU R                    H  nU(       a  XA4-   nU" U5      nM     U(       a  XA4-   nU(       d  [        S X4 5       5      $ [        XS9$ )Nr   c              3   .   #    U  H  oc  M  Uv   M     g 7fr<   r   ).0vs     r$   	<genexpr>'LevitEncoder.forward.<locals>.<genexpr>  s     W$Eq$Es   	)last_hidden_stater   )r	  r!   r   )r2   rm   output_hidden_statesreturn_dictall_hidden_statesr  s         r$   r>   LevitEncoder.forward  sd    "6BD[[E#$5$G! .L !
   1O CW\$EWWW-nnr#   )rU   r	  )FTr@   rB   s   @r$   r  r    s    12o or#   r  c                   2   ^  \ rS rSrSrU 4S jrS rSrU =r$ )LevitClassificationLayeri  z
LeViT Classification Layer
c                    > [         TU ]  5         [        R                  " U5      U l        [        R
                  " X5      U l        g r<   )r,   r-   r   rf   r1   rd   re   )r2   rg   rh   r9   s      r$   r-   !LevitClassificationLayer.__init__  s.    ..3ii	6r#   c                 J    U R                  U5      nU R                  U5      nU$ r<   ro   )r2   rm   r   s      r$   r>    LevitClassificationLayer.forward  s#    |4\*r#   ro   r@   rB   s   @r$   r  r    s    7
 r#   r  c                   F   ^  \ rS rSr% \\S'   SrSrSrS/r	U 4S jr
SrU =r$ )	LevitPreTrainedModeli  rU   levitr\   )imager   c                   > [         TU ]  U5        [        U[        5      (       ad  [        R
                  " UR                  [        R                  " UR                  5      R                  UR                  UR                  5      5        g [        U[        5      (       ad  [        R
                  " UR                  [        R                  " UR                  5      R                  UR                  UR                  5      5        g g r<   )r,   _init_weights
isinstancer   initcopy_r   r   r   r   ry   r   r   r   )r2   r   r9   s     r$   r#  "LevitPreTrainedModel._init_weights  s    f%fn--JJ**E,<,<V^^,L,Q,QRXRcRcekevev,w  788JJ**  055f6H6H&J[J[\ 9r#   r   )r   r   r   r   r   r    base_model_prefixmain_input_nameinput_modalities_no_split_modulesr#  r"   rA   rB   s   @r$   r  r    s-    $O!-.
 
r#   r  c                      ^  \ rS rSrU 4S jr\   S
S\R                  S-  S\S-  S\S-  S\	\
-  4S jj5       rS	rU =r$ )
LevitModeli  c                    > [         TU ]  U5        Xl        [        U5      U l        [        U5      U l        U R                  5         g r<   )r,   r-   rU   rD   patch_embeddingsr  encoder	post_initrT   s     r$   r-   LevitModel.__init__  s8      4V <#F+r#   Nr\   r  r  returnc                 @   Ub  UOU R                   R                  nUb  UOU R                   R                  nUc  [        S5      eU R	                  U5      nU R                  UUUS9nUS   nUR                  SS9nU(       d	  Xx4USS  -   $ [        UUUR                  S9$ )Nz You have to specify pixel_valuesr  r  r   r   r   )r  pooler_outputr   )	rU   r  r  rY   r/  r0  meanr   r   )	r2   r\   r  r  kwargsr=   encoder_outputsr  pooled_outputs	            r$   r>   LevitModel.forward  s     %9$D $++JjJj 	 &1%<k$++BYBY?@@**<8
,,!5# ' 
 ,A. *..1.5%58KKK7/')77
 	
r#   )rU   r0  r/  NNN)r   r   r   r   r-   r   r   r   boolr!   r   r>   r"   rA   rB   s   @r$   r-  r-    sf      26,0#'	"
''$."
 #Tk"
 D[	"
 
9	9"
 "
r#   r-  z
    Levit Model with an image classification head on top (a linear layer on top of the pooled features), e.g. for
    ImageNet.
    c                      ^  \ rS rSrU 4S jr\    SS\R                  S-  S\R                  S-  S\	S-  S\	S-  S\
\-  4
S	 jj5       rS
rU =r$ )LevitForImageClassificationi  c                 >  > [         TU ]  U5        Xl        UR                  U l        [	        U5      U l        UR                  S:  a#  [        UR                  S   UR                  5      O[        R                  R                  5       U l        U R                  5         g Nr   rx   )r,   r-   rU   
num_labelsr-  r   r  rK   r   r   Identity
classifierr1  rT   s     r$   r-   $LevitForImageClassification.__init__#  s~      ++'

   1$ %V%8%8%<f>O>OP""$ 	 	r#   Nr\   labelsr  r  r3  c                 H   Ub  UOU R                   R                  nU R                  XUS9nUS   nUR                  S5      nU R	                  U5      nSn	Ub  U R                  X(U R                   5      n	U(       d  U4USS -   n
U	b  U	4U
-   $ U
$ [        U	UUR                  S9$ )ab  
labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
    Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
    config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
    `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
Nr5  r   r   rI   )lossr   r   )rU   r  r   r7  rD  loss_functionr	   r   )r2   r\   rF  r  r  r8  outputssequence_outputr   rH  outputs              r$   r>   #LevitForImageClassification.forward3  s     &1%<k$++BYBY**\bm*n!!*)..q11%%fdkkBDY,F)-)9TGf$EvE3!//
 	
r#   )rD  rU   r   rB  )NNNN)r   r   r   r   r-   r   r   r   r   r=  r!   r	   r>   r"   rA   rB   s   @r$   r?  r?    s~       26*.,0#'"
''$."
   4'"
 #Tk	"

 D["
 
5	5"
 "
r#   r?  ap  
    LeViT Model transformer with image classification heads on top (a linear layer on top of the final hidden state and
    a linear layer on top of the final hidden state of the distillation token) e.g. for ImageNet. .. warning::
           This model supports inference-only. Fine-tuning with distillation (i.e. with a teacher) is not yet
           supported.
    c                      ^  \ rS rSrU 4S jr\   S
S\R                  S-  S\S-  S\S-  S\	\
-  4S jj5       rS	rU =r$ )&LevitForImageClassificationWithTeacheriY  c                   > [         TU ]  U5        Xl        UR                  U l        [	        U5      U l        UR                  S:  a#  [        UR                  S   UR                  5      O[        R                  R                  5       U l        UR                  S:  a#  [        UR                  S   UR                  5      O[        R                  R                  5       U l        U R                  5         g rA  )r,   r-   rU   rB  r-  r   r  rK   r   r   rC  rD  classifier_distillr1  rT   s     r$   r-   /LevitForImageClassificationWithTeacher.__init__b  s      ++'

   1$ %V%8%8%<f>O>OP""$ 	   1$ %V%8%8%<f>O>OP""$ 	 	r#   Nr\   r  r  r3  c                 &   Ub  UOU R                   R                  nU R                  XUS9nUS   nUR                  S5      nU R	                  U5      U R                  U5      pXx-   S-  n	U(       d  XU4USS  -   n
U
$ [        U	UUUR                  S9$ )Nr5  r   r   rI   )r   r   r   r   )rU   r  r   r7  rD  rQ  r   r   )r2   r\   r  r  r8  rJ  rK  r   distill_logitsr   rL  s              r$   r>   .LevitForImageClassificationWithTeacher.forwardw  s     &1%<k$++BYBY**\bm*n!!*)..q1%)___%EtG^G^_nGoN-2.9GABKGFM;! .!//	
 	
r#   )rD  rQ  rU   r   rB  r<  )r   r   r   r   r-   r   r   r   r=  r!   r   r>   r"   rA   rB   s   @r$   rO  rO  Y  sf    *  26,0#'	
''$.
 #Tk
 D[	
 
=	=
 
r#   rO  )r?  rO  r-  r  )*r   r   dataclassesr   r   r   r  r   r%  modeling_outputsr   r   r	   r
   modeling_utilsr   utilsr   r   configuration_levitr   
get_loggerr   loggerr   Moduler&   rD   r_   rr   r   r   r   r   r   r  r  r  r-  r?  rO  __all__r   r#   r$   <module>r_     s     !   &  . , , 
		H	% 
 :; : :$")) ()5299 )5X	RYY 	RYY =RYY =@Sbii SlBII $   *B BJ+o299 +o\ryy   ?  ( ,
% ,
 ,
^ 4
"6 4
4
n 0
-A 0
0
fr#   