
    Z jFR                     d   S SK rS SK Jr  S SKrS SKJr  SSKJr  SSK	J
r
  SSKJr  SSKJrJrJr  SSKJrJr  SS	KJr  SS
KJrJrJr  SSKJrJr  SSKJr  SSKJ r    " S S\RB                  5      r" " S S\RB                  5      r#  S5S\RB                  S\RH                  S\RH                  S\RH                  S\RH                  S-  S\%S-  S\%S\\   4S jjr& " S S\RB                  5      r' " S S\RB                  5      r( " S  S!\RB                  5      r) " S" S#\RB                  5      r* " S$ S%\RB                  5      r+ " S& S'\5      r,\ " S( S)\5      5       r- " S* S+\RB                  5      r. " S, S-\RB                  5      r/\ " S. S/\-5      5       r0\" S0S19 " S2 S3\-5      5       r1/ S4Qr2g)6    N)Callable   )initialization)ACT2FN)GradientCheckpointingLayer)BaseModelOutputBaseModelOutputWithPoolingImageClassifierOutput)ALL_ATTENTION_FUNCTIONSPreTrainedModel)Unpack)TransformersKwargsauto_docstring	torch_int)can_return_tuplemerge_with_config_defaults)capture_outputs   )IJepaConfigc                   v   ^  \ rS rSrSrS\4U 4S jjrS
S\R                  S\	S\R                  4S jjr
S	rU =r$ )IJepaPatchEmbeddings   z
This class turns `pixel_values` of shape `(batch_size, num_channels, height, width)` into the initial
`hidden_states` (patch embeddings) of shape `(batch_size, seq_length, hidden_size)` to be consumed by a
Transformer.
configc                   > [         TU ]  5         UR                  UR                  p2UR                  UR
                  pT[        U[        R                  R                  5      (       a  UOX"4n[        U[        R                  R                  5      (       a  UOX34nUS   US   -  US   US   -  -  nX l        X0l        X@l        X`l
        [        R                  " XEX3S9U l        g )Nr   r   )kernel_sizestride)super__init__
image_size
patch_sizenum_channelshidden_size
isinstancecollectionsabcIterablenum_patchesnnConv2d
projection)selfr   r   r    r!   r"   r'   	__class__s          y/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/ijepa/modeling_ijepa.pyr   IJepaPatchEmbeddings.__init__    s    !'!2!2F4E4EJ$*$7$79K9Kk#-j+//:R:R#S#SZZdYq
#-j+//:R:R#S#SZZdYq
!!}
15*Q-:VW=:XY$$(&))L:i    pixel_valuesinterpolate_pos_encodingreturnc                    UR                   u  p4pVX@R                  :w  a  [        SU R                   SU S35      eU(       dV  XPR                  S   :w  d  X`R                  S   :w  a2  [        SU SU SU R                  S    SU R                  S    S	3	5      eU R	                  U5      R                  S
5      R                  SS
5      nU$ )NzoMake sure that the channel dimension of the pixel values match with the one set in the configuration. Expected z	 but got .r   r   zInput image size (*z) doesn't match model (z).   )shaper!   
ValueErrorr   r*   flatten	transpose)r+   r0   r1   
batch_sizer!   heightwidth
embeddingss           r-   forwardIJepaPatchEmbeddings.forward/   s    2>2D2D/
&,,,!../yaI  (++u8J/J (% 9+,Adooa.@-AE  __\2::1=GG1M
r/   )r   r!   r'   r    r*   F)__name__
__module____qualname____firstlineno____doc__r   r   torchTensorboolr?   __static_attributes____classcell__r,   s   @r-   r   r      s@    j{ jELL D ]b]i]i  r/   r   c            	          ^  \ rS rSrSrSS\S\SS4U 4S jjjrS\R                  S	\
S
\
S\R                  4S jr  SS\R                  S\R                  S-  S\S\R                  4S jjrSrU =r$ )IJepaEmbeddings@   zZ
Construct the CLS token, position and patch embeddings. Optionally, also the mask token.
r   use_mask_tokenr2   Nc                   > [         TU ]  5         U(       a6  [        R                  " [        R
                  " SSUR                  5      5      OS U l        [        U5      U l	        U R                  R                  n[        R                  " [        R                  " SX1R                  5      5      U l        [        R                  " UR                  5      U l        UR                   U l        Xl        g )Nr   )r   r   r(   	ParameterrG   zerosr"   
mask_tokenr   patch_embeddingsr'   randnposition_embeddingsDropouthidden_dropout_probdropoutr    r   )r+   r   rP   r'   r,   s       r-   r   IJepaEmbeddings.__init__E   s    Q_",,u{{1a9K9K'LMei 4V <++77#%<<A{L^L^0_#` zz&"<"<= ++r/   r>   r<   r=   c                 ,   UR                   S   nU R                  R                   S   n[        R                  R	                  5       (       d  XE:X  a  X#:X  a  U R                  $ U R                  nUR                   S   nX R
                  -  nX0R
                  -  n	[        US-  5      n
UR                  SXU5      nUR                  SSSS5      n[        R                  R                  UX4SSS	9nUR                  SSSS5      R                  SSU5      nU$ )
a  
This method allows to interpolate the pre-trained position encodings, to be able to use the model on higher resolution
images. This method is also adapted to support torch.jit tracing.

Adapted from:
- https://github.com/facebookresearch/dino/blob/de9ee3df6cf39fac952ab558447af1fa1365362a/vision_transformer.py#L174-L194, and
- https://github.com/facebookresearch/dinov2/blob/e1277af2ba9496fbadf7aec6eba56e8d882d1e35/dinov2/models/vision_transformer.py#L179-L211
r   g      ?r   r   r6   bicubicF)sizemodealign_corners)r7   rW   rG   jit
is_tracingr    r   reshapepermuter(   
functionalinterpolateview)r+   r>   r<   r=   r'   num_positionspatch_pos_embeddim
new_height	new_widthsqrt_num_positionss              r-   r1   (IJepaEmbeddings.interpolate_pos_encodingO   s    !&&q)0066q9 yy##%%+*F6?+++22r".
__,	&}c'9:)11!5G]`a)11!Q1=--33(	 4 
 *11!Q1=BB1b#Nr/   r0   bool_masked_posr1   c                 n   UR                   u  pEpgU R                  XS9nUbX  UR                   S   n	U R                  R                  XIS5      n
UR	                  S5      R                  U
5      nUSU-
  -  X-  -   nU(       a  XR                  XU5      -   nOXR                  -   nU R                  U5      nU$ )N)r1   r   r]   g      ?)	r7   rU   rT   expand	unsqueezetype_asr1   rW   rZ   )r+   r0   rp   r1   r;   _r<   r=   r>   
seq_lengthmask_tokensmasks               r-   r?   IJepaEmbeddings.forwardv   s     (4'9'9$
v**<*k
&#))!,J//00LK",,R088ED#sTz2[5GGJ $#&C&CJX]&^^J#&>&>>J\\*-
r/   )r   rZ   rT   rU   r    rW   rA   NF)rB   rC   rD   rE   rF   r   rI   r   rG   rH   intr1   
BoolTensorr?   rJ   rK   rL   s   @r-   rN   rN   @   s    { D T  %5<< % %UX %]b]i]i %T 48).	ll ))D0 #'	
 
 r/   rN   modulequerykeyvalueattention_maskscalingrZ   kwargsc                    Uc  UR                  S5      S-  n[        R                  " XR                  SS5      5      U-  nUb  X-   n[        R
                  R                  USS9n[        R
                  R                  XU R                  S9n[        R                  " X5      n	U	R                  SS5      R                  5       n	X4$ )Nr]         r6   r   rk   )ptrainingr   )
r_   rG   matmulr:   r(   rf   softmaxrZ   r   
contiguous)
r}   r~   r   r   r   r   rZ   r   attn_weightsattn_outputs
             r-   eager_attention_forwardr      s     **R.D( <<}}Q':;gEL!#4==((2(>L==((6??([L,,|3K''1-88:K$$r/   c                      ^  \ rS rSrS\4U 4S jjrS\R                  S\\	   S\
\R                  \R                  4   4S jrSrU =r$ )	IJepaSelfAttention   r   c                 0  > [         TU ]  5         UR                  UR                  -  S:w  a7  [	        US5      (       d&  [        SUR                   SUR                   S35      eXl        UR                  U l        [        UR                  UR                  -  5      U l        U R                  U R                  -  U l	        UR                  U l        U R                  S-  U l        SU l        [        R                  " UR                  U R                  UR                   S9U l        [        R                  " UR                  U R                  UR                   S9U l        [        R                  " UR                  U R                  UR                   S9U l        g )	Nr   embedding_sizezThe hidden size z4 is not a multiple of the number of attention heads r4   r   F)bias)r   r   r"   num_attention_headshasattrr8   r   r{   attention_head_sizeall_head_sizeattention_probs_dropout_probdropout_probr   	is_causalr(   Linearqkv_biasr~   r   r   r+   r   r,   s     r-   r   IJepaSelfAttention.__init__   sG    : ::a?PVXhHiHi"6#5#5"6 7334A7 
 #)#=#= #&v'9'9F<V<V'V#W !558P8PP"??//5YYv1143E3EFOO\
99V//1C1C&//ZYYv1143E3EFOO\
r/   hidden_statesr   r2   c                    UR                   S   nUSU R                  U R                  4nU R                  U5      R                  " U6 R                  SS5      nU R                  U5      R                  " U6 R                  SS5      nU R                  U5      R                  " U6 R                  SS5      n[        R                  " U R                  R                  [        5      nU" U UUUS 4U R                  U R                  U R                  (       d  SOU R                   S.UD6u  pU	R#                  5       S S U R$                  4-   nU	R'                  U5      n	X4$ )Nr   r]   r   r6           )r   r   rZ   )r7   r   r   r   rh   r:   r   r~   r   get_interfacer   _attn_implementationr   r   r   r   r   r_   r   rd   )r+   r   r   r;   	new_shape	key_layervalue_layerquery_layerattention_interfacecontext_layerattention_probsnew_context_layer_shapes               r-   r?   IJepaSelfAttention.forward   sO   
 #((+
D$<$<d>V>VV	HH]+00)<FFq!L	jj/44i@JJ1aPjj/44i@JJ1aP(?(M(MKK,,.E)
 *=
*
 nnLL#}}C$2C2C
*
 
*
& #0"4"4"6s";t?Q?Q>S"S%--.EF--r/   )
r   r   r   r   r   r   r   r~   r   r   )rB   rC   rD   rE   r   r   rG   rH   r   r   tupler?   rJ   rK   rL   s   @r-   r   r      sS    ]{ ](.||. +,. 
u||U\\)	*	. .r/   r   c                      ^  \ rS rSrSrS\4U 4S jjrS\R                  S\R                  S\R                  4S jr	S	r
U =r$ )
IJepaSelfOutput   z
The residual connection is defined in IJepaLayer instead of here (as is the case with other models), due to the
layernorm applied before each block.
r   c                    > [         TU ]  5         [        R                  " UR                  UR                  5      U l        [        R                  " UR                  5      U l        g N)	r   r   r(   r   r"   denserX   rY   rZ   r   s     r-   r   IJepaSelfOutput.__init__   sB    YYv1163E3EF
zz&"<"<=r/   r   input_tensorr2   c                 J    U R                  U5      nU R                  U5      nU$ r   r   rZ   r+   r   r   s      r-   r?   IJepaSelfOutput.forward   s$    

=1]3r/   r   )rB   rC   rD   rE   rF   r   r   rG   rH   r?   rJ   rK   rL   s   @r-   r   r      sB    
>{ >
U\\  RWR^R^  r/   r   c                   t   ^  \ rS rSrS\4U 4S jjrS\R                  S\\	   S\R                  4S jr
SrU =r$ )	IJepaAttention   r   c                 b   > [         TU ]  5         [        U5      U l        [	        U5      U l        g r   )r   r   r   	attentionr   outputr   s     r-   r   IJepaAttention.__init__   s&    +F3%f-r/   r   r   r2   c                 R    U R                   " U40 UD6u  p4U R                  X15      nU$ r   r   r   )r+   r   r   self_attn_outputru   r   s         r-   r?   IJepaAttention.forward   s/    
 #nn]EfE-=r/   r   )rB   rC   rD   rE   r   r   rG   rH   r   r   r?   rJ   rK   rL   s   @r-   r   r      sC    .{ .
|| +, 
	 r/   r   c                   j   ^  \ rS rSrS\4U 4S jjrS\R                  S\R                  4S jrSr	U =r
$ )IJepaIntermediatei  r   c                   > [         TU ]  5         [        R                  " UR                  UR
                  5      U l        [        UR                  [        5      (       a  [        UR                     U l        g UR                  U l        g r   )r   r   r(   r   r"   intermediate_sizer   r#   
hidden_actstrr   intermediate_act_fnr   s     r-   r   IJepaIntermediate.__init__  s`    YYv1163K3KL
f''--'-f.?.?'@D$'-'8'8D$r/   r   r2   c                 J    U R                  U5      nU R                  U5      nU$ r   r   r   )r+   r   s     r-   r?   IJepaIntermediate.forward  s&    

=100?r/   r   rB   rC   rD   rE   r   r   rG   rH   r?   rJ   rK   rL   s   @r-   r   r     s/    9{ 9U\\ ell  r/   r   c                      ^  \ rS rSrS\4U 4S jjrS\R                  S\R                  S\R                  4S jrSr	U =r
$ )	IJepaOutputi  r   c                    > [         TU ]  5         [        R                  " UR                  UR
                  5      U l        [        R                  " UR                  5      U l	        g r   )
r   r   r(   r   r   r"   r   rX   rY   rZ   r   s     r-   r   IJepaOutput.__init__  sB    YYv779K9KL
zz&"<"<=r/   r   r   r2   c                 R    U R                  U5      nU R                  U5      nX-   nU$ r   r   r   s      r-   r?   IJepaOutput.forward  s,    

=1]3%4r/   r   r   rL   s   @r-   r   r     s=    >{ >
U\\  RWR^R^  r/   r   c                   x   ^  \ rS rSrSrS\4U 4S jjrS\R                  S\	\
   S\R                  4S jrS	rU =r$ )

IJepaLayeri!  z?This corresponds to the Block class in the timm implementation.r   c                 j  > [         TU ]  5         UR                  U l        SU l        [	        U5      U l        [        U5      U l        [        U5      U l	        [        R                  " UR                  UR                  S9U l        [        R                  " UR                  UR                  S9U l        g )Nr   eps)r   r   chunk_size_feed_forwardseq_len_dimr   r   r   intermediater   r   r(   	LayerNormr"   layer_norm_epslayernorm_beforelayernorm_afterr   s     r-   r   IJepaLayer.__init__$  s    '-'E'E$'/-f5!&) "V-?-?VEZEZ [!||F,>,>FDYDYZr/   r   r   r2   c                     U R                  U5      nU R                  " U40 UD6nXA-   nU R                  U5      nU R                  U5      nU R	                  XQ5      nU$ r   )r   r   r   r   r   )r+   r   r   hidden_states_normattention_outputlayer_outputs         r-   r?   IJepaLayer.forward.  sl    
 "22=A>>*<GG )8 ++M:((6 {{<?r/   )r   r   r   r   r   r   r   )rB   rC   rD   rE   rF   r   r   rG   rH   r   r   r?   rJ   rK   rL   s   @r-   r   r   !  sH    I[{ [|| +, 
	 r/   r   c                       \ rS rSr% \\S'   SrSrSrSr	SS/r
SrSrSrSr\\S	.r\R&                  " 5       S
\R*                  \R,                  -  \R.                  -  SS4S j5       rSrg)IJepaPreTrainedModeliC  r   ijepar0   )imageTrN   r   )r   
attentionsr}   r2   Nc                    [        U[        R                  [        R                  45      (       ac  [        R
                  " UR                  SU R                  R                  S9  UR                  b!  [        R                  " UR                  5        gg[        U[        R                  5      (       aA  [        R                  " UR                  5        [        R                  " UR                  5        g[        U[        5      (       ac  [        R
                  " UR                  SU R                  R                  S9  UR                  b!  [        R                  " UR                  5        ggg)zInitialize the weightsr   )meanstdN)r#   r(   r   r)   inittrunc_normal_weightr   initializer_ranger   zeros_r   ones_rN   rW   rT   )r+   r}   s     r-   _init_weights"IJepaPreTrainedModel._init_weightsT  s     fryy"))455v}}3DKK<Y<YZ{{&FKK( '--KK$JJv}}%00v99IfIfg  ,F--. - 1r/    )rB   rC   rD   rE   r   __annotations__base_model_prefixmain_input_nameinput_modalitiessupports_gradient_checkpointing_no_split_modules_supports_sdpa_supports_flash_attn_supports_flex_attn_supports_attention_backendr   r   _can_record_outputsrG   no_gradr(   r   r)   r   r   rJ   r   r/   r-   r   r   C  s    $O!&*#*L9N"&#(
 ]]_/BII		$9BLL$H /T / /r/   r   c                   `   ^  \ rS rSrS\4U 4S jjrS\R                  S\\	   S\
4S jrSrU =r$ )	IJepaEncoderid  r   c                    > [         TU ]  5         Xl        [        R                  " [        UR                  5       Vs/ s H  n[        U5      PM     sn5      U l        SU l	        g s  snf rz   )
r   r   r   r(   
ModuleListrangenum_hidden_layersr   layergradient_checkpointing)r+   r   ru   r,   s      r-   r   IJepaEncoder.__init__e  sR    ]]fF^F^@_#`@_1Jv$6@_#`a
&+# $as   A&r   r   r2   c                 L    U R                    H  nU" U40 UD6nM     [        US9$ )N)last_hidden_state)r  r   )r+   r   r   layer_modules       r-   r?   IJepaEncoder.forwardk  s.    
 !JJL(A&AM ' ??r/   )r   r  r  )rB   rC   rD   rE   r   r   rG   rH   r   r   r   r?   rJ   rK   rL   s   @r-   r	  r	  d  sD    ,{ ,@||@ +,@ 
	@ @r/   r	  c                   j   ^  \ rS rSrS\4U 4S jjrS\R                  S\R                  4S jrSr	U =r
$ )IJepaPooleriv  r   c                    > [         TU ]  5         [        R                  " UR                  UR
                  5      U l        [        UR                     U l	        g r   )
r   r   r(   r   r"   pooler_output_sizer   r   
pooler_act
activationr   s     r-   r   IJepaPooler.__init__w  s>    YYv1163L3LM
 !2!23r/   r   r2   c                 \    US S 2S4   nU R                  U5      nU R                  U5      nU$ )Nr   )r   r  )r+   r   first_token_tensorpooled_outputs       r-   r?   IJepaPooler.forward|  s6     +1a40

#566r/   )r  r   r   rL   s   @r-   r  r  v  s/    4{ 4
U\\ ell  r/   r  c                      ^  \ rS rSrSS\S\S\4U 4S jjjrS\4S jr\	\
" SS	9\   SS\R                  S
-  S\R                  S
-  S\S
-  S\\   S\4
S jj5       5       5       rSrU =r$ )
IJepaModeli  Fr   add_pooling_layerrP   c                   > [         TU ]  U5        Xl        [        XS9U l        [        U5      U l        [        R                  " UR                  UR                  S9U l        U(       a  [        U5      OSU l        U R                  5         g)z
add_pooling_layer (bool, *optional*, defaults to `True`):
    Whether to add a pooling layer
use_mask_token (`bool`, *optional*, defaults to `False`):
    Whether to use a mask token for masked image modeling.
)rP   r   N)r   r   r   rN   r>   r	  encoderr(   r   r"   r   	layernormr  pooler	post_init)r+   r   r"  rP   r,   s       r-   r   IJepaModel.__init__  si     	 )&P#F+f&8&8f>S>ST->k&)D 	r/   r2   c                 .    U R                   R                  $ r   )r>   rU   )r+   s    r-   get_input_embeddingsIJepaModel.get_input_embeddings  s    ///r/   )tie_last_hidden_statesNr0   rp   r1   r   c                    Uc  [        S5      eU R                  R                  R                  R                  R
                  nUR
                  U:w  a  UR                  U5      nU R                  XUS9nU R                  U5      nUR                  nU R                  U5      nU R                  b  U R                  U5      OSn	[        XS9$ )z
bool_masked_pos (`torch.BoolTensor` of shape `(batch_size, num_patches)`, *optional*):
    Boolean masked positions. Indicates which patches are masked (1) and which aren't (0).
Nz You have to specify pixel_values)rp   r1   )r  pooler_output)r8   r>   rU   r*   r   dtypetor$  r  r%  r&  r	   )
r+   r0   rp   r1   r   expected_dtypeembedding_outputencoder_outputssequence_outputr  s
             r-   r?   IJepaModel.forward  s     ?@@ 99DDKKQQ/'??>:L??Tl + 
 ,0<<8H+I);;..98<8OO4UY)Oiir/   )r   r>   r$  r%  r&  )FFNNN)rB   rC   rD   rE   r   rI   r   r   r*  r   r   r   rG   rH   r|   r   r   r	   r?   rJ   rK   rL   s   @r-   r!  r!    s    { t ]a  $0&: 0  E2 -13704	jllT)j ))D0j #'+	j
 +,j 
$j  3  jr/   r!  a  
    IJepa Model transformer with an image classification head on top (a linear layer on top of the final hidden states)
    e.g. for ImageNet.

    <Tip>

        Note that it's possible to fine-tune IJepa on higher resolution images than the ones it has been trained on, by
        setting `interpolate_pos_encoding` to `True` in the forward of the model. This will interpolate the pre-trained
        position embeddings to the higher resolution.

    </Tip>
    )custom_introc                      ^  \ rS rSrS\4U 4S jjr\\   SS\R                  S-  S\R                  S-  S\
S-  S\\   S	\4
S
 jj5       5       rSrU =r$ )IJepaForImageClassificationi  r   c                 .  > [         TU ]  U5        UR                  U l        [        USS9U l        UR                  S:  a+  [
        R                  " UR                  UR                  5      O[
        R                  " 5       U l	        U R                  5         g )NF)r"  r   )r   r   
num_labelsr!  r   r(   r   r"   Identity
classifierr'  r   s     r-   r   $IJepaForImageClassification.__init__  ss      ++%@
 OUN_N_bcNc"))F$6$68I8IJikititiv 	r/   Nr0   labelsr1   r   r2   c                    U R                   " U4SU0UD6nUR                  nU R                  UR                  SS95      nSnUb  U R                  " X'U R
                  40 UD6n[        UUUR                  UR                  S9$ )ab  
labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
    Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
    config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
    `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
r1   r   r   N)losslogitsr   r   )	r   r  r=  r   loss_functionr   r
   r   r   )	r+   r0   r?  r1   r   outputsr4  rB  rA  s	            r-   r?   #IJepaForImageClassification.forward  s      /3jj/
%=/
 /

 "33!5!5!!5!<=%%fdkkLVLD$!//))	
 	
r/   )r=  r   r;  r6  )rB   rC   rD   rE   r   r   r   r   rG   rH   rI   r   r   r
   r?   rJ   rK   rL   s   @r-   r9  r9    s    
{ 
  -1&*04	
llT)
 t#
 #'+	

 +,
 

  
r/   r9  )r   r!  r9  )Nr   )3collections.abcr$   r   rG   torch.nnr(    r   r   activationsr   modeling_layersr   modeling_outputsr   r	   r
   modeling_utilsr   r   processing_utilsr   utilsr   r   r   utils.genericr   r   utils.output_capturingr   configuration_ijepar   Moduler   rN   rH   floatr   r   r   r   r   r   r   r   r	  r  r!  r9  __all__r   r/   r-   <module>rU     s    $   & ! 9 b b F & B B I 5 ,$299 $NNbii Nn !%II%<<% 
% <<	%
 LL4'% T\% % '(%84. 4.nbii "RYY  		 
")) 
+ D /? / /@@299 @$"))  7j% 7j 7jt .
"6 .
.
b Pr/   