
    Z j                         S SK r S SKJr  S SKJr  SSKJr  SSKJ	r	J
r
  SSKJr  SSKJrJrJr  SS	KJrJrJrJr   " S
 S\5      r\ " S S\5      5       r " S S\\5      r\" SS9 " S S\\5      5       r/ SQrg)    N)IJepaConfig   )initialization)BaseModelOutputWithPoolingImageClassifierOutput)Unpack)TransformersKwargsauto_docstring	torch_int   )ViTEmbeddingsViTForImageClassificationViTModelViTPreTrainedModelc            	          ^  \ rS rSrSS\S\SS4U 4S jjjrS\R                  S\	S	\	S\R                  4S
 jr
  SS\R                  S\R                  S-  S\S\R                  4S jjrSrU =r$ )IJepaEmbeddings   configuse_mask_tokenreturnNc                    > [         TU ]  X5        U ?U R                  R                  n[
        R                  " [        R                  " SX1R                  5      5      U l
        g )N   )super__init__	cls_tokenpatch_embeddingsnum_patchesnn	Parametertorchrandnhidden_sizeposition_embeddings)selfr   r   r   	__class__s       x/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/ijepa/modular_ijepa.pyr   IJepaEmbeddings.__init__   sH    0N++77#%<<A{L^L^0_#`     
embeddingsheightwidthc                 ,   UR                   S   nU R                  R                   S   n[        R                  R	                  5       (       d  XE:X  a  X#:X  a  U R                  $ U R                  nUR                   S   nX R
                  -  nX0R
                  -  n	[        US-  5      n
UR                  SXU5      nUR                  SSSS5      n[        R                  R                  UX4SSS	9nUR                  SSSS5      R                  SSU5      nU$ )
a  
This method allows to interpolate the pre-trained position encodings, to be able to use the model on higher resolution
images. This method is also adapted to support torch.jit tracing.

Adapted from:
- https://github.com/facebookresearch/dino/blob/de9ee3df6cf39fac952ab558447af1fa1365362a/vision_transformer.py#L174-L194, and
- https://github.com/facebookresearch/dinov2/blob/e1277af2ba9496fbadf7aec6eba56e8d882d1e35/dinov2/models/vision_transformer.py#L179-L211
r   g      ?r   r   r   bicubicF)sizemodealign_corners)shaper#   r    jit
is_tracing
patch_sizer   reshapepermuter   
functionalinterpolateview)r$   r)   r*   r+   r   num_positionspatch_pos_embeddim
new_height	new_widthsqrt_num_positionss              r&   interpolate_pos_encoding(IJepaEmbeddings.interpolate_pos_encoding   s    !&&q)0066q9 yy##%%+*F6?+++22r".
__,	&}c'9:)11!5G]`a)11!Q1=--33(	 4 
 *11!Q1=BB1b#Nr(   pixel_valuesbool_masked_posrA   c                 n   UR                   u  pEpgU R                  XS9nUbX  UR                   S   n	U R                  R                  XIS5      n
UR	                  S5      R                  U
5      nUSU-
  -  X-  -   nU(       a  XR                  XU5      -   nOXR                  -   nU R                  U5      nU$ )N)rA   r   r-   g      ?)	r2   r   
mask_tokenexpand	unsqueezetype_asrA   r#   dropout)r$   rC   rD   rA   
batch_size_r*   r+   r)   
seq_lengthmask_tokensmasks               r&   forwardIJepaEmbeddings.forward<   s     (4'9'9$
v**<*k
&#))!,J//00LK",,R088ED#sTz2[5GGJ $#&C&CJX]&^^J#&>&>>J\\*-
r(   )r#   )F)NF)__name__
__module____qualname____firstlineno__r   boolr   r    TensorintrA   
BoolTensorrP   __static_attributes____classcell__r%   s   @r&   r   r      s    a{ aD aT a a%5<< % %UX %]b]i]i %T 48).	ll ))D0 #'	
 
 r(   r   c                       \ rS rSr\R
                  " 5       S\R                  \R                  -  \R                  -  SS4S j5       r
Srg)IJepaPreTrainedModelW   moduler   Nc                    [        U[        R                  [        R                  45      (       ac  [        R
                  " UR                  SU R                  R                  S9  UR                  b!  [        R                  " UR                  5        gg[        U[        R                  5      (       aA  [        R                  " UR                  5        [        R                  " UR                  5        g[        U[        5      (       ac  [        R
                  " UR                  SU R                  R                  S9  UR                  b!  [        R                  " UR                  5        ggg)zInitialize the weightsg        )meanstdN)
isinstancer   LinearConv2dinittrunc_normal_weightr   initializer_rangebiaszeros_	LayerNormones_r   r#   rF   )r$   r`   s     r&   _init_weights"IJepaPreTrainedModel._init_weightsY   s     fryy"))455v}}3DKK<Y<YZ{{&FKK( '--KK$JJv}}%00v99IfIfg  ,F--. - 1r(    )rR   rS   rT   rU   r    no_gradr   re   rf   rm   ro   rZ   rq   r(   r&   r^   r^   W   s>    
]]_/BII		$9BLL$H /T / /r(   r^   c                   <   ^  \ rS rSrSS\S\S\4U 4S jjjrSrU =r$ )
IJepaModeli   r   add_pooling_layerr   c                 L   > [         TU ]  U5        Xl        [        XS9U l        g)z
add_pooling_layer (bool, *optional*, defaults to `True`):
    Whether to add a pooling layer
use_mask_token (`bool`, *optional*, defaults to `False`):
    Whether to use a mask token for masked image modeling.
)r   N)r   r   r   r   r)   )r$   r   rv   r   r%   s       r&   r   IJepaModel.__init__j   s#     	 )&Pr(   )r   r)   )FF)	rR   rS   rT   rU   r   rV   r   rZ   r[   r\   s   @r&   rt   rt   i   s(    	Q{ 	Qt 	Q]a 	Q 	Qr(   rt   a  
    IJepa Model transformer with an image classification head on top (a linear layer on top of the final hidden states)
    e.g. for ImageNet.

    <Tip>

        Note that it's possible to fine-tune IJepa on higher resolution images than the ones it has been trained on, by
        setting `interpolate_pos_encoding` to `True` in the forward of the model. This will interpolate the pre-trained
        position embeddings to the higher resolution.

    </Tip>
    )custom_introc                      ^  \ rS rSrS\4U 4S jjr   SS\R                  S-  S\R                  S-  S\S-  S\	\
   S	\4
S
 jjrSrU =r$ )IJepaForImageClassificationv   r   c                 b   > [         TU ]  U5        [        USS9U l        U R	                  5         g )NF)rv   )r   r   rt   ijepa	post_init)r$   r   r%   s     r&   r   $IJepaForImageClassification.__init__   s(     %@
r(   NrC   labelsrA   kwargsr   c                    U R                   " U4SU0UD6nUR                  nU R                  UR                  SS95      nSnUb  U R                  " X'U R
                  40 UD6n[        UUUR                  UR                  S9$ )ab  
labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
    Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
    config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
    `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
rA   r   )r=   N)losslogitshidden_states
attentions)	r~   last_hidden_state
classifierrb   loss_functionr   r   r   r   )	r$   rC   r   rA   r   outputssequence_outputr   r   s	            r&   rP   #IJepaForImageClassification.forward   s     /3jj/
%=/
 /

 "33!5!5!!5!<=%%fdkkLVLD$!//))	
 	
r(   )r~   )NNN)rR   rS   rT   rU   r   r   r    rW   rV   r   r	   r   rP   rZ   r[   r\   s   @r&   r{   r{   v   sp    {  -1&*04	
llT)
 t#
 #'+	

 +,
 

 
r(   r{   )r^   rt   r{   )r    torch.nnr   -transformers.models.ijepa.configuration_ijepar    r   rg   modeling_outputsr   r   processing_utilsr   utilsr	   r
   r   vit.modeling_vitr   r   r   r   r   r^   rt   r{   __all__rq   r(   r&   <module>r      s      E & Q & B B e eGm GT /- / /"
Q%x 
Q %
"68Q %
%
Pr(   