
    Z j$/                        S r SSKrSSKJr  SSKJr  SSKJr  SSKJrJ	r	J
r
  SSKJr  SS	KJrJrJr  SS
KJrJr  SSKJr  SSKJr  SSKJrJrJr  SSKJrJrJrJ r   \" SS9\ " S S\5      5       5       r! " S S\5      r" " S S\RF                  5      r$ " S S\ 5      r% " S S\5      r& " S S\5      r' " S S\5      r( " S  S!\5      r) " S" S#\5      r* " S$ S%\*5      r+\ " S& S'\*5      5       r,\" S(S)9 " S* S+\5      5       r-/ S,Qr.g)-zPyTorch Pixio model.    N)strict)nn   )GradientCheckpointingLayer)BackboneOutputBaseModelOutputBaseModelOutputWithPooling)Unpack)TransformersKwargsauto_docstring
is_tracing)can_return_tuplemerge_with_config_defaults)capture_outputs   )Dinov2Config)Dinov2BackboneDinov2DropPath	Dinov2MLP)ViTAttentionViTPatchEmbeddingsViTPreTrainedModelViTSelfAttentionzfacebook/pixio-huge)
checkpointc                       \ rS rSr% SrSrSr\\S'   Sr	\\S'   Sr
\\S	'   S
r\\S'   Sr\\\   -  \\\4   -  \S'   Sr\\\   -  \\\4   -  \S'   \" 5       r\" 5       r\" 5       rSrg)PixioConfig#   a  
apply_layernorm (`bool`, *optional*, defaults to `True`):
    Whether to apply layer normalization to the feature maps in case the model is used as backbone.
reshape_hidden_states (`bool`, *optional*, defaults to `True`):
    Whether to reshape the feature maps to 4D tensors of shape `(batch_size, hidden_size, height, width)` in
    case the model is used as backbone. If `False`, the feature maps will be 3D tensors of shape `(batch_size,
    seq_len, hidden_size)`.
n_cls_tokens (`int`, *optional*, defaults to 8):
    Number of class tokens in the Transformer encoder.

Example:

```python
>>> from transformers import PixioConfig, PixioModel

>>> # Initializing a Pixio pixio-huge style configuration
>>> configuration = PixioConfig()

>>> # Initializing a model (with random weights) from the pixio-huge style configuration
>>> model = PixioModel(configuration)

>>> # Accessing the model configuration
>>> configuration = model.config
```pixioi   hidden_size    num_hidden_layers   num_attention_heads   n_cls_tokens   
image_size
patch_size N)__name__
__module____qualname____firstlineno____doc__
model_typer   int__annotations__r!   r#   r%   r'   listtupler(   AttributeErrorlayerscale_valueuse_swiglu_ffnuse_mask_token__static_attributes__r)       x/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/pixio/modular_pixio.pyr   r   #   s    2 JKs!!L#47Jd3i%S/1746Jd3i%S/16%'#%N#%Nr9   r   c                       \ rS rSrSrg)PixioPatchEmbeddingsM   r)   Nr*   r+   r,   r-   r8   r)   r9   r:   r<   r<   M       r9   r<   c                      ^  \ rS rSrSrS\SS4U 4S jjrS\R                  S\	S	\	S\R                  4S
 jr
S\R                  S\R                  4S jrSrU =r$ )PixioEmbeddingsQ   z:
Construct the CLS tokens, position and patch embeddings.
configreturnNc                 .  > [         TU ]  5         [        R                  " [        R
                  " SUR                  UR                  5      5      U l        S U l	        [        U5      U l        U R                  R                  n[        R                  " [        R
                  " SX!R                  -   UR                  5      5      U l        [        R                  " UR                  5      U l        UR                  U l        UR"                  U l        Xl        g )N   )super__init__r   	Parametertorchrandnr%   r   	cls_token
mask_tokenr<   patch_embeddingsnum_patchesposition_embeddingsDropouthidden_dropout_probdropoutr(   rC   )selfrC   rO   	__class__s      r:   rH   PixioEmbeddings.__init__V   s    ekk!V5H5H&J\J\&]^ 4V <++77#%<<A{M`M`?`bhbtbt0u#v zz&"<"<="// ++r9   
embeddingsheightwidthc                 0   UR                   S   U R                  -
  nU R                  R                   S   U R                  -
  n[        5       (       d  XE:X  a  X#:X  a  U R                  $ U R                  SS2SU R                  24   nU R                  SS2U R                  S24   nUR                   S   nX R                  -  n	X0R                  -  n
[        US-  5      nUR                  SXU5      nUR                  SSSS5      nUR                  n[        R                  R                  UR                  [        R                  5      X4SS	S
9R                  US9nUR                  SSSS5      R                  SSU5      n[        R                   " Xg4SS9$ )a  
This method allows to interpolate the pre-trained position encodings, to be able to use the model on higher resolution
images. This method is also adapted to support tracing and interpolation at torch.float32 precision.

Adapted from:
- https://github.com/facebookresearch/dino/blob/de9ee3df6cf39fac952ab558447af1fa1365362a/vision_transformer.py#L174-L194, and
- https://github.com/facebookresearch/dinov2/blob/e1277af2ba9496fbadf7aec6eba56e8d882d1e35/dinov2/models/vision_transformer.py#L179-L211
rF   Ng      ?r   r   r   bicubicF)sizemodealign_cornersdtypedim)shaper%   rP   r   r(   r0   reshapepermutera   r   
functionalinterpolatetorJ   float32viewcat)rT   rW   rX   rY   rO   num_positionsclass_pos_embedpatch_pos_embedrc   
new_height	new_widthsqrt_num_positionstarget_dtypes                r:   interpolate_pos_encoding(PixioEmbeddings.interpolate_pos_encodingc   s    !&&q)D,=,==0066q9D<M<MM|| <+++2216I8I8I6I3IJ221d6G6G6I3IJr".
__,	 !34)11!5G]`a)11!Q1=&,,--33u}}-(	 4 

 "<"
  	 *11!Q1=BB1b#Nyy/;CCr9   pixel_valuesc                 d   UR                   u  p#pEU R                  R                  R                  R                  nU R                  UR                  US95      nU R                  R                  USS5      n[        R                  " X4SS9nXpR                  XtU5      -   nU R                  U5      nU$ )Nr`   r[   rF   rb   )rd   rN   
projectionweightra   ri   rL   expandrJ   rl   rt   rS   )	rT   rv   
batch_size_rX   rY   rs   rW   
cls_tokenss	            r:   forwardPixioEmbeddings.forward   s    '3'9'9$
v,,77>>DD**<???+NO
^^**:r2>
YY
7Q?
"?"?
TY"ZZ
\\*-
r9   )rL   rC   rS   rM   r%   rN   r(   rP   )r*   r+   r,   r-   r.   r   rH   rJ   Tensorr0   rt   r~   r8   __classcell__rU   s   @r:   rA   rA   Q   sn    { t $D5<< $D $DUX $D]b]i]i $DLELL U\\  r9   rA   c                       \ rS rSrSrg)PixioSelfAttention   r)   Nr>   r)   r9   r:   r   r      r?   r9   r   c                   0   ^  \ rS rSrS\4U 4S jjrSrU =r$ )PixioAttention   rC   c                 D   > [         TU ]  U5        [        U5      U l        g N)rG   rH   r   	attentionrT   rC   rU   s     r:   rH   PixioAttention.__init__   s     +F3r9   )r   )r*   r+   r,   r-   r   rH   r8   r   r   s   @r:   r   r      s    4{ 4 4r9   r   c                       \ rS rSrSrg)PixioDropPath   r)   Nr>   r)   r9   r:   r   r      r?   r9   r   c                       \ rS rSrSrg)PixioMLP   r)   Nr>   r)   r9   r:   r   r      r?   r9   r   c                   x   ^  \ rS rSrS\SS4U 4S jjrS\R                  S\\	   S\R                  4S jr
S	rU =r$ )

PixioLayer   rC   rD   Nc                   > [         TU ]  5         [        R                  " UR                  UR
                  S9U l        [        U5      U l        UR                  S:  a  [        UR                  5      O[        R                  " 5       U l        [        R                  " UR                  UR
                  S9U l        [        U5      U l        g )Nepsg        )rG   rH   r   	LayerNormr   layer_norm_epsnorm1r   r   drop_path_rater   Identity	drop_pathnorm2r   mlpr   s     r:   rH   PixioLayer.__init__   s    \\&"4"4&:O:OP
'/AGAVAVY\A\v'<'<=bdbmbmbo\\&"4"4&:O:OP
F#r9   hidden_stateskwargsc                     U R                  U5      nU R                  " U40 UD6nU R                  U5      U-   nU R                  U5      nU R	                  U5      nU R                  U5      U-   nU$ r   )r   r   r   r   r   )rT   r   r   hidden_states_normself_attention_outputlayer_outputs         r:   r~   PixioLayer.forward   so    !ZZ6 $/A LV L'<=Mzz-0xx-~~l3mCr9   )r   r   r   r   r   )r*   r+   r,   r-   r   rH   rJ   r   r
   r   r~   r8   r   r   s   @r:   r   r      sF    ${ $t $U\\ VDV=W \a\h\h  r9   r   c                       \ rS rSr\\S.rSrg)PixioPreTrainedModel   )r   
attentionsr)   N)r*   r+   r,   r-   r   r   _can_record_outputsr8   r)   r9   r:   r   r      s    #(r9   r   c                   |   ^  \ rS rSrS\4U 4S jjr\\" SS9S\R                  S\
\   S\4S	 j5       5       rS
rU =r$ )PixioEncoder   rC   c                    > [         TU ]  U5        [        R                  " [	        UR
                  5       Vs/ s H  n[        U5      PM     sn5      U l        SU l        U R                  5         g s  snf )NF)
rG   rH   r   
ModuleListranger!   r   layergradient_checkpointing	post_init)rT   rC   r|   rU   s      r:   rH   PixioEncoder.__init__   sY     ]]fF^F^@_#`@_1Jv$6@_#`a
&+# $as   A1F)tie_last_hidden_statesr   r   rD   c                 L    U R                    H  nU" U40 UD6nM     [        US9$ )N)last_hidden_state)r   r   )rT   r   r   layer_modules       r:   r~   PixioEncoder.forward   s.     !JJL(A&AM ' ??r9   )r   r   )r*   r+   r,   r-   r   rH   r   r   rJ   r   r
   r   r   r~   r8   r   r   s   @r:   r   r      sU    {   E2@U\\ @VDV=W @\k @ 3  @r9   r   c            	          ^  \ rS rSrS\4U 4S jjrS\4S jr\\	 SS\
R                  S-  S\\   S\4S	 jj5       5       rS
rU =r$ )
PixioModel   rC   c                    > [         TU ]  U5        Xl        [        U5      U l        [        U5      U l        [        R                  " UR                  UR                  S9U l        U R                  5         g )Nr   )rG   rH   rC   rA   rW   r   encoderr   r   r   r   	layernormr   r   s     r:   rH   PixioModel.__init__   sU     )&1#F+f&8&8f>S>STr9   rD   c                 .    U R                   R                  $ r   )rW   rN   )rT   s    r:   get_input_embeddingsPixioModel.get_input_embeddings   s    ///r9   Nrv   r   c                 >   Uc  [        S5      eU R                  U5      nU R                  " U40 UD6nUR                  nU R	                  U5      nUS S 2S U R                  R
                  2S S 24   R                  SS9n[        UUUR                  UR                  S9$ )Nz You have to specify pixel_valuesrF   rb   )r   pooler_outputr   r   )

ValueErrorrW   r   r   r   r%   meanr	   r   r   )rT   rv   r   embedding_outputencoder_outputssequence_outputpooled_outputs          r:   r~   PixioModel.forward   s     ?@@??<8+/<<8H+SF+S);;..9'+IT__-I-I+I1(LMRRWXRY)-')77&11	
 	
r9   )rC   rW   r   r   r   )r*   r+   r,   r-   r   rH   r<   r   r   r   rJ   r   r
   r   r	   r~   r8   r   r   s   @r:   r   r      sh    	{ 	0&: 0  -1
llT)
 +,
 
$	
  
r9   r   zN
    Pixio backbone, to be used with frameworks like DETR and MaskFormer.
    )custom_introc                   D    \ rS rSrS\R
                  S\\   S\4S jr	Sr
g)PixioBackbonei  rv   r   rD   c                    SUS'   U R                  U5      nU R                  " U40 UD6nUR                  n/ n[        U R                  U5       H  u  pxXpR
                  ;   d  M  U R                  R                  (       a  U R                  U5      nU R                  R                  (       a~  USS2U R                   R                  S24   nUR                  u  ppU R                  R                  nUR                  XU-  X-  S5      nUR                  SSSS5      R                  5       nUR!                  U5        M     [#        [%        U5      UUR&                  S	9$ )
a  
Examples:

```python
>>> from transformers import AutoImageProcessor, AutoBackbone
>>> import torch
>>> from PIL import Image
>>> import httpx
>>> from io import BytesIO

>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> with httpx.stream("GET", url) as response:
...     image = Image.open(BytesIO(response.read()))

>>> processor = AutoImageProcessor.from_pretrained("facebook/pixio-huge")
>>> model = AutoBackbone.from_pretrained(
...     "facebook/pixio-huge", out_features=["stage7", "stage15", "stage23", "stage31"]
... )

>>> inputs = processor(image, return_tensors="pt")

>>> outputs = model(**inputs)
>>> feature_maps = outputs.feature_maps
>>> list(feature_maps[-1].shape)
[1, 1280, 16, 16]
```Toutput_hidden_statesNr[   r   r   rF   r   )feature_mapsr   r   )rW   r   r   zipstage_namesout_featuresrC   apply_layernormr   reshape_hidden_statesr%   rd   r(   re   rf   
contiguousappendr   r3   r   )rT   rv   r   r   outputr   r   stagehidden_stater{   r|   rX   rY   r(   s                 r:   r~   PixioBackbone.forward	  s:   6 *.%&??<8"&,,/?"J6"J,,#&t'7'7#GE)));;..#'>>,#?L;;44#/4??3O3O3Q0Q#RL3?3E3E0J6!%!7!7J#/#7#7
jDXZ_Zmoq#rL#/#7#71a#C#N#N#PL##L1 $H |,'((
 	
r9   r)   N)r*   r+   r,   r-   rJ   r   r
   r   r   r~   r8   r)   r9   r:   r   r     s'    2
ELL 2
FCU<V 2
[i 2
r9   r   )r   r   r   r   )/r.   rJ   huggingface_hub.dataclassesr   r   modeling_layersr   modeling_outputsr   r   r	   processing_utilsr
   utilsr   r   r   utils.genericr   r   utils.output_capturingr   dinov2.configuration_dinov2r   dinov2.modeling_dinov2r   r   r   vit.modeling_vitr   r   r   r   r   r<   ModulerA   r   r   r   r   r   r   r   r   r   __all__r)   r9   r:   <module>r      s0     .  9 [ [ & C C I 5 6 
 f e 01%&, %&  2%&P	- 	Dbii DN	) 	4\ 4	N 		y 	+ 2- @' @  %
% %
 %
P 
3
N 3

3
l Qr9   