
    Z j                        S r SSKrSSKJr  SSKJr  SSKJr  SSK	J
r
JrJr  SSKJrJr  SS	KJrJr   " S
 S\SS9r SS\S\S\S\S\S\\\4   4S jjrSSS\SS4S jr S SSS\S\S\S   4S jjr\ " S S\5      5       rS/rg)!z"Image processor class for SigLIP2.    N)
functional   )TorchvisionBackend)BatchFeature)
ImageInputPILImageResamplingSizeDict)ImagesKwargsUnpack)
TensorTypeauto_docstringc                   .    \ rS rSr% Sr\\S'   \\S'   Srg)Siglip2ImageProcessorKwargs   aZ  
patch_size (`int`, *optional*, defaults to `self.patch_size`):
    The size (resolution) of each patch the image will be split to.
max_num_patches (`int`, *optional*, defaults to `self.max_num_patches`):
    The image will be resized to have at most this number of patches,
    and then padded in "patch" dimension to match this number exactly.

patch_sizemax_num_patches N)__name__
__module____qualname____firstlineno____doc__int__annotations____static_attributes__r       څ/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/siglip2/image_processing_siglip2.pyr   r      s     Or   r   F)totalimage_heightimage_widthr   r   epsreturnc                   ^ SSK mS[        S[        S[        S[        4U4S jjnUS-  S	pvXv-
  U:  a6  Xg-   S
-  nU" XU5      n	U" XU5      n
X-  X-  -  nX::  a  UnOUnXv-
  U:  a  M6  UnU" XU5      n	U" XU5      n
X4$ )a  
Determine image size based on max number of patches, ensure dimensions are divisible by patch size and image is at least 1 patch.

Args:
    image_height (`int`):
        Original image height.
    image_width (`int`):
        Original image width.
    patch_size (`int`):
        Patch size for processing.
    max_num_patches (`int`):
        Maximum number of patches.
    eps (`float`):
        Small threshold for binary search.

Returns:
    Tuple: (target_height, target_width)
r   Nscalesizer   r"   c                 d   > X-  nTR                  X2-  5      U-  n[        X#5      n[        U5      $ N)ceilmaxr   )r$   r%   r   scaled_sizemaths       r   get_scaled_image_sizeAget_image_size_for_max_num_patches.<locals>.get_scaled_image_sizeA   s5    lii 89JF*2;r   
   g      Y@   )r+   floatr   )r   r    r   r   r!   r,   	scale_min	scale_maxr$   target_heighttarget_widthnum_patchesr+   s               @r   "get_image_size_for_max_num_patchesr6   *   s    *  U  #  3  3   8Uy S
(&!+-e:N,UL$1l6OP)II  S
( E)%zJM(ZHL&&r   imagetorch.Tensorc                     U R                   u  p#nX1-  nXA-  nU R                  X%XU5      nUR                  SSSSS5      nUR                  XV-  S5      nU$ )z
Convert 3D tensor image of shape (num_channels, image_height, image_width) into 2D tensor of patches of shape
(num_patches_height * num_patches_width, patch_size * patch_size * num_channels).
   r   r/      r   )shapereshapepermute)r7   r   num_channelsr   r    num_patches_heightnum_patches_widthpatched_images           r   convert_image_to_patchesrD   Z   sl    
 /4kk+L%3#1MM,JcmnM!))!Q1a8M!))*<*PRTUMr   tensortarget_length	pad_value)r8   r8   c                    U R                   S   nX-
  n[        R                  " U4[        R                  S9nUS:  aG  SS/U R                  S-
  -  SU/-   n[        R
                  R                  R                  XSUS9n SXT* S& X4$ )z+
Pad the tensor along the first dimension.
r   )dtyper:   constant)modevalueN)r=   torchonesint32ndimnnr   pad)rE   rF   rG   current_lengthpadding_lengthmaskpaddings          r   pad_along_first_dimrW   h   s     \\!_N"3N::}&ekk:Da&FKK!O,>/BB$$((zQZ([!"_<r   c                   .  ^  \ rS rSr\r\R                  r/ SQr	/ SQr
SrSrSrSrSr/ SQrS\\   4U 4S jjr\S	\S\\   S
\4U 4S jj5       rS
\4U 4S jjrS	\S   S\S\S\SSS\S\S\S\\\   -  S-  S\\\   -  S-  S\\-  S-  S
\4S jrSr U =r!$ )Siglip2ImageProcessorx   )      ?r[   r[   T      pixel_valuespixel_attention_maskspatial_shapeskwargsc                 &   > [         TU ]  " S0 UD6  g )Nr   )super__init__selfrb   	__class__s     r   re   Siglip2ImageProcessor.__init__   s    "6"r   imagesr"   c                 &   > [         TU ]  " U40 UD6$ r'   )rd   
preprocess)rg   rj   rb   rh   s      r   rl    Siglip2ImageProcessor.preprocess   s    w!&3F33r   c                 H   > UR                  SS 5        [        TU ]  " S0 UD6$ )N	do_resizer   )poprd   _validate_preprocess_kwargsrf   s     r   rq   1Siglip2ImageProcessor._validate_preprocess_kwargs   s$    

;%w2<V<<r   r8   ro   r   r   resamplez7PILImageResampling | tvF.InterpolationMode | int | None
do_rescalerescale_factordo_normalize
image_meanN	image_stdreturn_tensorsc           	         / n/ n/ nU H  nU(       aD  [        UR                  S   UR                  S   UUS9u  nn[        UUS9nU R                  UUUS9nU R	                  UXgXU
5      n[        UU5      n[        UU5      u  nnUR                  S   U-  nUR                  S   U-  nUR                  UU45        UR                  U5        UR                  U5        M     [        UUUS.US9nU$ )Nr<   )r   r    r   r   )heightwidth)r7   r%   rs   r^   )datatensor_type)	r6   r=   r	   resizerescale_and_normalizerD   rW   appendr   )rg   rj   ro   r   r   rs   rt   ru   rv   rw   rx   ry   rb   pixel_masksr_   ra   r7   r|   r}   	size_dictpatchesrU   rA   rB   batch_features                            r   _preprocess!Siglip2ImageProcessor._preprocess   s     E B!&R %B)$3	! %F%@	%i(S..ujR^luvE /ujAG/IMGT!&RJ!> %B: =!!#57H"IJ(t$- 0 % ,(3"0
 '
 r   r   )"r   r   r   r   r   valid_kwargsr   BILINEARrs   rw   rx   ro   rt   rv   r   r   model_input_namesr   re   r   r   r   rl   tuplerq   listboolr   r0   strr   r   r   __classcell__)rh   s   @r   rY   rY   x   s6   .L!**H JIIJLJOR#(C!D # 4 4v>Y7Z 4_k 4 4=u =
3^$3 3 	3
 3 L3 3 3 3 DK'$.3 4;&-3 j(4/3 
3 3r   rY   )gh㈵>)r   )r   rM   torchvision.transforms.v2r   tvFimage_processing_backendsr   image_processing_utilsr   image_utilsr   r   r	   processing_utilsr
   r   utilsr   r   r   r   r0   r   r6   rD   rW   rY   __all__r   r   r   <module>r      s    )  7 ; 2 C C 4
,e 
 ^b-'-'$'-'58-'KN-'UZ-'
38_-'`N    BC+.;>
)*  K. K K\ #
#r   