
    Z j&                     p   S r SSKrSSKrSSKJr  SSKJr  SSKJ	r	J
r
  SSKJrJr  SSKJrJrJrJrJr  SS	KJrJr  SS
KJrJr  S\\\4   S\\\4   S\4S jr SS\S\\\\4   -  \\   -  \\   -  S\\\\4   -  \\   -  \\   -  S\\-  S-  S\4
S jjr " S S\SS9r \ " S S\5      5       r!S/r"g)z"Image processor class for Pixtral.    N)
functional   )TorchvisionBackend)BatchFeatureget_size_dict)group_images_by_shapereorder_images)ChannelDimension
ImageInputPILImageResamplingSizeDictget_image_size)ImagesKwargsUnpack)
TensorTypeauto_docstring
image_size
patch_sizereturnc                     U u  p#[        U[        [        45      (       a  UOX4u  pEUS-
  U-  S-   nUS-
  U-  S-   nXv4$ )a,  
Calculate the number of image tokens given the image size and patch size.

Args:
    image_size (`tuple[int, int]`):
        The size of the image as `(height, width)`.
    patch_size (`tuple[int, int]`):
        The patch size as `(height, width)`.

Returns:
    `int`: The number of image tokens.
   )
isinstancetuplelist)r   r   heightwidthpatch_heightpatch_widthnum_width_tokensnum_height_tokenss           څ/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/pixtral/image_processing_pixtral.py_num_image_tokensr"      sZ     MF.8eT].S.S
ZdYqL	k1A5!4q8..    input_imagesizeinput_data_formatc                    [        U[        [        45      (       a  UOX4u  pE[        U[        [        45      (       a  UOX"4u  pg[        X5      u  p[	        X-  X-  5      n
U
S:  aB  [        [        R                  " X-  5      5      n[        [        R                  " X-  5      5      n	[        X4Xg45      u  pX-  X-  4$ )a  
Find the target (height, width) dimension of the output image after resizing given the input image and the desired
size.

Args:
    input_image (`ImageInput`):
        The image to resize.
    size (`int` or `tuple[int, int]`):
        Max image size an input image can be. Must be a dictionary with the key "longest_edge".
    patch_size (`int` or `tuple[int, int]`):
        The patch_size as `(height, width)` to use for resizing the image. If patch_size is an integer, `(patch_size, patch_size)`
        will be used
    input_data_format (`ChannelDimension`, *optional*):
        The channel dimension format of the input image. If unset, will use the inferred format from the input.

Returns:
    `tuple`: The target (height, width) dimension of the output image after resizing.
r   )	r   r   r   r   maxintmathfloorr"   )r$   r%   r   r&   
max_height	max_widthr   r   r   r   ratior    r   s                r!   get_resize_output_image_sizer/   1   s    0 %/teT]$C$CD$J.8eT].S.S
ZdYqL";BMF#U%67Eqy TZZ/0DJJu}-.*;VOlMh*i'+-=-KKKr#   c                   4    \ rS rSr% Sr\\\4   \-  \S'   Sr	g)PixtralImageProcessorKwargsY   z
patch_size (`Union[dict[str, int], int]` *optional*, defaults to `{"height": 16, "width": 16}`):
    Size of the patches in the model, used to calculate the output image size.
r    N)
__name__
__module____qualname____firstlineno____doc__dictstrr)   __annotations____static_attributes__r3   r#   r!   r1   r1   Y   s    
 S#X$$r#   r1   F)totalc                     ^  \ rS rSr\R
                  r/ SQr/ SQrSSS.r	SS0r
SrSrSrSrSr\rS	S
/rS\\   4U 4S jjr\S\S\\   S\4U 4S jj5       r S%SSS\S\SSSS4
U 4S jjjrS	\S   S
\\\\4      SS4S jr S%S\S   S\S\SSS\S\S\S\ S\S\ \\    -  S-  S \ \\    -  S-  S!\S-  S"\!\"-  S-  S\#\!\4   \-  S-  S\4S# jjr$S$r%U =r&$ )&PixtralImageProcessorb   )g3<4'?gwgM?gy{ ?)gB91?gwt.?g	U?   r   r   longest_edgei   Tpixel_valuesimage_sizeskwargsc                 &   > [         TU ]  " S0 UD6  g )Nr3   )super__init__)selfrF   	__class__s     r!   rI   PixtralImageProcessor.__init__r   s    "6"r#   imagesr   c                 &   > [         TU ]  " U40 UD6$ N)rH   
preprocess)rJ   rM   rF   rK   s      r!   rP    PixtralImageProcessor.preprocessu   s    w!&3F33r#   Nimageztorch.Tensorr%   r   resamplez7PILImageResampling | tvF.InterpolationMode | int | Nonec                   > UR                   (       a  UR                   UR                   4nOFUR                  (       a*  UR                  (       a  UR                  UR                  4nO[        S5      eUR                  (       a*  UR                  (       a  UR                  UR                  4nO[        S5      e[	        XUS9n[
        T	U ]  " U4[        US   US   S9US.UD6$ )a%  
Resize an image. The longest edge of the image is resized to size["longest_edge"], with the aspect ratio
preserved. Output dimensions are aligned to patch_size.

Args:
    image (`torch.Tensor`):
        Image to resize.
    size (`SizeDict`):
        Dict containing the longest possible edge of the image.
    patch_size (`SizeDict`):
        Patch size used to calculate the size of the output image.
    resample (`PILImageResampling | tvF.InterpolationMode | int | None`, *optional*):
        Resampling filter to use when resizing the image.
z@size must contain either 'longest_edge' or 'height' and 'width'.z-patch_size must contain 'height' and 'width'.)r%   r   r   r   rB   )r%   rS   )rC   r   r   
ValueErrorr/   rH   resizer   )
rJ   rR   r%   r   rS   rF   
size_tuplepatch_size_tupleoutput_sizerK   s
            r!   rV   PixtralImageProcessor.resizey   s    , ++T->->?J[[TZZ++tzz2J_``!1!1 * 1 1:3C3CDLMM25Vfgw~
 Ak!nMX`
dj
 	
r#   c                 8   [        S U 5       5      [        S U 5       5      4n[        X5       VVs/ s HC  u  pE[        R                  R                  R                  USUS   US   -
  SUS   US   -
  4S9PME     nnn[        R                  " U5      $ s  snnf )a;  
Pads images to form a batch of same shape.

Args:
    pixel_values (`list[torch.Tensor]`):
        A list of pixel values, each of shape (channels, height, width).
    image_sizes (`list[tuple[int, int]]`):
        A list of (height, width) for each image.

Returns:
    `torch.Tensor`: Stacked and padded images.
c              3   *   #    U  H	  oS    v   M     g7f)r   Nr3   .0ss     r!   	<genexpr>:PixtralImageProcessor._pad_for_batching.<locals>.<genexpr>   s     3{!1{   c              3   *   #    U  H	  oS    v   M     g7f)r   Nr3   r]   s     r!   r`   ra      s     8S{!1{rb   r   r   )pad)r(   ziptorchnnr   rd   stack)rJ   rD   rE   	max_shapeimgr%   paddeds          r!   _pad_for_batching'PixtralImageProcessor._pad_for_batching   s    " 3{33S8S{8S5ST	 !;
;	 HH##Ca1Q1GIVWL[_`a[bLb-c#d; 	 
 {{6""	
s   A
B	do_resizedo_center_crop	crop_size
do_rescalerescale_factordo_normalize
image_mean	image_stddisable_groupingreturn_tensorsc           	      \   [        U=(       d    U R                  SS9n[        S0 UD6n[        XS9u  nn0 nUR	                  5        H$  u  nnU(       a  U R                  UUUUS9nUUU'   M&     [        UU5      n[        UUS9u  nn[        [        U5      5       Vs/ s H  nUU   S   PM     nn0 nUR	                  5        H8  u  nnU(       a  U R                  UU5      nU R                  UXxXU5      nUUU'   M:     [        UU5      nU R                  UUS9n[        UUS.US9$ s  snf )	NT)default_to_square)rv   )rR   r%   r   rS   r   )rD   rE   )datatensor_typer3   )r   r   r   r   itemsrV   r	   rangelencenter_croprescale_and_normalizerl   r   )rJ   rM   rn   r%   rS   ro   rp   rq   rr   rs   rt   ru   rv   rw   r   rF   patch_size_sdgrouped_imagesgrouped_images_indexresized_images_groupedshapestacked_imagesresized_imagesibatch_image_sizesprocessed_images_groupedprocessed_imagespadded_imagess                               r!   _preprocess!PixtralImageProcessor._preprocess   sy   $ #:#@TXY
 .:./DV/o,,!#%3%9%9%;!E>!%(tX` "- " -;"5) &< ((>@TU/D^fv/w,,AFsK_G`AabAaA1!4Q7Aab#% %3%9%9%;!E>!%!1!1.)!L!77
LV_N /=$U+ &< **BDXY..)) / 

 "/@QR`n
 	
# cs   D)r3   rO   )'r4   r5   r6   r7   r   BICUBICrS   rt   ru   r   r%   ry   rn   rq   rs   do_convert_rgbr1   valid_kwargsmodel_input_namesr   rI   r   r   r   rP   r   rV   r   r   r)   rl   boolfloatr:   r   r9   r   r<   __classcell__)rK   s   @r!   r?   r?   b   s   !))H4J4I,JD!DIJLN.L'7#(C!D # 4 4v>Y7Z 4_k 4 4 OS%
%
 %
 	%

 L%
 
%
 %
N#>*# %S/*# 
	#N 8<3
^$3
 3
 	3

 L3
 3
 3
 3
 3
 3
 DK'$.3
 4;&-3
 +3
 j(4/3
 cNX-43
" 
#3
 3
r#   r?   rO   )#r8   r*   rf   torchvision.transforms.v2r   tvFimage_processing_backendsr   image_processing_utilsr   r   image_transformsr   r	   image_utilsr
   r   r   r   r   processing_utilsr   r   utilsr   r   r   r)   r"   r   r:   r/   r1   r?   __all__r3   r#   r!   <module>r      s   )   7 ; A E e e 4 //%S/ /uS#X /SV /0 8<	%L%L
c3h
$s)
+eCj
8%L eCHo%S	1E#J>%L --4	%L
 %LP%,e % H
. H
 H
V #
#r#   