
    Z jB                        S r SSKrSSKJr  SSKJr  SSKrSSKJr	  SSK
Jr  SSKJr  SS	KJrJrJr  SS
KJrJrJr  SSKJrJr  SSKJrJr  S\S\\   4S jrS\\\4   S\\\4   S\\\4   4S jr \" SS9S\S\S\RB                  4S j5       r" S%SSS\\\4   S\\\\\4   -  SS4S jjr# S&S\\\4   S\RB                  S\$S\\\4   4S jjr% " S  S!\SS"9r&\ " S# S$\5      5       r'S$/r(g)'z!Image processor class for Llama4.    N)defaultdict)	lru_cache)
functional   )TorchvisionBackend)BatchFeature)group_images_by_shapereorder_imagessplit_to_tiles)
ImageInputPILImageResamplingSizeDict)ImagesKwargsUnpack)
TensorTypeauto_docstringdividendreturnc                     [        5       n[        S[        U S-  5      S-   5       H1  nX-  S:X  d  M  UR                  U5        UR                  X-  5        M3     U$ )a  
Calculate all factors of a given number, i.e. a divisor that leaves
no remainder. For example, if dividend=12, it will return {1, 2, 3, 4, 6, 12}.

Args:
    dividend (int): The number to find factors for.

Returns:
    set: A set containing all factors of the number.
         ?r   )setrangeintadd)r   factors_setis      ڃ/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/llama4/image_processing_llama4.pyget_factorsr      sV     %K1c(C-(1,-<1OOAOOHM* .     
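
# Illustrative walk-through (comment only, not part of the module's API):
# get_factors(12) probes i = 1, 2, 3 (everything up to sqrt(12) ~ 3.46) and adds
# each divisor together with its cofactor, giving {1, 2, 3, 4, 6, 12}.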


def get_max_res_without_distortion(
    image_size: tuple[int, int],
    target_size: tuple[int, int],
) -> tuple[int, int]:
    """
    Determines the maximum resolution to which an image can be resized without distorting its
    aspect ratio, based on the target resolution.

    Args:
        image_size (tuple[int, int]): The original resolution of the image (height, width).
        target_size (tuple[int, int]): The desired resolution to fit the image into (height, width).

    Returns:
        tuple[int, int]: The optimal dimensions (height, width) to which the image should be resized.

    Example:
        >>> get_max_res_without_distortion([200, 300], target_size=[450, 200])
        (134, 200)
        >>> get_max_res_without_distortion([800, 600], target_size=[450, 1300])
        (450, 338)
    """
    original_height, original_width = image_size
    target_height, target_width = target_size

    scale_w = target_width / original_width
    scale_h = target_height / original_height

    # The smaller scale is the limiting one: that side is resized exactly to the
    # target while the other side shrinks proportionally.
    if scale_w < scale_h:
        new_width = target_width
        new_height = min(math.floor(original_height * scale_w), target_height)
    else:
        new_height = target_height
        new_width = min(math.floor(original_width * scale_h), target_width)

    return new_height, new_width
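
# Worked example (illustrative numbers only): for image_size=(400, 600) and
# target_size=(300, 300), scale_w = 300/600 = 0.5 and scale_h = 300/400 = 0.75.
# Width is the limiting side, so the result is (floor(400 * 0.5), 300) = (200, 300):
# the resized image fits inside the target on both axes with its ratio preserved.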


@lru_cache(maxsize=1)
def find_supported_resolutions(max_num_chunks: int, patch_size: SizeDict) -> list[tuple[int, int]]:
    """
    Computes all of the allowed resolutions for a fixed number of chunks
    and patch_size. Useful for when dividing an image into chunks.

    Args:
        max_num_chunks (int): Maximum number of chunks for processing.
        patch_size (SizeDict): Size of the patch; height and width must be equal.

    Returns:
        list[tuple[int, int]]: List of possible resolutions as (height, width) tuples.

    Example:
        >>> max_num_chunks = 5
        >>> patch_size = SizeDict(height=224, width=224)
        >>> find_supported_resolutions(max_num_chunks, patch_size)
        [(224, 896), (448, 448), (224, 224), (896, 224), (224, 672),
        (672, 224), (224, 448), (448, 224)]

        Given max_num_chunks=4, patch_size=224, it will create a dictionary:
        {
        0.25: [(1, 4)],
        1.0: [(2, 2), (1, 1)],
        4.0: [(4, 1)],
        0.33: [(1, 3)],
        3.0: [(3, 1)],
        0.5: [(1, 2)],
        2.0: [(2, 1)]
        }

        and return the resolutions multiplied by the patch_size:
        [(1*224, 4*224), (2*224, 2*224), ..., (2*224, 1*224)]
    """
    height, width = patch_size.height, patch_size.width
    if height != width:
        raise ValueError("`size` must be square.")

    patch_size = height

    # Group every (height, width) chunk grid whose chunk count does not exceed
    # max_num_chunks by its aspect ratio.
    asp_dict = defaultdict(list)
    for chunk_size in range(max_num_chunks, 0, -1):
        _factors = sorted(get_factors(chunk_size))
        _asp_ratios = [(factor, chunk_size // factor) for factor in _factors]
        for height, width in _asp_ratios:
            ratio_float = height / width
            asp_dict[ratio_float].append((height, width))

    # Get the resolutions multiplied by the patch_size.
    possible_resolutions = []
    for value in asp_dict.values():
        for height, depth in value:
            possible_resolutions.append((height * patch_size, depth * patch_size))

    return possible_resolutions


def pad_to_best_fit(
    images: "torch.Tensor",
    target_size: tuple[int, int],
    background_color: int | tuple[int, int, int] = 0,
) -> "torch.Tensor":
    """
    Pads an image to fit the target size.

    Args:
        images (`torch.Tensor`):
            The images to pad.
        target_size (`tuple[int, int]`):
            The (height, width) to pad the images to.
        background_color (`int` or `tuple[int, int, int]`, *optional*, defaults to 0):
            The color to use for the padding. Can be an integer for single channel or a
            tuple of integers for multi-channel images. If passed as an integer
            in multi-channel mode, it will default to `0` in subsequent channels.

    Returns:
        `torch.Tensor`: The padded images.
    """
    num_channels = images.shape[1] if len(images.shape) == 4 else images.shape[0]
    if isinstance(background_color, int):
        background_color = [background_color] + [0] * (num_channels - 1)
    elif len(background_color) != num_channels:
        raise ValueError(
            f"background_color must have no more than {num_channels} elements to match the number of channels"
        )

    height, width = images.shape[-2:]
    target_height, target_width = target_size
    paste_x_right = target_width - width
    paste_y_right = target_height - height
    # Pad only on the right and bottom so the image stays anchored at the top-left.
    padded_images = tvF.pad(images, padding=[0, 0, paste_x_right, paste_y_right], fill=background_color)
    return padded_images


def get_best_fit(
    image_size: tuple[int, int],
    possible_resolutions: torch.Tensor,
    resize_to_max_canvas: bool = False,
) -> tuple[int, int]:
    """
    Determines the best canvas possible from a list of possible resolutions to, without distortion,
    resize an image to.

    For each possible resolution, calculates the scaling factors for
    width and height, and selects the smallest one, which is the limiting side.
    E.g. to match the canvas you can upscale height by 2x, and width by 1.5x,
    therefore, the maximum upscaling you can do is min(2, 1.5) = 1.5.

    If upscaling is possible (any of the scaling factors is greater than 1),
    then picks the smallest upscaling factor > 1, unless resize_to_max_canvas is True.

    If upscaling is not possible, then picks the largest scaling factor <= 1, i.e.
    reduce downscaling as much as possible.

    If there are multiple resolutions with the same max scale, we pick the one with the lowest area,
    to minimize padding. E.g., the same image can be upscaled to 224x224 and 224x448, but the latter
    has more padding.

    Args:
        image_size (tuple[int, int]): A tuple containing the height and width of the image.
        possible_resolutions (torch.Tensor): A tensor of shape (N, 2) where each
            row represents a possible resolution (height, width).
        resize_to_max_canvas (bool): If True, will return the largest upscaling resolution.

    Returns:
        tuple[int, int]: The best resolution (height, width) for the given image.

    Example:
        >>> image_size = (200, 300)
        >>> possible_resolutions = torch.tensor([[224, 672],
        ...                                     [672, 224],
        ...                                     [224, 448],
        ...                                     [448, 224],
        ...                                     [224, 224]])
        >>> get_best_fit(image_size, possible_resolutions)
        (224, 448)

        We have:
            scale_w = tensor([2.2400, 0.7467, 1.4933, 0.7467, 0.7467])
            scale_h = tensor([1.1200, 3.3600, 1.1200, 2.2400, 1.1200])
            scales = tensor([1.1200, 0.7467, 1.1200, 0.7467, 0.7467])
        Two of the scales are > 1:
            upscaling_options = tensor([1.1200, 1.1200])
            selected_scale = tensor(1.1200)
        So we pick the resolution with the smallest area:
            areas = tensor([150528, 100352])  # [672, 224], [224, 448]
            optimal_canvas = tensor([224, 448])
    """
    original_height, original_width = image_size

    # All candidate heights/widths.
    target_heights, target_widths = (
        possible_resolutions[:, 0],
        possible_resolutions[:, 1],
    )

    # Scaling factors to resize the image without distortion.
    scale_w = target_widths / original_width
    scale_h = target_heights / original_height

    # The min scale between width and height is the limiting side.
    scales = torch.where(scale_w > scale_h, scale_h, scale_w)

    # Keep only the scales that allow upscaling.
    upscaling_options = scales[scales >= 1]
    if len(upscaling_options) > 0:
        if resize_to_max_canvas:
            selected_scale = torch.max(upscaling_options)
        else:
            selected_scale = torch.min(upscaling_options)
    else:
        # No upscaling possible: minimize downscaling, i.e. take the max scale < 1.
        downscaling_options = scales[scales < 1]
        selected_scale = torch.max(downscaling_options)

    # All resolutions that share this scaling factor,
    # e.g. you can upscale to 224x224, 224x448 and 224x672 without distortion.
    chosen_canvas = possible_resolutions[scales == selected_scale]

    # If there are multiple resolutions, pick the one with minimum area to reduce padding.
    if len(chosen_canvas) > 1:
        areas = chosen_canvas[:, 0] * chosen_canvas[:, 1]
        optimal_idx = torch.argmin(areas)
        optimal_canvas = chosen_canvas[optimal_idx]
    else:
        optimal_canvas = chosen_canvas[0]

    return tuple(optimal_canvas.tolist())


class Llama4ImageProcessorKwargs(ImagesKwargs, total=False):
    """
    max_patches (`int`, *optional*, defaults to 16):
        The maximum number of patches to be extracted from the image.
        Can be overridden by the `max_patches` parameter in the `preprocess` method.
    resize_to_max_canvas (`bool`, *optional*, defaults to False):
        Whether to resize the image to the maximum canvas size.
        If True, picks the canvas that allows the largest resizing without distortion.
        If False, downsample as little as possible, including no resizing at all,
        but never upsample, unless the image is smaller than the patch size.
    """

    max_patches: int
    resize_to_max_canvas: bool


@auto_docstring
class Llama4ImageProcessor(TorchvisionBackend):
    resample = PILImageResampling.BILINEAR
    image_mean = [0.5, 0.5, 0.5]
    image_std = [0.5, 0.5, 0.5]
    size = {"height": 336, "width": 336}
    do_resize = True
    do_rescale = True
    do_normalize = True
    do_convert_rgb = True
    max_patches = 16
    resize_to_max_canvas = False
    valid_kwargs = Llama4ImageProcessorKwargs

    def __init__(self, **kwargs: Unpack[Llama4ImageProcessorKwargs]):
        super().__init__(**kwargs)

    @auto_docstring
    def preprocess(self, images: ImageInput, **kwargs: Unpack[Llama4ImageProcessorKwargs]) -> BatchFeature:
        return super().preprocess(images, **kwargs)

    def rescale_and_normalize(
        self,
        images: "torch.Tensor",
        do_rescale: bool,
        rescale_factor: float,
        do_normalize: bool,
        image_mean: float | list[float],
        image_std: float | list[float],
    ) -> "torch.Tensor":
        """
        Rescale and normalize images.
        Override to rescale and normalize the images in torch.bfloat16 as in the original implementation.
        """
        if do_rescale and do_normalize:
            images = images.to(dtype=torch.bfloat16) * rescale_factor
            images = self.normalize(images, image_mean, image_std)
        elif do_rescale:
            images = images * rescale_factor
        elif do_normalize:
            images = self.normalize(images, image_mean, image_std)
        return images

    def _preprocess(
        self,
        images: list["torch.Tensor"],
        do_resize: bool,
        size: SizeDict,
        resample: "PILImageResampling | tvF.InterpolationMode | int | None",
        do_rescale: bool,
        rescale_factor: float,
        do_normalize: bool,
        image_mean: float | list[float] | None,
        image_std: float | list[float] | None,
        max_patches: int,
        resize_to_max_canvas: bool,
        disable_grouping: bool,
        return_tensors: str | TensorType | None,
        **kwargs,
    ) -> BatchFeature:
        possible_resolutions = find_supported_resolutions(max_num_chunks=max_patches, patch_size=size)
        possible_resolutions = torch.tensor(possible_resolutions, device=images[0].device)

        # Group images by shape so each group is processed as one batched tensor.
        grouped_images, grouped_images_index = group_images_by_shape(images, disable_grouping=disable_grouping)
        grouped_processed_images = {}
        grouped_aspect_ratios = {}
        for shape, stacked_images in grouped_images.items():
            image_size = stacked_images.shape[-2:]
            target_size = get_best_fit(image_size, possible_resolutions, resize_to_max_canvas=resize_to_max_canvas)
            # If target_size requires upscaling, we might want to limit the upscaling to max_upscaling_size.
            max_upscaling_size = None if resize_to_max_canvas else size.height
            if max_upscaling_size is not None:
                new_target_height = min(max(image_size[0], max_upscaling_size), target_size[0])
                new_target_width = min(max(image_size[1], max_upscaling_size), target_size[1])
                target_size_without_distortion = (new_target_height, new_target_width)
            else:
                target_size_without_distortion = target_size

            # Resize to the target canvas while preserving the aspect ratio.
            new_size_without_distortion = get_max_res_without_distortion(image_size, target_size_without_distortion)
            new_size_without_distortion = SizeDict(
                height=max(new_size_without_distortion[0], 1), width=max(new_size_without_distortion[1], 1)
            )
            processed_images = self.resize(stacked_images, new_size_without_distortion, resample=resample)

            # Pad to the target canvas, then rescale/normalize before tiling.
            processed_images = pad_to_best_fit(processed_images, target_size)
            processed_images = self.rescale_and_normalize(
                processed_images, do_rescale, rescale_factor, do_normalize, image_mean, image_std
            )

            # Split the padded canvas into patch-sized tiles.
            ratio_h, ratio_w = target_size[0] // size.height, target_size[1] // size.width
            processed_images = split_to_tiles(processed_images, ratio_h, ratio_w)
            grouped_processed_images[shape] = processed_images
            grouped_aspect_ratios[shape] = torch.tensor(
                [[ratio_h, ratio_w]] * stacked_images.shape[0], device=images[0].device
            )

            # Add a global tile to the processed tiles if there is more than one tile.
            if ratio_h * ratio_w > 1:
                global_tiles = self.resize(stacked_images, size, resample=resample)
                global_tiles = self.rescale_and_normalize(
                    global_tiles, do_rescale, rescale_factor, do_normalize, image_mean, image_std
                )
                grouped_processed_images[shape] = torch.cat([processed_images, global_tiles.unsqueeze(1)], dim=1)

        processed_images = reorder_images(grouped_processed_images, grouped_images_index)
        aspect_ratios = reorder_images(grouped_aspect_ratios, grouped_images_index)
        processed_images = torch.cat(processed_images, dim=0) if return_tensors else processed_images
        aspect_ratios = torch.stack(aspect_ratios, dim=0) if return_tensors else aspect_ratios
        return BatchFeature(
            data={"pixel_values": processed_images, "aspect_ratios": aspect_ratios},
            tensor_type=return_tensors,
        )


__all__ = ["Llama4ImageProcessor"]