
    Z j_,                     d   S r SSKrSSKrSSKJr  SSKJr  SSKJ	r	J
r
  SSKJrJrJrJrJrJr  SSKJrJr  SS	KJrJrJr  \" 5       (       a  SS
KJr  SSKJr  S\S\\\4   4S jr S\S\4S jr! " S S\SS9r"S\#\#S      S\\\4   4S jr$SSS\\\4   SS4S jr%\ " S S\5      5       r&S/r'g)z#Image processor class for Idefics2.    N   )TorchvisionBackend)BatchFeature)group_images_by_shapereorder_images)IMAGENET_STANDARD_MEANIMAGENET_STANDARD_STD
ImageInputPILImageResamplingSizeDictmake_nested_list_of_images)ImagesKwargsUnpack)
TensorTypeauto_docstringis_vision_available)Image)
functionalsizereturnc                     U R                   SS u  p#UR                  nUR                  nX2-  nX2:  a  X5:  a  Un[        X6-  5      nOX#:  a  X%:  a  Un[        X&-  5      n[	        X$5      n[	        X45      nX#4$ )z
Get the output size of the image after resizing given a dictionary specifying the max and min sizes.
Images are always channels-first (CHW).
N)shapeshortest_edgelongest_edgeintmax)imager   heightwidthmin_lenmax_lenaspect_ratios          ڇ/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/idefics2/image_processing_idefics2.pyget_resize_output_image_sizer%   (   s    
 KK$MF  GG>L5?U)*	F,F)*!FE=    r   c                 >   [        5       (       a  [        U [        R                  5      (       d  U $ U R                  S:X  a  U $ U R	                  S5      n[        R
                  " SUR                  S5      n[        R                  " X!5      nUR	                  S5      nU$ )z|
Converts an image to RGB format. Only converts if the image is of type PIL.Image.Image, otherwise returns the image
as is.
RGBRGBA)   r*   r*   )r   
isinstancer   modeconvertnewr   alpha_composite)r   
image_rgba
backgroundr/   s       r$   convert_to_rgbr2   >   s}    
   
5%++(F(FzzUv&J6:??ODJ++JCO%--e4Or&   c                   $    \ rS rSr% Sr\\S'   Srg)Idefics2ImageProcessorKwargsP   z
do_image_splitting (`bool`, *optional*, defaults to `self.do_image_splitting`):
    Whether to split the image into a sequence 4 equal sub-images concatenated with the original image.
do_image_splitting N)__name__
__module____qualname____firstlineno____doc__bool__annotations____static_attributes__r7   r&   r$   r4   r4   P   s    
 r&   r4   F)totalimages_listztorch.Tensor|np.ndarrayc                     / nU  H*  nU H!  nUR                  UR                  SS 5        M#     M,     [        S U 5       5      n[        S U 5       5      nXE4$ )z@
Get the maximum height and width across all images in a batch.
r   Nc              3   *   #    U  H	  oS    v   M     g7f)r   Nr7   .0r   s     r$   	<genexpr>'get_max_height_width.<locals>.<genexpr>b   s     5!W   c              3   *   #    U  H	  oS    v   M     g7f)   Nr7   rD   s     r$   rF   rG   c   s     4GrH   )appendr   r   )rA   image_sizesimagesr   
max_height	max_widths         r$   get_max_height_widthrP   Y   sa     KEu{{23/0   555J444I""r&   torch.Tensoroutput_sizec                     U R                   SS u  p#[        R                  " U[        R                  U R                  S9nSUSU2SU24'   U$ )z[
Make a pixel mask for the image, where 1 indicates a valid pixel and 0 indicates padding.
r   NdtypedevicerJ   )r   torchzerosint64rV   )r   rR   input_heightinput_widthmasks        r$   make_pixel_maskr]   g   sM     !&BC 0L;;{%++ellKD()D,	$%Kr&   c                     ^  \ rS rSr\r\R                  r\	r
\rSrSrSrSrSrSrSrSSS.rSS/rS	\\   4U 4S
 jjr\S\S	\\   S\4U 4S jj5       rS\S\4S jr S)SSS\SSSS4U 4S jjjrS*S\S\S\4S jjr SSS\!\!S      4S jr" S+SSS\#\\4   S\S\#S   4S jjr$S\!\!S      S\%S\SSS\%S\&S \%S!\&\!\&   -  S-  S"\&\!\&   -  S-  S#\%S-  S$\%S-  S%\%S-  S&\'\(-  S-  S\4S' jr)S(r*U =r+$ ),Idefics2ImageProcessorq   TFiz  i  )r   r   pixel_valuespixel_attention_maskkwargsc                 &   > [         TU ]  " S0 UD6  g )Nr7   )super__init__)selfrc   	__class__s     r$   rf   Idefics2ImageProcessor.__init__   s    "6"r&   rM   r   c                 &   > [         TU ]  " U40 UD6$ N)re   
preprocess)rg   rM   rc   rh   s      r$   rl   !Idefics2ImageProcessor.preprocess   s    w!&3F33r&   r   c                     [        U5      $ )zConvert an image to RGB format.)r2   )rg   r   s     r$   r2   %Idefics2ImageProcessor.convert_to_rgb   s    e$$r&   NrQ   r   resamplez7PILImageResampling | tvF.InterpolationMode | int | Nonec                 0  > UR                   (       a  UR                  (       a  [        X5      nOFUR                  (       a*  UR                  (       a  UR                  UR                  4nO[        S5      e[        TU ]  " U[        US   US   S94SU0UD6$ )z7Resize using Idefics2 shortest_edge/longest_edge logic.zWSize must contain 'height' and 'width' keys or 'shortest_edge' and 'longest_edge' keys.r   rJ   )r   r    rp   )	r   r   r%   r   r    
ValueErrorre   resizer   )rg   r   r   rp   rc   new_sizerh   s         r$   rs   Idefics2ImageProcessor.resize   sx     $"3"33E@H[[TZZTZZ0Hvwww~eXXa[QR%Tr_grkqrrr&   expected_ndimsc                 6    U R                  U5      n[        XS9$ )z1Prepare a nested images structure for processing.)rv   )fetch_imagesr   )rg   rM   rv   s      r$   _prepare_images_structure0Idefics2ImageProcessor._prepare_images_structure   s    ""6*)&PPr&   c           	      &   UR                   SS u  p#US-  nUS-  nUSSU2SU24   USSU2US24   USUS2SU24   USUS2US24   U/n[        [        US   5      5       VVs/ s H  ov Vs/ s H  oU   PM	     snPM     nnnU$ s  snf s  snnf )zi
Split a batch of images into 4 equal sub-images, and concatenate that sequence with the original image.
r   N   .r   )r   rangelen)	rg   rM   r   r    	mid_width
mid_heightbatch_split_imagesir   s	            r$   split_images#Idefics2ImageProcessor.split_images   s     RS)QJ	q[
 3ZiZ/03YZ/03
ZiZ/03
YZ/0
 SXX[\nop\qXrRstRsQ5GH5GEQx5GHRst!! Its   %	B.B<BBpadded_sizefill)rQ   rQ   c                 L   UR                   SS nUS   US   -
  nUS   US   -
  nUS:  d  US:  a  [        SU SU S35      eXB:w  a  SSXe4n[        R                  " XUSS	9n[        R
                  " U[        R                  UR                  S
9nSUSUS   2SUS   24'   X4$ )zM
Pad an image to the specified size and create the corresponding pixel mask.
r   Nr   rJ   zzPadding dimensions are negative. Please make sure that the padded size is larger than the original size. Got padded size: z, original size: .constant)r   padding_moderT   )r   rr   tvFpadrW   rX   rY   rV   )	rg   r   r   r   original_sizepadding_bottompadding_rightpadding
pixel_masks	            r$   r   Idefics2ImageProcessor.pad   s     BC($Q-*::#Aq)99A!233>-?PQ^P__`b 
 '!];GGGEJOE[[EKKU
=>
%]1%%'9q)9'99:  r&   	do_resize
do_rescalerescale_factordo_normalize
image_mean	image_stddo_padr6   disable_groupingreturn_tensorsc           	         [        XSS9u  nn0 nUR                  5        H#  u  nnU(       a  U R                  U5      nUUU'   M%     [        UUSS9nU(       a7  [	        U5       H(  u  nnU VVs/ s H  nU  H  nUPM     M     snnUU'   M*     [        UUSS9u  nn0 nUR                  5        H"  u  nnU(       a  U R                  UX4S9nUUU'   M$     [        UUSS9n[        UUSS9u  nn0 nUR                  5        H  u  nnU R                  UXVXxU	5      nUUU'   M!     [        UUSS9nU
(       a  [        S U 5       5      n[        U5      u  nn[        R                  " [        U5      U/US   S   R                  S   UU4Q7SUS   S   R                  06n [        R                  " [        U5      U/UU4Q7SUS   S   R                  06n![	        U5       H<  u  nn[	        U5       H'  u  n"nU R                  UUU45      u  U UU"4'   U!UU"4'   M)     M>     U nU
(       a  UW!S.n#OHUS	:X  a>  S
[        R                  " U Vs/ s H  n[        R                  " U5      PM     sn5      0n#OS
U0n#[!        U#US9$ s  snnf s  snf )NT)r   	is_nested)r   )rp   c              3   8   #    U  H  n[        U5      v   M     g 7frk   )r~   )rE   images_s     r$   rF   5Idefics2ImageProcessor._preprocess.<locals>.<genexpr>  s      N=M'W=Ms   r   rV   )ra   rb   ptra   )datatensor_type)r   itemsr   r   	enumeraters   rescale_and_normalizer   rP   rW   rX   r~   r   rV   r   stackr   )$rg   rM   r   r   rp   r   r   r   r   r   r   r6   r   r   rc   grouped_imagesgrouped_images_indexsplit_images_groupedr   stacked_imagesr   r   group_imagessublistr   resized_images_groupedresized_imagesprocessed_images_groupedprocessed_imagesmax_num_imagesrN   rO   processed_images_paddedpixel_attention_masksjr   s$                                       r$   _preprocess"Idefics2ImageProcessor._preprocess   s   " 0E0
,,  "%3%9%9%;!E>!!%!2!2>!B*8 ' &< &&:<P\`a#,\#:<8D"ZWRY5RY5"ZQ $; 0E+;t0
,, "$%3%9%9%;!E>!%^T!U,:"5) &< ((>@T`de/D-=0
,, $& %3%9%9%;!E>!77
LV_N /=$U+	 &<
 **BDXdhi  N=M NNN$89I$J!J	&+kk$%' #1%a(..q1:yI' (*1-44	'# %*KK$%% i(% (*1-44	%! ''78	6 )& 1HAuQUQYQY
I6RN+AqD13HA3N !2 9
  7$4NcdDt#"EKKSc0dScV1DSc0d$efD"$45D>BBg #[` 1es   /J J
r7   rk   )r   )r   ),r8   r9   r:   r;   r4   valid_kwargsr   BILINEARrp   r   r   r	   r   r   r   r   r   do_convert_rgbr6   default_to_squarer   model_input_namesr   rf   r   r
   r   rl   r2   r   rs   r   ry   listr   tupler   r=   floatstrr   r   r?   __classcell__)rh   s   @r$   r_   r_   q   sR   /L!**H'J%IIJLFN #6D')?@#(D!E # 4 4v>Z7[ 4`l 4 4%J %: % OS	ss s L	s 
s s"Q
 QC QXb Q
"> "d4;O6P "* PQ!#!27S/!IL!	-	.!2OCT.)*OC OC 	OC
 LOC OC OC OC DK'$.OC 4;&-OC tOC !4KOC +OC j(4/OC  
!OC OCr&   r_   )(r<   numpynprW   image_processing_backendsr   image_processing_utilsr   image_transformsr   r   image_utilsr   r	   r
   r   r   r   processing_utilsr   r   utilsr   r   r   PILr   torchvision.transforms.v2r   r   r   r   r%   r2   r4   r   rP   r]   r_   __all__r7   r&   r$   <module>r      s    *   ; 2 E  5 D D  7h 5c? ,*  $<u #d40I+J&K #PUVY[^V^P_ #> c3h N  lC/ lC lC^ $
$r&   