
    Z jY                     D   S r SSKrSSKrSSKJr  SSKJr  SSKJ	r	J
r
  SSKJrJr  SSKJrJrJrJrJr  SS	KJrJr  SS
KJrJrJrJr  \R8                  " \5      rS\\\      \\   -  \-  S\\\      4S jr  " S S\SS9r! " S S\	5      r"\ " S S\5      5       r#S/r$g)zImage processor class for Fuyu.    N)
functional   )TorchvisionBackend)BatchFeatureget_size_dict)group_images_by_shapereorder_images)
ImageInputPILImageResamplingSizeDictis_valid_imagemake_list_of_images)ImagesKwargsUnpack)
TensorTypeauto_docstringloggingrequires_backendsimagesreturnc                    [        U 5      (       a  U //$ [        U [        5      (       a  [        S U  5       5      (       a  U $ [        U [        5      (       a  U  Vs/ s H  n[	        U5      PM     sn$ [        S5      es  snf )Nc              3   B   #    U  H  n[        U[        5      v   M     g 7fN)
isinstancelist).0images     /root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/fuyu/image_processing_fuyu.py	<genexpr>.make_list_of_list_of_images.<locals>.<genexpr>1   s     'TVE
5$(?(?Vs   zHimages must be a list of list of images or a list of images or an image.)r   r   r   allr   
ValueError)r   r   s     r   make_list_of_list_of_imagesr#   +   su     fz&$C'TV'T$T$T&$8>?u#E*??
_
`` @s   A?c                   >    \ rS rSr% Sr\S-  \S'   \\S'   \\S'   Sr	g)FuyuImagesKwargs:   a  
patch_size (`dict[str, int]`, *optional*, defaults to `{"height": 30, "width": 30}`):
    Dictionary in the format `{"height": int, "width": int}` specifying the size of the patches.
padding_value (`float`, *optional*, defaults to 1.0):
    The value to pad the image with.
padding_mode (`str`, *optional*, defaults to "constant"):
    The padding mode to use when padding the image.
N
patch_sizepadding_valuepadding_mode )
__name__
__module____qualname____firstlineno____doc__r   __annotations__floatstr__static_attributes__r*       r   r%   r%   :   s     4r4   r%   F)totalc                   @    \ rS rSrSrSS\\-  S-  4S jjrS	S jrSr	g)
FuyuBatchFeatureI   z
BatchFeature class for Fuyu image processor and processor.

The outputs dictionary from the processors contains a mix of tensors and lists of tensors.
Ntensor_typec                   ^^^	^
 Uc  U $ U R                  US9u  m	mUU	4S jmUU
4S jnU R                  5        H  u  m
n[        U[        5      (       aG  [        US   [        5      (       a/  U VVs/ s H  oU Vs/ s H
  oc" U5      PM     snPM     snnU T
'   Mb  [        U[        5      (       a  U Vs/ s H
  oc" U5      PM     snU T
'   M  U" U5      U T
'   M     U $ s  snf s  snnf s  snf )a  
Convert the inner content to tensors.

Args:
    tensor_type (`str` or [`~utils.TensorType`], *optional*):
        The type of tensors to use. If `str`, should be one of the values of the enum [`~utils.TensorType`]. If
        `None`, no modification is done.
)r9   c                 2   > T" U 5      (       a  U $ T" U 5      $ r   r*   )elem	as_tensor	is_tensors    r   _convert_tensor<FuyuBatchFeature.convert_to_tensors.<locals>._convert_tensor^   s    T?"r4   c                 X   >  T" U 5      $ !   TS:X  a  [        S5      e[        S5      e= f)Noverflowing_valueszKUnable to create tensor returning overflowing values of different lengths. zUnable to create tensor, you should probably activate padding with 'padding=True' to have batched tensors with the same length.)r"   )r<   r?   keys    r   _safe_convert_tensorAFuyuBatchFeature.convert_to_tensors.<locals>._safe_convert_tensorc   sB    &t,,..$%rss X s    )r   )_get_is_as_tensor_fnsitemsr   r   )selfr9   kwargsrD   valueelemsr<   r?   r=   r>   rC   s          @@@@r   convert_to_tensors#FuyuBatchFeature.convert_to_tensorsP   s     K#99k9R	9	#
		 **,JC%&&:eAh+E+EY^_Y^PUUKUT248UKY^_S	E4((DIJED1$7EJS	 17S	 '  L_ Ks   2	C$;CC$5C*C$c           
        ^^^^ [        U S/5        SSKmSSKJnJn  0 nTR                  S5      mTct  [        T5      S:  ae  TS   nU" U5      (       a  OR[        U[        5      (       d"  U" U5      (       d  [        U[        5      (       a  UmO[        S[        U5       S35      eUUUU4S	 jnU R                  5        H  u  p[        U	[        5      (       aP  [        U	S   [        5      (       a8  / n
U	 H*  nU
R                  U Vs/ s H
  o" U5      PM     sn5        M,     XU'   Mj  [        U	[        5      (       a  U	 Vs/ s H
  o" U5      PM     snXX'   M  U" U	5      XX'   M     XPl        U $ s  snf s  snf )
a  
Send all values to device by calling `v.to(*args, **kwargs)` (PyTorch only). This should support casting in
different `dtypes` and sending the `BatchFeature` to a different `device`.

Args:
    args (`Tuple`):
        Will be passed to the `to(...)` function of the tensors.
    kwargs (`Dict`, *optional*):
        Will be passed to the `to(...)` function of the tensors.

Returns:
    [`BatchFeature`]: The same instance after modification.
torchr   Nr   )is_torch_deviceis_torch_dtypedevicez*Attempting to cast a BatchFeature to type z. This is not supported.c                 ~   > TR                   " U 5      (       a  U R                  " T0 TD6$ Tb  U R                  TS9$ U $ )N)rR   )is_floating_pointto)r<   argsrR   rI   rO   s    r   _to FuyuBatchFeature.to.<locals>._to   sD    &&t,,ww///!wwfw--Kr4   )r   rO   utilsrP   rQ   getlenr   r2   intr"   rG   r   appenddata)rH   rV   rI   rP   rQ   new_dataargrW   kvnew_vrK   r<   rR   rO   s    ``          @@r   rU   FuyuBatchFeature.to{   sN    	$	*<H%>c$i!mq'Cc""C%%)=)=CQTAUAU !#McRUhZWo!pqq	 	 JJLDA!T""z!A$'='=ELL!>#d)!>? #At$$567QTs4yQ7!!f ! 	 "? 8s   	E2E7r^   r   )r   r   )
r+   r,   r-   r.   r/   r2   r   rL   rU   r3   r*   r4   r   r7   r7   I   s#    )cJ.>.E )V:r4   r7   c                     ^  \ rS rSrSrSSS.rSSS.r\R                  r	Sr
SrSrSrS	rS	rSrS
r/ SQr\rS\\   4U 4S jjr S5S\S\S\4S jjr  S6S\R6                  S\SSS\S\R6                  4
U 4S jjjrS\S   S\S\SSS\S\ S\S\ \\    -  S-  S\ \\    -  S-  S \S-  S!\ S-  S"\!S-  S#\S-  S$\!\"-  S-  S\#4S% jr$S7S&\S'\S(\S-  S\4S) jjr%S7S\R6                  S(\S-  S\R6                  4S* jjr& S7S+\R6                  S,\R6                  S-\R6                  S.\R6                  S/\S0\S1\S(\'\!\4   S-  S\#4S2 jjr( S7S(\'\!\4   \-  S-  S\'4U 4S3 jjjr)S4r*U =r+$ )8FuyuImageProcessor   Ti8  i  heightwidth   g      ?constantg      ?gp?r   image_input_idsimage_patchesimage_patch_indices_per_batch#image_patch_indices_per_subsequencerI   c                 &   > [         TU ]  " S0 UD6  g )Nr*   )super__init__)rH   rI   	__class__s     r   ru   FuyuImageProcessor.__init__   s    "6"r4   r   expected_ndimsr   c                 :    U R                  U5      n[        U5      $ r   )fetch_imagesr#   )rH   r   rx   s      r   _prepare_images_structure,FuyuImageProcessor._prepare_images_structure   s    
 ""6**622r4   Nr   sizeresamplez7PILImageResampling | tvF.InterpolationMode | int | None	antialiasc                   > Uc  [         R                  nUR                  SS u  pgUR                  UR                  pXy::  a  Xh::  a  U$ X-  n
X-  n[        X5      n[        Xl-  5      n[        X|-  5      n[        TU ]!  U[        XS9X4S9$ )av  
Resize an image to fit within `(size.height, size.width)` while maintaining aspect ratio.
Only resizes if the image is larger than the target size.
Args:
    image (`torch.Tensor`):
        Image to resize.
    size (`SizeDict`):
        Dictionary in the format `{"height": int, "width": int}` specifying the max size of the output image.
    resample (`PILImageResampling | tvF.InterpolationMode | int`, *optional*, defaults to `PILImageResampling.BILINEAR`):
        Resampling filter to use when resizing the image.
    antialias (`bool`, *optional*, defaults to `True`):
        Whether to apply antialiasing when resizing.
Nri   )r~   r   )
r   BILINEARshaperj   rk   minr\   rt   resizer   )rH   r   r}   r~   r   rI   image_heightimage_widthtarget_heighttarget_widthheight_scale_factorwidth_scale_factoroptimal_scale_factor
new_height	new_widthrv   s                  r   r   FuyuImageProcessor.resize   s    * )22H$)KK$4!&*kk4::|&<+HL+:)7"#6K<=
:;	w~8:?(  
 	
r4   ztorch.Tensor	do_resize
do_rescalerescale_factordo_normalize
image_mean	image_stddo_padr(   r)   disable_groupingreturn_tensorsc           	          U Vs/ s H  nU(       d  M  US   R                   SS  PM      nn[        XSS9u  nn0 nUR                  5        H"  u  nnU(       a  U R                  UX4S9nUUU'   M$     [	        UUSS9nU Vs/ s H  nU(       d  M  US   R                   SS  PM      nnU Vs/ s H	  nUS   /PM     nnU Vs/ s H	  nUS   /PM     nn[        UU5       VVs/ s H  u  nnUS   US   -  /PM     nnnU
(       a  U R                  UUUUUSS9n[        UUSS9u  nn0 nUR                  5        H  u  nnU R                  UXVXxU	5      nUUU'   M!     [	        UUSS9n [        U UUUS	.US
9$ s  snf s  snf s  snf s  snf s  snnf )Nr   r   T)r   	is_nested)r   r}   r~   )r      )pad_size
fill_valuer)   r   r   )r   image_unpadded_heightsimage_unpadded_widthsimage_scale_factors)r^   r9   )	r   r   rG   r   r	   zippadrescale_and_normalizer7   )!rH   r   r   r}   r~   r   r   r   r   r   r   r(   r)   r   r   rI   batch_imageoriginal_image_sizesgrouped_imagesgrouped_images_indexresized_images_groupedr   stacked_imagesresized_imagesimage_sizes
image_sizer   r   original_sizeresized_sizer   processed_images_groupedprocessed_imagess!                                    r   _preprocessFuyuImageProcessor._preprocess  s   & NTcVkWb 9A 4 4RS 9Vc/D0
,, "$%3%9%9%;!E>!%>!`,:"5) &< ((>@T`deDRbN[Va0{1~++BC0NbDO!PKj:a=/K!PCN O;Z*Q-; O 033G/U
/U+| !_}Q//0/U 	 
 !XX()!1 & N 0E-=0
,, $& %3%9%9%;!E>!77
LV_N /=$U+ &< **BDXdhi**@)>':	 '
 	
S  d c!P O
s(   E6E6 E;E;+F F"F
r   r   r'   c                 <   UcA  [        U R                  [        5      (       a  U R                  nO[        S0 U R                  D6nUR                  UR                  pTX-  S:w  a  [        SU< SU 35      eX%-  S:w  a  [        SU< SU 35      eX-  nX%-  nXg-  nU$ )a:  
Calculate number of patches required to encode an image.
Args:
    image_height (`int`):
        Height of the image.
    image_width (`int`):
        Width of the image.
    patch_size (`SizeDict`, *optional*):
        Dictionary in the format `{"height": int, "width": int}` specifying the size of the patches.
r   zimage_height=z must be divisible by zimage_width=r*   )r   r'   r   rj   rk   r"   )	rH   r   r   r'   patch_heightpatch_widthnum_patches_per_dim_hnum_patches_per_dim_wnum_patchess	            r   get_num_patches"FuyuImageProcessor.get_num_patchesH  s     $//844!__
%88
$.$5$5z7G7Gk&!+.D\NSTT$)~-CK=QRR , < + :+Cr4   c                    [        U S/5        UcA  [        U R                  [        5      (       a  U R                  nO[        S0 U R                  D6nUR                  UR
                  pCUR                  u  pV  nUR                  SX35      nUR                  SXD5      n	U	R                  5       n	U	R                  XVSX45      n	U	R                  SSSSS5      n	U	R                  USXc-  U-  5      n	U	$ )	a?  
Convert an image into a tensor of patches using PyTorch's unfold operation.
Args:
    image (`torch.Tensor`):
        Image to convert. Shape: [batch, channels, height, width]
    patch_size (`SizeDict`, *optional*):
        Dictionary in the format `{"height": int, "width": int}` specifying the size of the patches.
rO      r   r      r   r*   )r   r   r'   r   rj   rk   r   unfold
contiguousviewpermutereshape)
rH   r   r'   r   r   
batch_sizechannels_unfolded_along_heightpatchess
             r   patchify_image!FuyuImageProcessor.patchify_imageb  s     	$	*$//844!__
%88
$.$5$5z7G7Gk%*[["
a %Q K'..q+K$$&,,zRS//!Q1a0//*b(2IK2WXr4   image_inputimage_presentimage_unpadded_himage_unpadded_wimage_placeholder_idimage_newline_idvariable_sizedc	           
      `   [        U S/5        UcB  [        U R                  [        5      (       a  U R                  nO6[        S0 U R                  D6nO [        U[        5      (       d  [        S0 UD6nUR                  UR
                  p/ n/ n/ n[        UR                  S   5       GHb  n/ n/ n[        UR                  S   5       GH  nX.U4   (       Ga  XU4   nUR                  S   UR                  S   nnU(       af  [        U[        R                  " X>U4   U	-  5      U	-  5      n[        U[        R                  " XNU4   U
-  5      U
-  5      nUSS2SU2SU24   nUUnnU R                  UUUS9n[        R                  " U/U[        R                  UR                  S9nU R!                  UR#                  S5      US9R%                  S5      nUUR                  S   :X  d   eU(       a{  UR'                  S	UU
-  5      n[        R                  " UR                  S   S/U[        R                  UR                  S9n[        R(                  " UU/SS
9nUR'                  S	5      nUR+                  U/5        UR+                  U5        UR+                  U5        GM  UR+                  [        R,                  " / [        R                  UR                  S95        GM     UR+                  U5        UR+                  U5        GMe     / n/ nU GH  nSn/ n/ n U H  n!U!U:H  n"[        R.                  " U"5      n[        R0                  " U[        R2                  U!R                  S9R5                  U!5      n#[        R6                  " U!S	5      n$[        R6                  " U!S	5      n%[        R8                  " U"SS9S   n&U#U-   U$U&'   U#U%U&'   UR+                  U$5        U R+                  U%5        UU-  nM     UR+                  U5        UR+                  U 5        GM
     [;        UUUUUS.S9$ )a  
Process images for model input. In particular, variable-sized images are handled here.

Args:
    image_input (`torch.Tensor` of shape [batch_size, subsequence_size, num_channels, height, width]):
        Tensor of images padded to model input size.
    image_present (`torch.Tensor` of shape [batch_size, subsequence_size, num_images]):
        Tensor of 1s and 0s indicating whether an image is present.
    image_unpadded_h (`torch.Tensor` of shape [batch_size, subsequence_size]):
        Tensor of unpadded image heights.
    image_unpadded_w (`torch.Tensor` of shape [batch_size, subsequence_size]):
        Tensor of unpadded image widths.
    image_placeholder_id (int):
        The id of the image placeholder token. Comes from an associated tokenizer.
    image_newline_id (int):
        The id of the image newline token. Comes from an associated tokenizer.
    variable_sized (bool):
        Whether to process images as variable-sized.
    patch_size (`dict[str, int]`, *optional*):
        Size of the patches.
rO   Nr   r   r   )r   r   r'   )dtyperR   )r   r'   r   )dimT)as_tuplern   re   r*   )r   r   r'   r   rj   rk   ranger   r   mathceilr   rO   fullint32rR   r   	unsqueezesqueezer   catr]   tensorcount_nonzeroarangeint64type_as	full_likenonzeror7   )'rH   r   r   r   r   r   r   r   r'   r   r   r   batch_image_patchesbatch_image_input_idsbatch_indexro   rp   subseq_indexr   r   r   new_hnew_wr   tensor_of_image_idsr   newline_idsrq   rr   sample_image_input_idsindex_offsetper_batch_indicesper_subsequence_indicessubseq_image_input_idspatches_maskindicesindices_in_stream_per_batch!indices_in_stream_per_subsequencepatches_indss'                                          r   preprocess_with_tokenizer_info1FuyuImageProcessor.preprocess_with_tokenizer_info}  s1   @ 	$	*$//844!__
%88
J11!/J/J$.$5$5z7G7Gk+-8::< !2!21!56K OM %k&7&7&: ; l!:;;'\(ABE05AA+L% !$( II&6L7P&QT`&`adpp! !$' II&6L7P&QT_&_`cnn! !&a%%&7 8495k"&"6"6%1{Wa #7 #K +0**$';5;;WbWiWi+' #118JWa1bjjklmG&'--*::::%.A.I.I"k]hNh.i+&+jj066q91=,"'++#.#5#5	' /4ii9Lk8Z`a.b+.A.I.I".M+MM5'*#**+>?!((1#**5<<%++VaVhVh+ijU !<V "((9&&}5_ 7b CE%HJ+&;"L "&(#*@&59MM#11,?,,{%++NdNkNkltt* /4oo>TVX.Y+49OODZ\^4_1$}}\DI!L<Cl<R+L9BI1,?!(()DE'../PQ+# +A& *001BC/667NO1 '<2   #8!41N7Z
 	
r4   c           	         > [         TU ]  " S0 UD6nUb(  [        U[        5      (       d  [        S0 [	        USS9D6nXS'   U$ )z1
Process Fuyu-specific kwargs before validation.
r'   )
param_namer*   )rt   _standardize_kwargsr   r   r   )rH   r'   rI   rv   s      r   r   &FuyuImageProcessor._standardize_kwargs  sJ     ,6v6!*Z*J*J!WM*$VWJ)|r4   r*   )r   )NTr   ),r+   r,   r-   r.   r   r}   r'   r   r   r~   r   r(   r)   r   r   r   r   r   model_input_namesr%   valid_kwargsr   ru   r
   r\   r{   rO   Tensorr   boolr   r   r1   r2   r   r7   r   r   r   dictr   r   r3   __classcell__)rv   s   @r   rg   rg      s   IT*D,J!**HFMLLJIJN $L#(8!9 #  33 3 
	3 OS&
||&
 &
 L	&

 &
 
&
 &
PD
^$D
 D
 	D

 LD
 D
 D
 D
 DK'$.D
 4;&-D
 tD
 t|D
 DjD
 +D
 j(4/D
" 
#D
LC c xZ^ jm 4ELL ho Y^YeYe H -1D
\\D
 ||D
  ,,	D

  ,,D
 "D
 D
 D
 cNT)D
 
D
P 8<cNX-4 
	 r4   rg   )%r/   r   rO   torchvision.transforms.v2r   tvFimage_processing_backendsr   image_processing_utilsr   r   image_transformsr   r	   image_utilsr
   r   r   r   r   processing_utilsr   r   rY   r   r   r   r   
get_loggerr+   loggerr   r#   r%   r7   rg   __all__r*   r4   r   <module>r     s    &   7 ; A E  5  
		H	%aj!"T*%55
Ba	$z
a|5 l| l^ V+ V Vr
  
 r4   