
    Z j&                        S r SSKrSSKrSSKJr  SSKJr  SSKJ	r	  SSK
JrJr  SSKJrJrJrJrJr  SS	KJrJr  SS
KJrJrJr  \R4                  " \5      r " S S\SS9r    SS\S\S\S\S\S\S\4S jjr\ " S S\5      5       r S/r!g)z#Image processor class for GLM-4.1V.    N)
functional   )TorchvisionBackend)BatchFeature)group_images_by_shapereorder_images)OPENAI_CLIP_MEANOPENAI_CLIP_STD
ImageInputPILImageResamplingSizeDict)ImagesKwargsUnpack)
TensorTypeauto_docstringloggingc                   8    \ rS rSr% Sr\\S'   \\S'   \\S'   Srg)Glm4vImageProcessorKwargs&   a6  
patch_size (`int`, *optional*, defaults to 14):
    The spatial patch size of the vision encoder.
temporal_patch_size (`int`, *optional*, defaults to 2):
    The temporal patch size of the vision encoder.
merge_size (`int`, *optional*, defaults to 2):
    The merge size of the vision encoder to llm encoder.

patch_sizetemporal_patch_size
merge_size N)__name__
__module____qualname____firstlineno____doc__int__annotations____static_attributes__r       ځ/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/glm4v/image_processing_glm4v.pyr   r   &   s     OOr"   r   F)total
num_framesheightwidthtemporal_factorfactor
min_pixels
max_pixelsc                 :   X:  a  [        SU  SU 35      eX:  d  X$:  a*  [        XA-  XB-  5      n[        X-  5      n[        X'-  5      n[        X5      [        X5      -  S:  a#  [        S[        X5      [        X5      -   35      e[	        X-  5      U-  n[	        X$-  5      U-  n	[	        X-  5      U-  n
X-  U	-  U:  aq  [
        R                  " X-  U-  U-  5      n[        U[
        R                  " X-  U-  5      U-  5      n[        U[
        R                  " X+-  U-  5      U-  5      n	X4$ X-  U	-  U:  aZ  [
        R                  " XPU-  U-  -  5      n[
        R                  " X-  U-  5      U-  n[
        R                  " X+-  U-  5      U-  n	X4$ )Nzt:z% must be larger than temporal_factor:   z4absolute aspect ratio must be smaller than 200, got )	
ValueErrormaxr   minroundmathsqrtfloorceil)r%   r&   r'   r(   r)   r*   r+   scaleh_barw_bart_barbetas               r#   smart_resizer;   5   s    #2j\)NN_`aa%.FOV^4V^$EM"
6C..4B3vCUX[\bXjCjBkl
 	
 &/"V+E%.!F*E*.//AE}uz)yy*-5CDFDJJv}v'=>GHFDJJu|f'<=FG < 
		+yyF':U'BCD		&-&01F:		%,/069<r"   c                   h  ^  \ rS rSrSr\R                  rSSS.rSr	Sr
SrSr\r\rSrSrS	rS	r\rS
S/rS\\   4U 4S jjr\S\S\\   S\4U 4S jj5       rS\4U 4S jjrS\ S   S\!S\"SSS\!S\#S\!S\#\ \#   -  S-  S\#\ \#   -  S-  S\$S\$S\$S \!S-  S!\%\&-  S-  S\4S" jr'S'S#\$S$\$4S% jjr(S&r)U =r*$ )(Glm4vImageProcessorY   T 1  iq )shortest_edgelongest_edgeFgp?      pixel_valuesimage_grid_thwkwargsc                    > [         TU ]  " S0 UD6  U R                  bB  U R                  R                  (       a  U R                  R                  (       d  [        S5      eg g )N:size must contain 'shortest_edge' and 'longest_edge' keys.r   )super__init__sizer@   rA   r.   )selfrF   	__class__s     r#   rJ   Glm4vImageProcessor.__init__k   sM    "6"99 99**$))2H2H !]^^ 3I !r"   imagesreturnc                 &   > [         TU ]  " U40 UD6$ N)rI   
preprocess)rL   rO   rF   rM   s      r#   rS   Glm4vImageProcessor.preprocessq   s    w!&3F33r"   c                    > [         TU ]  " S0 UD6nUR                  SU R                  5      nUR                  (       a  UR
                  (       d  [        S5      eU$ )z
Update kwargs that need further processing before being validated
Can be overridden by subclasses to customize the processing of kwargs.
rK   rH   r   )rI   _standardize_kwargsgetrK   r@   rA   r.   )rL   rF   rK   rM   s      r#   rV   'Glm4vImageProcessor._standardize_kwargsu   sM    
 ,6v6zz&$)),!!):):YZZr"   ztorch.Tensor	do_resizerK   resamplez7PILImageResampling | tvF.InterpolationMode | int | None
do_rescalerescale_factordo_normalize
image_meanN	image_stdr   r   r   disable_groupingreturn_tensorsc                    [        XS9u  nn0 nUR                  5        He  u  nnUR                  SS u  nnU(       aA  [        UUUUX-  UR                  UR
                  S9u  nnU R                  U[        UUS9US9nUUU'   Mg     [        UU5      n[        UUS9u  nn0 n0 nUR                  5        GH;  u  nnUR                  SS u  nnU R                  UXVXxU	5      nUR                  S:X  a  UR                  S5      nUR                  S   U-  S	:w  aH  USS2S
S24   R                  SUUR                  S   U-  -
  SSS5      n[        R                  " UU/SS9nUR                  SS u  nnn UU-  n!UU
-  UU
-  n#n"UR                  UU!UU U"U-  UU
U#U-  UU
5
      nUR!                  S	SSSSSSSSS5
      nUR#                  UU!U"-  U#-  U U-  U
-  U
-  5      n$U$UU'   U!U"U#//U-  UU'   GM>     [        UU5      n%[        UU5      n[        R                  " U%S	S9n&[        R$                  " U5      n'['        U&U'S.US9$ )z)
Preprocess an image or batch of images.
)r`   N)r%   r&   r'   r(   r)   r*   r+   )r&   r'   )rK   rZ         r   )dimr            rC      	   )rD   rE   )datatensor_type)r   itemsshaper;   r@   rA   resizer   r   rescale_and_normalizendim	unsqueezerepeattorchcatviewpermutereshapetensorr   )(rL   rO   rY   rK   rZ   r[   r\   r]   r^   r_   r   r   r   r`   ra   rF   grouped_imagesgrouped_images_indexresized_images_groupedrp   stacked_imagesr&   r'   resized_heightresized_widthresized_imagesprocessed_images_groupedprocessed_gridspatchesrepeats
batch_sizet_lenchannelgrid_tgrid_hgrid_wflatten_patchesprocessed_imagesrD   rE   s(                                           r#   _preprocessGlm4vImageProcessor._preprocess   s   , 0EV/o,,!#%3%9%9%;!E>*005MFE0<2!$7%2#11#001- "&"!}M% "- "
 -;"5)# &<& ((>@TU/D^fv/w,,#% %3%9%9%;!E>,:,@,@,E)NM00
LV_G ||q !++A.}}Q"55:!!RS&/00*gmmA.>AT.TUWXZ[]^  ))Wg$6A>)0r):&Jw11F+z9=J;VFFll#*$*$G ooaAq!Q1aCG%oo&(--
:ZGO />$U+'-vv&>%?*%LOE"S &<V **BDXY(:NOyy!1q9o6"..Q_m
 	
r"   r&   r'   c           
         UR                  SU R                  5      nUR                  SU R                  5      nUR                  SU R                  5      nXE-  n[	        U R
                  UUUUS   US   U R
                  S9u  pX-  X-  pX-  $ )aY  
A utility that returns number of image patches for a given image size.

Args:
    height (`int`):
        Height of the input image.
    width (`int`):
        Width of the input image.
    images_kwargs (`dict`, *optional*)
        Any kwargs to override defaults of the image processor.
Returns:
    `int`: Number of image patches per image.
r   r   rK   r@   rA   )r%   r&   r'   r)   r*   r+   r(   )rW   r   r   rK   r;   r   )rL   r&   r'   images_kwargsr   r   rK   r)   r   r   r   r   s               r#   get_number_of_image_patches/Glm4vImageProcessor.get_number_of_image_patches   s     #&&|T__E
"&&|T__E
  3((4//O,N+ 44)
% (5}7Rr"   r   rR   )+r   r   r   r   rY   r   BICUBICrZ   rK   default_to_squarer[   r\   r]   r	   r^   r
   r_   do_convert_rgbr   r   r   r   valid_kwargsmodel_input_namesr   rJ   r   r   r   rS   dictrV   listboolr   floatr   strr   r   r   r!   __classcell__)rM   s   @r#   r=   r=   Y   s   I!))H&HDJNL!JINJJ,L')9:_(A!B _ 4 4v>W7X 4]i 4 4
t 
d
^$d
 d
 	d

 Ld
 d
 d
 d
 DK'$.d
 4;&-d
 d
 !d
 d
 +d
 j(4/d
" 
#d
L# c  r"   r=   )rC      r?   i   )"r   r2   rv   torchvision.transforms.v2r   tvFimage_processing_backendsr   image_processing_utilsr   image_transformsr   r   image_utilsr	   r
   r   r   r   processing_utilsr   r   utilsr   r   r   
get_loggerr   loggerr   r   r;   r=   __all__r   r"   r#   <module>r      s    *   7 ; 2 E  5 8 8 
		H	%E & 0!!! ! 	!
 ! ! !H j, j jZ !
!r"   