
    Z j!              	          S SK r SSKJr  SSKJrJr  SSKJrJrJ	r	J
r
JrJrJr  SSKJrJr  SSKJr  SS	KJrJr  \" 5       (       a  SS
KJr  \	" 5       (       a  S SK r \" 5       (       a  S SKJr  O\
" 5       (       a  S SKJr  \R8                  " \5      r " S S\SS9rSSS\ SS4S jr!SSSSS\ S\"S   4S jr#\" S\5       " S S\5      5       r$S/r%g)    N   )BatchFeature)UnpackVideosKwargs)
TensorTypeadd_start_docstringsis_torch_availableis_torchvision_availableis_torchvision_v2_availableis_vision_availablelogging)BASE_VIDEO_PROCESSOR_DOCSTRINGBaseVideoProcessor)
VideoInput   )_SUPPORTED_SOFT_TOKENS get_aspect_ratio_preserving_size)PILImageResampling)
functionalc                   8    \ rS rSr% Sr\\S'   \\S'   \\S'   Srg)Gemma4VideoProcessorKwargs1   a?  
patch_size (`int`, *optional*):
    Size of each image patch in pixels.
max_soft_tokens (`int`, *optional*):
    Maximum number of soft (vision) tokens per video frame.
    Must be one of {70, 140, 280, 560, 1120}.
pooling_kernel_size (`int`, *optional*):
    Spatial pooling kernel size applied after patchification.

patch_sizemax_soft_tokenspooling_kernel_size N)__name__
__module____qualname____firstlineno____doc__int__annotations____static_attributes__r       ڃ/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/gemma4/video_processing_gemma4.pyr   r   1   s     Or%   r   F)totalvideotorch.Tensorr   returnc                     U R                   u  p#pEXA-  nXQ-  nU R                  X#XaXq5      nUR                  SSSSSS5      nUR                  X&U-  S5      nU$ )z
Convert 4D tensor video of shape (num_frames, num_channels, height, width) into 3D tensor of patches of shape
(num_frames, num_patches_height * num_patches_width, patch_size * patch_size * num_channels).
r         r      r   )shapereshapepermute)	r(   r   
num_framesnum_channelsheightwidthnum_patches_heightnum_patches_widthpatched_videos	            r&   convert_video_to_patchesr:   A   sv    
 /4kk+Jf-+MM"4BSM "))!Q1a;M!))*K\6\^`aMr%   	positionstarget_length)r)   r)   c                     U R                   S   nX#-
  nUS:  ab  SSSUSS/nSSSUSS4n[        R                  R                  R	                  XSSS9n [        R                  R                  R	                  XSSS9nX4$ )z.
Pad the video along to max number of patches
r   r   constant)modevaluer/   )r0   torchnnr   pad)r(   r;   r<   current_lengthpadding_lengthpaddingpos_paddings          r&   pad_to_max_patchesrH   Q   s     [[^N"3NaNAq1!Q15##''Zq'QHH''++I[]+^	r%   zbConstructs a Gemma4 video processor that samples frames from videos for use with the Gemma4 model.c                     ^  \ rS rSr\R
                  r/ SQr/ SQrSr	Sr
SrSrSrSrSrSrSrSrS	r\rS
S/rS\\   4U 4S jjrU 4S jrS\R6                  S\S\S\S\R<                  S\R6                  4S jrS\ S\\   S\!4U 4S jjr"   S$S\#S   S\$SSS\$S\%S\$S\%\#\%   -  S-  S\%\#\%   -  S-  S \&\'-  S-  S\S-  S!\S-  S\S-  S\!4S" jjr(S#r)U =r*$ )%Gemma4VideoProcessora   )        rL   rL   )      ?rM   rM   NT       F   r   pixel_values_videosvideo_position_idskwargsc                    > [         TU ]  " S0 UD6  U R                  [        ;  a   [	        S[         SU R                   S35      eg )N!`max_soft_tokens` must be one of , got .r   )super__init__r   r   
ValueErrorselfrS   	__class__s     r&   rY   Gemma4VideoProcessor.__init__w   sN    "6"'==@AW@XX^_c_s_s^ttuvww >r%   c                 0   > SUS'   [         TU ]  " S0 UD6  g )NF	do_resizer   )rX   _validate_preprocess_kwargsr[   s     r&   ra   0Gemma4VideoProcessor._validate_preprocess_kwargs}   s    
 ${+5f5r%   r(   r   max_patchesr   resampler*   c                     UR                   S   UR                   S   pv[        UUUUUS9u  pX:X  a  X:X  a  U$ [        R                  " UX/USS9$ )Nr/   )r5   r6   r   rc   r   T)sizeinterpolation	antialias)r0   r   Fresize)
r\   r(   r   rc   r   rd   r5   r6   target_heighttarget_widths
             r&   aspect_ratio_preserving_resize3Gemma4VideoProcessor.aspect_ratio_preserving_resize   si     BR&F!# 3'
# "|'<Lxx."	
 	
r%   videosc                 &   > [         TU ]  " U40 UD6$ )N)rX   
preprocess)r\   rp   rS   r]   s      r&   rr   Gemma4VideoProcessor.preprocess   s    
 w!&3F33r%   r)   r`   z F.InterpolationMode | int | None
do_rescalerescale_factordo_normalize
image_mean	image_stdreturn_tensorsr   c           	         U[         ;  a  [        S[          SU S35      eXS-  -  n/ n/ n/ nSnU GHY  nU(       a  U R                  UU
UUUS9nU R                  UXEXgU5      nUR                  S   nUR                  S   U
-  nUR                  S	   U
-  n[        UU
5      nUR                  UR                  S   US-  -  5        UR                  n[        R                  " [        R                  " UUS
9[        R                  " UUS
9SS9n[        R                  " US	S9nUR                  UR                  S   S5      nUS   R                  USS5      n[        UUU5      u  nnUR                  U5        UR                  U5        GM\     [        R                  " USS9n[        R                  " USS9nUUUS.n[        UU	S9$ )NrU   rV   rW   r,   r   )r(   r   rc   r   rd   r   rf   r/   )devicexy)indexing)dim)N.)rQ   rR   num_soft_tokens_per_video)datatensor_type)r   rZ   rn   rescale_and_normalizer0   r:   appendr{   rA   meshgridarangestackr1   repeatrH   r   )r\   rp   r`   rd   rt   ru   rv   rw   rx   ry   r   r   r   rS   rc   pixel_valuesposition_idsr   r3   r(   patch_heightpatch_widthpatchesr{   
patch_gridstacked_gridreal_positionsr;   r   s                                r&   _preprocess Gemma4VideoProcessor._preprocess   s     "88@AW@XX^_n^oopqrr%Q(>>$&!
E;;) +(;% <  ..ujR^luvEQJ ;;r?j8L++b/Z7K.ujAG%,,W]]1-=ATVWAW-WX\\F[8\&9J
 !;;zr:L)11'--2BAFN+I6==j!QON!3G^[!YGY(	*= B {{<Q7{{<Q7 $0".)B

 >BBr%   r   )NNN)+r   r   r   r    r   BICUBICrd   rw   rx   rg   default_to_squaredo_convert_rgbr`   rt   rv   r3   do_sample_framesr   r   r   r   valid_kwargsmodel_input_namesr   rY   ra   rA   Tensorr"   rj   InterpolationModern   r   r   rr   listboolfloatstrr   r   r$   __classcell__)r]   s   @r&   rJ   rJ   a   s   
 "))H JIDNIJLJJO-L.0DEx(B!C x6
||
 
 	

 !
 %%
 

644 344 
	4$ "&&**.CC^$CC CC 5	CC
 CC CC CC DK'$.CC 4;&-CC j(4/CC $JCC tCC !4ZCC 
CC CCr%   rJ   )&rA   image_processing_utilsr   processing_utilsr   r   utilsr   r   r	   r
   r   r   r   video_processing_utilsr   r   video_utilsr   image_processing_gemma4r   r   image_utilsr   torchvision.transforms.v2r   rj   torchvision.transforms
get_loggerr   loggerr   r"   r:   tuplerH   rJ   __all__r   r%   r&   <module>r      s      2 4   Y % ] 1  96 
		H	%U  N    &4EH
)*  h"EC- EC	ECP "
"r%   