
    Z j                         S r SSKrSSKJs  Js  Jr  SSKJr  SSK	J
r
JrJrJr  SSKJrJr  SSKJr  SSKJr  SS	KJrJrJr   " S
 S\SS9r " S S\5      rS/rg)z(Fast Video processor class for InternVL.    N   )BatchFeature)OPENAI_CLIP_MEANOPENAI_CLIP_STDPILImageResamplingSizeDict)UnpackVideosKwargs)
TensorType)BaseVideoProcessor)VideoMetadatagroup_videos_by_shapereorder_videosc                   ,    \ rS rSr% \\-  \-  \S'   Srg) InternVLVideoProcessorInitKwargs   initial_shift N)	__name__
__module____qualname____firstlineno__boolfloatint__annotations____static_attributes__r       ڇ/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/internvl/video_processing_internvl.pyr   r      s    %<#%%r   r   F)totalc                   B  ^  \ rS rSr\R
                  r\r\	r
SSS.rSrSrSrSrSrSr\rS\\   4U 4S jjr   S S	\S
\S-  S\\-  S-  S\\-  \-  S-  4S jjr S!S\S   S\S\S\SSS\S\S\S\S\S\\\   -  S-  S\\\   -  S-  S\\-  S-  S\4S jjr Sr!U =r"$ )"InternVLVideoProcessor   i  )heightwidthTFkwargsc                 &   > [         TU ]  " S0 UD6  g )Nr   )super__init__)selfr&   	__class__s     r   r)   InternVLVideoProcessor.__init__,   s    "6"r   Nmetadata
num_framesfpsr   c                 l   Ub  UOU R                   nUb  UOU R                  nUR                  nUc8  Ub5  Ub  UR                  c  [	        S5      e[        XaR                  -  U-  5      nUSL a  Xb-  S-  nX&:  a  [	        SU SU S35      e[        R                  " XFXb-  5      R                  5       nU$ )a  
Default sampling function which uniformly samples the desired number of frames between 0 and total number of frames.
If `fps` is passed along with metadata, `fps` frames per second are sampled uniformty. Arguments `num_frames`
and `fps` are mutually exclusive.

Args:
    metadata (`VideoMetadata`):
        Metadata of the video containing information about total duration, fps and total number of frames.
    num_frames (`int`, *optional*):
        Maximum number of frames to sample. Defaults to `self.num_frames`.
    fps (`int` or `float`, *optional*):
        Target frames to sample per second. Defaults to `self.fps`.
    initial_shift (`bool`, `float` or `int`, defaults to `self.initial_shift`):
        The initial shift to apply when sampling frames. If `True`, the shift is set so that frames are sampled from the middle of the video.

Returns:
    np.ndarray:
        Indices to sample video frames.
zAsked to sample `fps` frames per second but no video metadata was provided which is required when sampling with `fps`. Please pass in `VideoMetadata` object or use a fixed `num_frames` per input videoT   z(Video can't be sampled. The `num_frames=z` exceeds `total_num_frames=z`. )r.   r   total_num_framesr/   
ValueErrorr   torcharange)r*   r-   r.   r/   r   r&   r2   indicess           r   sample_frames$InternVLVideoProcessor.sample_frames/   s    6 $.#9Zt
)6)BHZHZ#44 #/8<<#7 h  -<sBCJD ,9A=M(::,Fbcsbttwx  ,,}@P@]^bbdr   videosztorch.Tensordo_convert_rgb	do_resizesizeresamplez7PILImageResampling | tvF.InterpolationMode | int | Nonedo_center_crop	crop_size
do_rescalerescale_factordo_normalize
image_mean	image_stdreturn_tensorsreturnc           	         [        U5      u  nn0 nUR                  5        H:  u  nnU(       a  U R                  U5      nU(       a  U R                  UXES9nUUU'   M<     [	        UU5      n[        U5      u  nn0 nUR                  5        H8  u  nnU(       a  U R                  UU5      nU R                  UXXU5      nUUU'   M:     [	        UU5      n[        SU0US9$ )N)r<   r=   pixel_values_videos)datatensor_type)r   itemsconvert_to_rgbresizer   center_croprescale_and_normalizer   )r*   r9   r:   r;   r<   r=   r>   r?   r@   rA   rB   rC   rD   rE   r&   grouped_videosgrouped_videos_indexresized_videos_groupedshapestacked_videosresized_videosprocessed_videos_groupedprocessed_videoss                          r   _preprocess"InternVLVideoProcessor._preprocessb   s	   $ 0EV/L,,!#%3%9%9%;!E>!%!4!4^!D!%^$!Z,:"5) &< ((>@TU 0E^/T,,#% %3%9%9%;!E>!%!1!1.)!L!77
LV_N /=$U+ &< **BDXY"79I!JXfggr   r   )NNN)N)#r   r   r   r   r   BICUBICr=   r   rC   r   rD   r<   r;   r@   rB   r:   r   do_sample_framesr   valid_kwargsr	   r)   r   r   r   r   r7   listr   strr   r   rX   r   __classcell__)r+   s   @r   r"   r"      s   !))H!JIC(DIJLNM3L#(H!I # "&"&3711 $J1 5[4	1
 e|c)D01B 37+h^$+h +h 	+h
 +h L+h +h +h +h +h +h DK'$.+h 4;&-+h j(4/+h  
!+h +hr   r"   )__doc__r4   $torchvision.transforms.v2.functional
transformsv2
functionaltvFimage_processing_utilsr   image_utilsr   r   r   r   processing_utilsr	   r
   utilsr   video_processing_utilsr   video_utilsr   r   r   r   r"   __all__r   r   r   <module>rm      sU    /  2 2 2 Z Z 4  8 O O&|5 &nh/ nhb $
$r   