
    Z j,                         S SK rS SKrS SKJr  SSKJr  SSKJ	r	J
r
JrJrJrJr  SSKJrJr  SSKJrJr  SSKJrJr  SS	KJrJrJr  S
SKJr   " S S\SS9r\" S\S5       " S S\5      5       rS/r g)    N)
functional   )BatchFeature)OPENAI_CLIP_MEANOPENAI_CLIP_STDChannelDimensionPILImageResamplingSizeDictget_image_size)UnpackVideosKwargs)
TensorTypeadd_start_docstrings)BASE_VIDEO_PROCESSOR_DOCSTRINGBaseVideoProcessor)VideoMetadatagroup_videos_by_shapereorder_videos   )smart_resizec                   R    \ rS rSr% \\\4   \S'   \\S'   \\S'   \\S'   \\S'   Srg)	Glm46VVideoProcessorInitKwargs*   max_image_size
patch_sizetemporal_patch_size
merge_sizemax_duration N)	__name__
__module____qualname____firstlineno__dictstrint__annotations____static_attributes__r       ڃ/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/glm46v/video_processing_glm46v.pyr   r   *   s&    cN"OOr)   r   F)totalzfConstructs a fast GLM-4V image processor that dynamically resizes videos based on the original videos.aj  
        patch_size (`int`, *optional*, defaults to 14):
            The spacial patch size of the vision encoder.
        temporal_patch_size (`int`, *optional*, defaults to 2):
            The temporal patch size of the vision encoder.
        merge_size (`int`, *optional*, defaults to 2):
            The merge size of the vision encoder to llm encoder.
    c                     ^  \ rS rSr\R
                  rSSS.rSS0r\	r
\rSrSrSrSrSrSrSrS	rSr\rS
rSrSS/rS\\   4U 4S jjrS\4U 4S jjr S'S\S\ \!-  S-  4S jjr"SSS\R
                  SSSSSSSSS4S\#\$RJ                     S\&S\&S\'S-  SSS\&S\!S\&S\!\#\!   -  S-  S \!\#\!   -  S-  S!\ S-  S"\ S-  S#\ S-  S$\(\)-  S-  4S% jjr*S&r+U =r,$ )(Glm46VVideoProcessor2   i 1  i )shortest_edgelongest_edger0   T      ,     pixel_values_videosvideo_grid_thwkwargsc                 &   > [         TU ]  " S0 UD6  g )Nr   )super__init__)selfr7   	__class__s     r*   r:   Glm46VVideoProcessor.__init__S   s    "6"r)   returnc                    > [         TU ]  " S0 UD6nUR                  SU R                  5      nUR                  (       a  UR
                  (       d  [        S5      eU$ )z
Update kwargs that need further processing before being validated
Can be overridden by subclasses to customize the processing of kwargs.
sizez:size must contain 'shortest_edge' and 'longest_edge' keys.r   )r9   _standardize_kwargsgetr@   r/   r0   
ValueError)r;   r7   r@   r<   s      r*   rA   (Glm46VVideoProcessor._standardize_kwargsV   sM    
 ,6v6zz&$)),!!):):YZZr)   Nmetadatafpsc                    Ub  [        USS5      c  [        S5      eUR                  nUS-
  nUR                  =(       d    [	        XQR
                  -  5      S-   nSSSS.nSnS	n	[        Xi5      n
U
S
::  a  US
   nOU
S::  a  US   nOUS	   n[        X-  U R                  -  5      n[        X5      nSUR
                  -  n[        U5       Vs/ s H  oU-  PM	     nn[        U5      nXL:  a-  [        R                  " SUS-
  U[        S9R                  5       nOQ/ nSnSU R                  U-  -  n[        U5       H,  nUU   U:  d  M  UU-  nUR                  U5        UU:  d  M,    O   [        U5      U:  aU  [        U5      S:X  a  S[        US-
  S5      nnO
US   US   nn[        R                  " UUU[        S9R                  5       nO;[        U5      U:  a,  [        R                  " SUS-
  U[        S9R                  5       n[!        5       / nnU H-  nUU;  d  M  UR#                  U5        UR                  U5        M/     [        U5      S-  (       a  UR                  US   5        [        R$                  " U5      $ s  snf )a?  
Args:
    metadata (`VideoMetadata`):
        Metadata of the video containing information about total duration, fps and total number of frames.
    fps (`int` or `float`, *optional*):
        Target frames to sample per second. Defaults to `self.fps`.
Returns:
    np.ndarray:
        Indices to sample video frames.
NrF   zAsked to sample frames per second but no video metadata was provided which is required when sampling in Glm46V. Please pass in `VideoMetadata` object or set `do_sample_frames=False`r   r   g      ?)   r3   `	  i  rI   rH   r3   r   )dtype)getattrrC   total_num_framesdurationroundrF   minr&   r   rangenplinspacetolistappendlenmaxsetaddarray)r;   rE   rF   r7   total_framesmax_frame_idxrN   DYNAMIC_FPS_THRESMAX_FRAME_COUNT_DYNAMICMAX_DURATIONeffective_duration
target_fps	extract_tduration_per_framei
timestamps
max_secondframe_indicescurrent_secondinv_fpsframe_indexstartendseenuniqidxs                             r*   sample_frames"Glm46VVideoProcessor.sample_framesa   s     wx=EX 
  00$q($$Omll.J(Ka(O!"#6"% 8#*2.J3&*3/J*40J*7$:R:RRS		;	-6;L6IJ6I,,6I
J]
#KK<!+;YcRYY[MMN433j@AG$\2k*n<"g-N!((5%3  3 }	)=!Q&L1$4a 8ss*1-}R/@sKKsISIPPRM)+KK<!+;YcRYY[MUBd C$C  !
 t9q=KKR!xx~E Ks   Jgp?videosdo_convert_rgb	do_resizer@   resamplez7PILImageResampling | tvF.InterpolationMode | int | None
do_rescalerescale_factordo_normalize
image_mean	image_stdr   r   r   return_tensorsc                    [        U5      u  nn0 nUR                  5        H  u  nnUR                  u  nnnnnUUUnnnU(       am  [        UUUUX-  UR                  UR
                  S9u  nnUR                  UU-  UUU5      nU R                  U[        UUS9US9nUR                  UUUUU5      nUUU'   M     [        UU5      n[        U5      u  nn0 n 0 n!UR                  5        GH  u  nn[        US   [        R                  S9u  nnU R                  UXgXU
5      nUn"U"R                  S   U-  S:w  a8  U"S S 2SS 24   R                  SUS-
  SSS5      n#[        R                   " U"U#/SS9n"U"R                  S S	 u  n$n%n&U%U-  n%UU-  UU-  n(n'U"R                  U$U%UU&U'U-  UUU(U-  UU5
      n"U"R#                  SSS
SSSS	SSS5
      n"U"R%                  U$U%U'-  U(-  U&U-  U-  U-  5      n)U)U U'   U%U'U(//U$-  U!U'   GM     [        U U5      n*[        U!U5      n![        R                   " U*SS9n+[        R&                  " U!5      n,U+U,S.n-[)        U-US9$ )N)
num_framesheightwidthtemporal_factorfactor
min_pixels
max_pixels)r~   r   )r@   ru   r   )channel_dimr   rK   )dimr               r2      	   )r5   r6   )datatensor_type)r   itemsshaper   r/   r0   viewresizer
   r   r   r   FIRSTrescale_and_normalizerepeattorchcatpermutereshapetensorr   ).r;   rr   rs   rt   r@   ru   rv   rw   rx   ry   rz   r   r   r   r{   r7   grouped_videosgrouped_videos_indexresized_videos_groupedr   stacked_videosBTCHWr}   r~   r   resized_heightresized_widthresized_videosprocessed_videos_groupedprocessed_gridspatchesrepeats
batch_sizegrid_tchannelgrid_hgrid_wflatten_patchesprocessed_videosr5   r6   r   s.                                                 r*   _preprocess Glm46VVideoProcessor._preprocess   s   $ 0EV/L,,!#%3%9%9%;!E>*00MAq!Q()1aJ0<)!$7%2#11#001- "0!4!4QUAq!!D!%"!}M% "- "
 "0!4!4Q1nm!\,:"5)) &<* ((>@TU 0E^/T,,#% %3%9%9%;!E>,:>!;LZjZpZp,q)NM "77
LV_N %G }}Q"55:!!RS&/004G!4KQPQSTU))Wg$6A>*1--*;'J22F+z9=J;VFFll#*$*$G ooaAq!Q1aCG%oo&(--
:ZGO />$U+'-vv&>%?*%LOE"K &<N **BDXY(:NO#ii(8a@o6#6,

 >BBr)   r   )N)-r    r!   r"   r#   r	   BICUBICru   r@   r   r   ry   r   rz   rt   rv   rx   rs   do_sample_framesr   r   r   r   r   valid_kwargsr}   rF   model_input_namesr   r:   r$   rA   r   r&   floatrp   listr   Tensorboolr
   r%   r   r   r(   __classcell__)r<   s   @r*   r-   r-   2   s    "))H&8KLD$&9:N!JIIJLNJLJ1LJ
C.0@A#(F!G #	t 	 #'JJ 5[4J^  $ $N`NhNh )!1504!%*.!%26aCU\\"aC aC 	aC
 oaC LaC aC aC aC DK'$.aC 4;&-aC $JaC !4ZaC $JaC j(4/aC aCr)   r-   )!numpyrR   r   torchvision.transforms.v2r   tvFimage_processing_utilsr   image_utilsr   r   r   r	   r
   r   processing_utilsr   r   utilsr   r   video_processing_utilsr   r   video_utilsr   r   r   image_processing_glm46vr   r   r-   __all__r   r)   r*   <module>r      s|   ,   7 2  5 5 X O O 1\  l"PC- PCPCf "
"r)   