
    Z j#                         S r SSKJr  SSKrSSKJr  SSKJr  SSK	J
r
  SSKJrJr  SS	KJrJrJrJrJrJr  SS
KJrJr  SSKJrJrJrJr  \R:                  " \5      r " S S\SS9r \ " S S\5      5       r!S/r"g)z$Image processor class for MobileViT.    )UnionN)
functional   )TorchvisionBackend)BatchFeature)group_images_by_shapereorder_images)IMAGENET_STANDARD_MEANIMAGENET_STANDARD_STDChannelDimension
ImageInputPILImageResamplingSizeDict)ImagesKwargsUnpack)
TensorTypeauto_docstringloggingrequires_backendsc                   .    \ rS rSr% Sr\\S'   \\S'   Srg)MobileVitImageProcessorKwargs,   a  
do_flip_channel_order (`bool`, *optional*, defaults to `self.do_flip_channel_order`):
    Whether to flip the color channels from RGB to BGR or vice versa.
do_reduce_labels (`bool`, *optional*, defaults to `self.do_reduce_labels`):
    Whether or not to reduce all label values of segmentation maps by 1. Usually used for datasets where 0
    is used for background, and background itself is not included in all classes of a dataset (e.g.
    ADE20k). The background label will be replaced by 255.
do_flip_channel_orderdo_reduce_labels N)__name__
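

# Illustration of the label-reduction convention described above (assumes an ADE20k-style layout;
# concrete values are examples only): with `do_reduce_labels=True` a map storing
# {0: background, 1..150: classes} is remapped to {255: ignore, 0..149: classes},
# e.g. tensor([0, 1, 2, 150]) becomes tensor([255, 0, 1, 149]).
# `MobileViTImageProcessor.reduce_label` below performs this mapping on each segmentation map.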


@auto_docstring
class MobileViTImageProcessor(TorchvisionBackend):
    """Torchvision backend for MobileViT with flip_channel_order and reduce_label support."""

    valid_kwargs = MobileVitImageProcessorKwargs
    resample = PILImageResampling.BILINEAR
    image_mean = IMAGENET_STANDARD_MEAN
    image_std = IMAGENET_STANDARD_STD
    size = {"shortest_edge": 224}
    default_to_square = False
    crop_size = {"height": 256, "width": 256}
    do_resize = True
    do_center_crop = True
    do_rescale = True
    do_normalize = None
    do_convert_rgb = None
    do_flip_channel_order = True
    do_reduce_labels = False

    def __init__(self, **kwargs: Unpack[MobileVitImageProcessorKwargs]):
        super().__init__(**kwargs)

    @auto_docstring
    def preprocess(
        self,
        images: ImageInput,
        segmentation_maps: ImageInput | None = None,
        **kwargs: Unpack[MobileVitImageProcessorKwargs],
    ) -> BatchFeature:
        r"""
        segmentation_maps (`ImageInput`, *optional*):
            The segmentation maps to preprocess.
        """
        return super().preprocess(images, segmentation_maps, **kwargs)

    def _preprocess_image_like_inputs(
        self,
        images: ImageInput,
        segmentation_maps: ImageInput | None,
        do_convert_rgb: bool,
        input_data_format: ChannelDimension,
        return_tensors: str | TensorType | None,
        device: Union["torch.device", str] | None = None,
        **kwargs: Unpack[MobileVitImageProcessorKwargs],
    ) -> BatchFeature:
        """Handle extra inputs beyond images."""
        images = self._prepare_image_like_inputs(
            images=images, do_convert_rgb=do_convert_rgb, input_data_format=input_data_format, device=device
        )
        images_kwargs = kwargs.copy()
        # Label reduction only applies to segmentation maps, never to the pixel values.
        images_kwargs["do_reduce_labels"] = False
        data = {}
        data["pixel_values"] = self._preprocess(images, **images_kwargs)

        if segmentation_maps is not None:
            processed_segmentation_maps = self._prepare_image_like_inputs(
                images=segmentation_maps,
                expected_ndims=2,
                do_convert_rgb=False,
                input_data_format=ChannelDimension.FIRST,
            )
            # Segmentation maps keep their integer class ids: no rescaling, no channel flipping,
            # nearest-neighbour resampling only.
            segmentation_maps_kwargs = kwargs.copy()
            segmentation_maps_kwargs.update(
                {"do_rescale": False, "do_flip_channel_order": False, "resample": PILImageResampling.NEAREST}
            )
            processed_segmentation_maps = self._preprocess(processed_segmentation_maps, **segmentation_maps_kwargs)
            processed_segmentation_maps = [
                segmentation_map.squeeze(0).to(torch.int64) for segmentation_map in processed_segmentation_maps
            ]
            data["labels"] = processed_segmentation_maps

        return BatchFeature(data=data, tensor_type=return_tensors)
    def reduce_label(self, labels: list["torch.Tensor"]) -> list["torch.Tensor"]:
        """Reduce label values by 1, replacing 0 with 255."""
        for idx in range(len(labels)):
            label = labels[idx]
            # Map the background class (0) to the ignore index (255), then shift every other class down by one.
            label = torch.where(label == 0, torch.tensor(255, dtype=label.dtype, device=label.device), label)
            label = label - 1
            label = torch.where(label == 254, torch.tensor(255, dtype=label.dtype, device=label.device), label)
            labels[idx] = label
        return labels

    def flip_channel_order(self, images: "torch.Tensor") -> "torch.Tensor":
        """Flip RGB to BGR or vice versa."""
        if images.ndim == 3:
            flipped = images.clone()
            flipped[:3] = images[[2, 1, 0]]
            return flipped
        if images.ndim == 4:
            flipped = images.clone()
            flipped[:, :3] = images[:, [2, 1, 0]]
            return flipped
        return images

    def _preprocess(
        self,
        images: list["torch.Tensor"],
        do_resize: bool,
        size: SizeDict,
        resample: "PILImageResampling | tvF.InterpolationMode | int | None",
        do_center_crop: bool,
        crop_size: SizeDict,
        do_rescale: bool,
        rescale_factor: float,
        disable_grouping: bool | None,
        do_flip_channel_order: bool,
        do_reduce_labels: bool,
        **kwargs,
    ) -> list["torch.Tensor"]:
        """Custom preprocessing for MobileViT."""
        if do_reduce_labels:
            images = self.reduce_label(images)

        # First pass: group same-sized images so resizing runs on batched tensors.
        grouped_images, grouped_images_index = group_images_by_shape(images, disable_grouping=disable_grouping)
        resized_images_grouped = {}
        for shape, stacked_images in grouped_images.items():
            if do_resize:
                stacked_images = self.resize(stacked_images, size, resample)
            resized_images_grouped[shape] = stacked_images
        resized_images = reorder_images(resized_images_grouped, grouped_images_index)

        # Second pass: regroup (shapes may have changed) and apply crop, rescale and channel flip.
        grouped_images, grouped_images_index = group_images_by_shape(resized_images, disable_grouping=disable_grouping)
        processed_images_grouped = {}
        for shape, stacked_images in grouped_images.items():
            if do_center_crop:
                stacked_images = self.center_crop(stacked_images, crop_size)
            if do_rescale:
                stacked_images = self.rescale(stacked_images, rescale_factor)
            if do_flip_channel_order:
                stacked_images = self.flip_channel_order(stacked_images)
            processed_images_grouped[shape] = stacked_images

        processed_images = reorder_images(processed_images_grouped, grouped_images_index)
        return processed_images

    def post_process_semantic_segmentation(self, outputs, target_sizes: list[tuple] | None = None):
        """Converts the output of [`MobileViTForSemanticSegmentation`] into semantic segmentation maps."""
        requires_backends(self, ["torch"])
        logits = outputs.logits

        if target_sizes is not None:
            if len(logits) != len(target_sizes):
                raise ValueError(
                    "Make sure that you pass in as many target sizes as the batch dimension of the logits"
                )

            if isinstance(target_sizes, torch.Tensor):
                target_sizes = target_sizes.numpy()

            semantic_segmentation = []
            for idx in range(len(logits)):
                # Upsample each logit map to its target size before taking the per-pixel argmax.
                resized_logits = torch.nn.functional.interpolate(
                    logits[idx].unsqueeze(dim=0), size=target_sizes[idx], mode="bilinear", align_corners=False
                )
                semantic_map = resized_logits[0].argmax(dim=0)
                semantic_segmentation.append(semantic_map)
        else:
            semantic_segmentation = logits.argmax(dim=1)
            semantic_segmentation = [semantic_segmentation[i] for i in range(semantic_segmentation.shape[0])]

        return semantic_segmentation


__all__ = ["MobileViTImageProcessor"]
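

# Usage sketch (illustrative only; assumes the `TorchvisionBackend` base class provides the usual
# `from_pretrained` constructor and `__call__` -> `preprocess` delegation, and uses the public
# "apple/deeplabv3-mobilevit-small" checkpoint as a placeholder; adapt names to your own setup):
#
#   processor = MobileViTImageProcessor.from_pretrained("apple/deeplabv3-mobilevit-small")
#   batch = processor(images=image, segmentation_maps=mask, return_tensors="pt")
#   # batch["pixel_values"]: float tensors resized/center-cropped per the checkpoint config,
#   #   rescaled and flipped RGB -> BGR
#   # batch["labels"]: int64 class-id maps, resized with nearest-neighbour resampling
#   # With `model_outputs` from `MobileViTForSemanticSegmentation` and `image` a PIL image:
#   maps = processor.post_process_semantic_segmentation(model_outputs, target_sizes=[image.size[::-1]])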