
    Z ji              
          S r SSKrSSKJr  SSKrSSKrSSKJr	  SSK
Jr  SSKJr  SSKJrJrJr  SS	KJrJrJrJrJrJr  SS
KJrJr  SSKJrJrJr   " S S\SS9r   S#SSS\!\"\"4   S-  S\"S-  4S jjr#S\!\$\"4   S\%\"\"4   4S jr&S\'\RP                     S\'\'\"      S\%\'\RP                     \'\'\"      4   4S jr)S r*S$S jr+   S%S\,S\,S\%\"\"4   S-  4S  jjr-\ " S! S"\5      5       r.S"/r/g)&zImage processor class for EoMT.    N)Union)
functional   )TorchvisionBackend)BatchFeature)get_size_with_aspect_ratiogroup_images_by_shapereorder_images)IMAGENET_DEFAULT_MEANIMAGENET_DEFAULT_STDChannelDimension
ImageInputPILImageResamplingSizeDict)ImagesKwargsUnpack)
TensorTypeauto_docstringfilter_out_non_signature_kwargsc                   4    \ rS rSr% Sr\\S'   \S-  \S'   Srg)EomtImageProcessorKwargs*   a[  
do_split_image (`bool`, *optional*, defaults to `self.do_split_image`):
    Whether to split the input images into overlapping patches for semantic segmentation. If set to `True`, the
    input images will be split into patches of size `size["shortest_edge"]` with an overlap between patches.
    Otherwise, the input images will be padded to the target size.
ignore_index (`int`, *optional*, defaults to `self.ignore_index`):
    Label to be assigned to background pixels in segmentation maps. If provided, segmentation map pixels
    denoted with 0 (background) will be replaced with `ignore_index`.
do_split_imageNignore_index )	__name__
__module____qualname____firstlineno____doc__bool__annotations__int__static_attributes__r       /root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/eomt/image_processing_eomt.pyr   r   *   s     *r%   r   F)totalsegmentation_maptorch.Tensorinstance_id_to_semantic_idr   c                 |   Ub  [         R                  " U S:H  X S-
  5      n [         R                  " U 5      nUb  X3U:g     nU Vs/ s H  o@U:H  PM	     nnU(       a  [         R                  " USS9nO-[         R                  " S/U R
                  Q7U R                  S9nUbu  [         R                  " UR
                  S   U R                  S9n[        U5       H9  u  pGXb  UR                  5       S-   OUR                  5          nUb  US-
  OUXd'   M;     OUnUR                  5       UR                  5       4$ s  snf )Nr      dimdevice)torchwhereuniquestackzerosshaper0   	enumerateitemfloatlong)	r(   r*   r   
all_labelsibinary_maskslabelslabelclass_ids	            r&   -convert_segmentation_map_to_binary_masks_fastrA   :   s1   
  ;;'71'<l_`L`a./Jl :;
5?@Z*ZL@{{<Q7{{A#?(8(>(>#?HXH_H_` "-Z--a09I9P9PQ!*-HA1G_5::<!3CejeoeoeqsH(4(@1hFI . .. As   D9	size_dictreturnc                 .    U S   nU S   =(       d    UnX4$ )z.Returns the height and width from a size dict.shortest_edgelongest_edger   )rB   target_heighttarget_widths      r&   get_target_sizerI   Y   s$    o.M^,=L&&r%   patchesoffsetsc                     [        [        X5      5      nUR                  S S9  [        U6 u  p4[        U5      [        U5      4$ )z@Sorts patches and offsets according to the original image index.c                     U S   S   $ )Nr   r   )xs    r&   <lambda>-reorder_patches_and_offsets.<locals>.<lambda>g   s    !Qr%   )key)listzipsort)rJ   rK   combinedsorted_offsetssorted_patchess        r&   reorder_patches_and_offsetsrX   a   sC    
 C)*HMM'M(%((^"Nn!555r%   c                     U R                   S   UR                   S   s=:X  a  UR                   S   :X  d  O  [        S5      eUR                  U5      X:  -  nX   X   X%   4$ )a  
Binarize the given masks using `object_mask_threshold`, it returns the associated values of `masks`, `scores` and
`labels`.

Args:
    masks (`torch.Tensor`):
        A tensor of shape `(num_queries, height, width)`.
    scores (`torch.Tensor`):
        A tensor of shape `(num_queries)`.
    labels (`torch.Tensor`):
        A tensor of shape `(num_queries)`.
    object_mask_threshold (`float`):
        A number between 0 and 1 used to binarize the masks.
Raises:
    `ValueError`: Raised when the first dimension doesn't match in all input tensors.
Returns:
    `tuple[`torch.Tensor`, `torch.Tensor`, `torch.Tensor`]`: The `masks`, `scores` and `labels` without the region
    < `object_mask_threshold`.
r   z1mask, scores and labels must have the same shape!)r6   
ValueErrorne)masksscoresr>   object_mask_threshold
num_labelsto_keeps         r&   remove_low_and_no_objectsra   m   s^    ( KKNfll1o@a@LMMii
#v'EFG>6?FO;;r%   c                    X:H  nUR                  5       nX   U:  nUR                  5       nXW-  n	U	R                  5       n
US:  =(       a    US:  =(       a    U
S:  nU(       a  Xh-  nUR                  5       U:  d  SnX4$ )Nr   F)sumr8   )mask_labels
mask_probskmask_thresholdoverlap_mask_area_thresholdmask_kmask_k_areaoriginal_maskoriginal_area
final_maskfinal_mask_areamask_exists
area_ratios                r&   check_segment_validityrq      s    F**,K M^3M!%%'M'J nn&O/Oma&7OOa<OK 0
 #>>K""r%   rg   rh   target_sizec                 p   Uc  U R                   S   OUS   nUc  U R                   S   OUS   n[        R                  " Xx4[        R                  U R                  S9S-
  n	/ n
U R                  5       n US S 2S S 4   U -  R                  S5      nSn0 n[        UR                   S   5       H  nX.   R                  5       n[        XXU5      u  nnU(       d  M.  U(       a  X;   a  X;   a	  X   U	U'   MH  XU'   XU'   [        X   R                  5       S5      nU
R                  UUUS.5        US-  nM     X4$ )Nr,   r      )dtyper0      idlabel_idscore)r6   r1   r5   r:   r0   sigmoidargmaxranger8   rq   roundappend)re   pred_scorespred_labelsstuff_classesrg   rh   rr   heightwidthsegmentationsegmentsrd   current_segment_idstuff_memory_listrf   
pred_classro   rm   segment_scores                      r&   compute_segmentsr      sc    %0$7Za [^F#.#6JQKNE;;ejjIZIZ[^__LH ##%Jq$}-
:BB1EK (*;$$Q'( ^((*
 #9Q8S#
Z Z8.+<+HZ(0B*-#5Z kn113Q7(&&	
 	a7 )8 !!r%   c                     ^  \ rS rSr\r\R                  r\	r
\rSSS.rSrSrSrSrSrSrSrS\\   4U 4S jjrS	\R0                  S
\S\\   S\\\4   4S jrS	\R0                  S
\S\R0                  4S jr\  S2S	\ S\\R0                     S-  S\!\\4   S-  S\\   S\"4
U 4S jjj5       r# S3S	\ S\ S-  S\!\\4   S-  S\$S\%S\&\'-  S-  S\(\&S4   S-  S\\   S\"4S jjr)S	\S   S\$S
\SSS\$S\*S\$S\$S \$S!\*\\*   -  S-  S"\*\\*   -  S-  S#\$S-  4S$ jr+S%\R0                  S&\\\\\4      S'\\\\4      S
\!\&\4   S\\R0                     4
S( jr,S%\R0                  S'\\\\4      S
\!\&\4   S\\R0                     4S) jr- S3S'\\\\4      S
\!\&\4   S-  S\.R^                  4S* jjr0     S4S'\\\\4      S+\*S,\*S-\*S.\\   S-  S
\!\&\4   S-  4S/ jjr1\2" 5         S5S'\\\\4      S+\*S
\!\&\4   S-  4S0 jj5       r3S1r4U =r5$ )6EomtImageProcessor   i  rE   rF   FTNkwargsc                 &   > [         TU ]  " S0 UD6  g )Nr   )super__init__)selfr   	__class__s     r&   r   EomtImageProcessor.__init__   s    "6"r%   imagessizeimage_indicesrC   c                    / / pTUR                   u    pgnUR                  n	[        Xx5      n
[        R                  " X-  5      nX-  U
-
  nUS:  a  XS-
  -  OSn[        U5       H  n[        XU-
  -  5      nX-   nXx:  a  USS2SS2UU2SS24   nOUSS2SS2SS2UU24   n[        [        R                  " USS95       H.  u  nnUR                  U5        UR                  UU   UU/5        M0     M     XE4$ )zCSlices an image into overlapping patches for semantic segmentation.r,   r   Nr-   )r6   rE   maxmathceilr}   r#   r7   r1   unbindr   )r   r   r   r   rJ   patch_offsets_r   r   
patch_sizelonger_sidenum_patchestotal_overlapoverlap_per_patchr<   startendbatch_patch	batch_idxsingles                       r&   _split_imageEomtImageProcessor._split_image   s    "$R$ll1e''
&(ii 89#0;>ALqM1_=VW{#A*;;<=E$C~$Q59a%78$Q1eCi%78%.u||KQ/O%P!	6v&$$mI&>s%KL &Q $ %%r%   c                 *   UR                   u    p4n[        UR                  UR                  =(       d    UR                  S.5      u  pg[	        SXd-
  5      n[	        SXu-
  5      n	SU	SU4n
[
        R                  R                  R                  XSSS9nU$ )z5Pads the image to the target size using zero padding.r   r   constantg        )modevalue)	r6   rI   rE   rF   r   r1   nnr   pad)r   r   r   r   r   r   rG   rH   pad_hpad_wpaddingpadded_imagess               r&   _padEomtImageProcessor._pad  s    $ll1e&5"00$BSBSBiW[WiWij'
# A}-.A|+,eQ&++//jX[/\r%   segmentation_mapsr*   c                 (   > [         TU ]  " XU40 UD6$ )z
segmentation_maps (`ImageInput`, *optional*):
    The segmentation maps to preprocess for corresponding images.
instance_id_to_semantic_id (`list[dict[int, int]]` or `dict[int, int]`, *optional*):
    A mapping between object instance ids and class ids.
)r   
preprocess)r   r   r   r*   r   r   s        r&   r   EomtImageProcessor.preprocess  s     w!&=Wb[abbr%   do_convert_rgbinput_data_formatreturn_tensorsr0   ztorch.devicec                 l   U R                  XXWS9nUR                  SS5      n	UR                  5       n
0 nU R                  " U40 U
D6u  pXS'   XS'   UGb  U R                  USS[        R
                  S9nUR                  5       nUR                  SS[        R                  S	.5        U R                  " SS
U0UD6u  nnU Vs/ s H1  nUR                  S5      R                  [        R                  5      PM3     nn/ / nn[        U5       HS  u  nn[        U[        5      (       a  UU   nOUn[!        UUU	S9u  nnUR#                  U5        UR#                  U5        MU     UUS'   UUS'   U(       a*  U Vs/ s H  n[        R$                  " U5      PM     snUS'   ['        UU/ SQS9$ s  snf s  snf )z
Preprocess image-like inputs.
)r   r   r   r0   r   Npixel_valuesr   rt   F)r   expected_ndimsr   r   )do_normalize
do_rescaleresampler   r   )r   rd   class_labels)r   rd   r   )datatensor_typeskip_tensor_conversionr   )_prepare_image_like_inputspopcopy_preprocessr   FIRSTupdater   NEARESTsqueezetor1   int64r7   
isinstancerR   rA   r   tensorr   )r   r   r   r*   r   r   r   r0   r   r   images_kwargsr   processed_imagesr   processed_segmentation_mapssegmentation_maps_kwargsr   r(   rd   r   idxinstance_idr\   classesrK   s                            r&   _preprocess_image_like_inputs0EomtImageProcessor._preprocess_image_like_inputs!  s    00L] 1 
 zz.$7*.*:*:6*S]*S'/^ -_(*.*I*I( $"2"8"8	 +J +' (.{{}$$++$)"' 2 : :	 .2-=-= .2.6N.*' Up+To@P ((+..u{{;To ( + )+BK)23N)O%%8$??"<S"AK"<K!N$!-"w ""5)##G, *P  #.D#/D JW$X-wU\\'%:-$XD!&#S
 	
7+2 %Ys   8F,9 F1r)   	do_resizer   z7PILImageResampling | tvF.InterpolationMode | int | Noner   rescale_factorr   r   do_pad
image_mean	image_stddisable_groupingc           	      ^   / n[        XS9u  nn0 nUR                  5        H"  u  nnU(       a  U R                  UX4S9nUUU'   M$     [        UU5      nU(       a  [        XS9u  nn/ / nnUR                  5        Hp  u  nnUR                  5        VVVs/ s H  u  nu  nnUU:X  d  M  UPM     nnnnU R	                  UUU5      u  nnUR                  U5        UR                  U5        Mr     [        UU5      u  pU	(       aN  [        XS9u  nnUR                  5        VVs0 s H  u  nnUU R                  UU5      _M     nnn[        UU5      n[        XS9u  nn0 nUR                  5        H  u  nnU R                  UXVXzU5      nUUU'   M!     [        UU5      nUU4$ s  snnnf s  snnf )z4Preprocesses the input images and masks if provided.)r   )imager   r   )	r	   itemsresizer
   r   extendrX   r   rescale_and_normalize)r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   grouped_imagesgrouped_images_indexresized_images_groupedr6   stacked_imagesrJ   original_idx	img_shaper   original_indicessplit_patchesrK   padded_groupedprocessed_images_groupedr   s                                 r&   r   EomtImageProcessor._preprocesso  s   " /DV/o,,!#%3%9%9%;!E>!%>!`,:"5) &<   68LM3H3s0N0%']G)7)=)=)?%~EYE_E_Ea$Ea%A\>IqenrwewLEa ! $ *.):):>4Qa)b&w}-$$W- *@ %@$W!F3H3s0N0TbThThTjTj;P5.tyy66Tj   $N4HIF/DV/o,,#% %3%9%9%;!E>!77
LV_N /=$U+	 &<
 **BDXY..1$s   F"
(F"
" F)segmentation_logitsr   target_sizesc                 ,   UR                   S   n/ n/ nU Hw  n[        XS   US   5      u  pUR                  [        R                  " XYU
4UR
                  S95        UR                  [        R                  " XYU
4UR
                  S95        My     [        U5       H{  u  nu  pnX<   S   X<   S   :  a2  Xl   SS2X2SS24==   X   -  ss'   X|   SS2X2SS24==   S-  ss'   MK  Xl   SS2SS2X24==   X   -  ss'   X|   SS2SS2X24==   S-  ss'   M}     / n[        [        Xg5      5       H_  u  nu  nnUUR                  SS9-  n[        R                  R                  R                  US   UU   S	S
S9S   nUR                  U5        Ma     U$ )aF  
Reconstructs full-size semantic segmentation logits from patch predictions.

Args:
    segmentation_logits (`torch.Tensor`):
        A tensor of shape `(num_patches, num_classes, patch_height, patch_width)` representing predicted logits
        for each image patch.
    patch_offsets (`list[tuple[int, int, int]]`):
        A list of tuples where each tuple contains:
        - `image_index` (int): Index of the original image this patch belongs to.
        - `start` (int): Start pixel index of the patch along the long dimension (height or width).
        - `end` (int): End pixel index of the patch along the long dimension.
    target_sizes (`list[tuple[int, int]]`):
        list of original (height, width) dimensions for each image before preprocessing.
    size (`dict[str, int]`):
        A size dict which was used to resize.
r,   rE   rF   r/   r   N)minN.bilinearFr   r   align_corners)r6   r   r   r1   r5   r0   r7   rS   clampr   r   interpolate)r   r   r   r   r   num_classesaggregated_logitspatch_counts
image_sizer   r   	patch_idx	image_idxpatch_start	patch_endreconstructed_logitsr   	logit_sumcountaveraged_logitsresized_logitss                        r&   merge_image_patches&EomtImageProcessor.merge_image_patches  s   0 *//2&J6zCXZ^_mZnoMF$$U[[+u1MViVpVp%qr[%,HQdQkQk lm ' ?H>V:I:		&q)L,CA,FF!,Q0Eq-HIM`MkkI';+@!(CDID!,Q;3H-HIM`MkkI'1k.C(CDID ?W  "'05F1U'V#C#)U'%++!+*<<O"XX00<<	*!#&#	 = 
 N !''7 (W $#r%   c                     / n[        U5       Hj  u  pV[        XcS   US   5      u  pxX   SS2SU2SU24   n	[        R                  R                  R                  U	S   USSS9S   n
UR                  U
5        Ml     U$ )	zJRestores panoptic segmentation logits to their original image resolutions.rE   rF   Nr   r   Fr   r   )r7   r   r1   r   r   r   r   )r   r   r   r   r  r   original_sizerG   rH   cropped_logitsupsampled_logitss              r&   unpad_imageEomtImageProcessor.unpad_image  s     "+L"9C*DO4d>6J+'M 15a-,6VWN$xx22>>y)J^c  ?    !!"23 #: r%   c                    Ub  UOU R                   nUR                  nUR                  nUR                  n[	        U5      n[
        R                  R                  R                  UUSS9nUR                  SS9SSS24   nUR                  5       n	[
        R                  " SX5      n
U(       a  U R                  XX#5      nOl/ n[        [        U
5      5       HR  n[
        R                  R                  R                  X   R                  SS9X,   SS	S
9nUR!                  US   5        MT     U Vs/ s H  oR#                  SS9PM     nnU$ s  snf )zIPost-processes model outputs into final semantic segmentation prediction.Nr   r   r   r-   .zbqc, bqhw -> bchwr   Fr   )r   masks_queries_logitsclass_queries_logitsr   rI   r1   r   r   r   softmaxr{   einsumr  r}   len	unsqueezer   r|   )r   outputsr   r   r  r  r   output_sizemasks_classesmasks_probsr   output_logitsr   r  logitpredss                   r&   "post_process_semantic_segmentation5EomtImageProcessor.post_process_semantic_segmentation  s]    'tTYY&;;&;;--%d+$xx22>>   ?  
 -444<S#2#XF*224#ll+>[ 445HYelMMS!456!&!4!4!@!@',6616=%*#"'	 "A " $$^A%67 7 3@@-!$-@ As   1E	thresholdrg   rh   r   c                    Ub  UOU R                   nUR                  nUR                  n	U	R                  S   n
U	R                  S   S-
  n[	        U5      n[
        R                  R                  R                  UUSS9nU R                  XU5      nU	R                  SS9R                  S5      u  p/ n[        U
5       H  n[        UU   UU   UU   X;5      u  nnnUR                  S   S::  aK  Ub  UU   OUR                  SS u  nn[
        R                  " UU45      S-
  nUR                  U/ S.5        M|  [!        UUUUUUUb  UU   OSS	9u  nnUR                  UUS.5        M     U$ )
zIPost-processes model outputs into final panoptic segmentation prediction.Nr   r  r,   r   r  r-   r   segments_info)re   r   r   r   rg   rh   rr   )r   r  r  r6   rI   r1   r   r   r   r  r  r   r}   ra   r5   r   r   )r   r  r   r'  rg   rh   r   r   r  r  
batch_sizer_   r  mask_probs_batchpred_scores_batchpred_labels_batchresultsr<   re   r   r   r   r   r   r   s                            r&   "post_process_panoptic_segmentation5EomtImageProcessor.post_process_panoptic_segmentation"  s    'tTYY&;;&;;)//2
)//3a7
%d+$xx22>>   ?  
  ++,@PTU/C/K/KPR/K/S/W/WXZ/[,z"A3L #%6q%9;LQ;OQZ40J[
 "a'3?3KQQ[QaQabcbdQe${{FE?;a?rRS%5%''+-,G/;/GLOT&"L( NNL8TU- #. r%   c           
      l   Ub  UOU R                   nUR                  nUR                  n[        U5      n[        R
                  R                  R                  UUSS9nU R                  XRU5      nUR                  n	UR                  S   n
UR                  S   n/ n[        U
5       GH  nX   nXm   nUR                  SS9SSS24   R                  S5      u  nnUS:  R                  5       nUR                  5       R!                  S	5      UR!                  S	5      -  R#                  S	5      UR!                  S	5      R#                  S	5      S
-   -  nUU-  n[        R$                  " X-   U	S9S	-
  n/ / nnSn[        U5       H  nUU   R'                  5       n[        R(                  " UU   S:H  5      (       a  M9  UU:  d  MA  UUUU   S	:H  '   UR+                  UUU   R'                  5       [-        US5      S.5        US	-  nUR+                  UU   5        M     UR+                  UUS.5        GM     U$ )zDPost-processes model outputs into Instance Segmentation Predictions.Nr   r  r   r  r-   .r,   gư>r/   rv   rw   r)  )r   r  r  rI   r1   r   r   r   r  r0   r6   r}   r  r   r9   r{   flattenrc   r5   r8   allr   r~   )r   r  r   r'  r   r  r  r  r,  r0   r+  num_queriesr/  r<   	mask_pred
mask_classr]   pred_classes
pred_masksmask_scoresr   r   instance_mapsr   r   jrz   s                              r&   "post_process_instance_segmentation5EomtImageProcessor.post_process_instance_segmentation[  sP    'tTYY&;;&;;%d+$xx22>>   ?  
  ++,@PTU%,,)//2
*004z"A(+I-0J $.#5#5"#5#=c3B3h#G#K#KB#O FL#a-..0J %,,.66q9J<N<Nq<QQVVWXY""1%))!,t3K !;.K ;;|vFJL&("8M!";'#A++-yyA!!344)9K7ILA!!34OO"4(4Q(<(<(>%*5!_ '!+&!((A7 ( NNL8TUC #D r%   r   NN)N)皙?      ?rA  NN)rA  N)6r   r   r   r   r   valid_kwargsr   BILINEARr   r   r   r   r   r   default_to_squarer   r   r   r   r   r   r   r   r1   Tensorr   rR   r#   tupler   r   r   r   dictr   r   r!   r   strr   r   r   r9   r   r  r  npndarrayr%  r0  r   r>  r$   __classcell__)r   s   @r&   r   r      s5   +L!**H&J$I #6DIJLNFL#(@!A #&5<< &x &PTUXPY &^cdhjndn^o &85<< x ELL   8<<@	cc  -4c %)cNT$9	c
 12c 
c c. 59L
L
 &,L
 %)cNT$9	L

 L
 ,L
 j(4/L
 c>)*T1L
 12L
 
L
\7/^$7/ 7/ 	7/
 L7/ 7/ 7/ 7/ 7/ 7/ DK'$.7/ 4;&-7/ +7/r7$"\\7$ E#sC-017$ 5c?+	7$
 38n7$ 
ell	7$r"\\ 5c?+ 38n	
 
ell	2 '+	* 5c?+* 38nt#	*
 
*`  #-0*.&*7 5c?+7 	7
 7 &+7 Cy4'7 38nt#7r %&
 &*? 5c?+? 	?
 38nt#? '?r%   r   r@  )rB  rA  )rB  rA  N)0r    r   typingr   numpyrJ  r1   torchvision.transforms.v2r   tvFimage_processing_backendsr   image_processing_utilsr   image_transformsr   r	   r
   image_utilsr   r   r   r   r   r   processing_utilsr   r   utilsr   r   r   r   rH  r#   rA   rI  rG  rI   rR   rF  rX   ra   rq   r9   r   r   __all__r   r%   r&   <module>rX     s`   &     7 ; 2 a a  5 |5 $ 9=#/$/ $S#X 5/ */>'tCH~ '%S/ '	6%,,	6*.tCy/	6
4tDI./	6<8#6  ),*.3"
 3" "'3" sCx4'3"l E+ E EP  
 r%   