
    Z jT                        S r SSKrSSKJrJr  SSKrSSKrSSKJr  SSK	J
r
  SSKJr  SSKJr  SS	KJrJr  SS
KJrJrJrJr  SSKJrJrJrJrJrJrJrJ r J!r!J"r"J#r#J$r$  SSK%J&r&J'r'  SSK(J)r)J*r*J+r+  \+RX                  " \-5      r.\R^                  \R`                  4r1 " S S\&SS9r2S r3S r4S r5S-S jr6    S.S\7S\7S\8\9   S-  S\:\9\94   S-  4S jjr;S\9S\9S\Rx                  S\Rz                  4S jr>  S/S \?S!\\@-  S-  4S" jjrAS#\Rz                  S\Rz                  4S$ jrBS% rC  S0S&\Rz                  S'\DS(\@\R                  -  S)\?S!\\@-  S\D4S* jjrF\* " S+ S,\5      5       rGS,/rHg)1zImage processor class for DETR.    N)AnyOptional)nn)
read_image)
functional   )TorchvisionBackend)BatchFeatureget_size_dict)center_to_corners_formatcorners_to_center_formatget_size_with_aspect_ratiosafe_squeeze)IMAGENET_DEFAULT_MEANIMAGENET_DEFAULT_STDAnnotationFormatAnnotationTypeChannelDimension
ImageInputPILImageResamplingSizeDictget_image_size#get_image_size_for_max_height_widthget_max_height_widthvalidate_annotations)ImagesKwargsUnpack)
TensorTypeauto_docstringloggingc                   4    \ rS rSr% Sr\\-  \S'   \\S'   Sr	g)DetrImageProcessorKwargs<   a  
format (`str`, *optional*, defaults to `AnnotationFormat.COCO_DETECTION`):
    Data format of the annotations. One of "coco_detection" or "coco_panoptic".
do_convert_annotations (`bool`, *optional*, defaults to `True`):
    Controls whether to convert the annotations to the format expected by the DETR model. Converts the
    bounding boxes to the format `(center_x, center_y, width, height)` and in the range `[0, 1]`.
    Can be overridden by the `do_convert_annotations` parameter in the `preprocess` method.
formatdo_convert_annotations N)
__name__
__module____qualname____firstlineno____doc__strr   __annotations__bool__static_attributes__r&       /root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/detr/image_processing_detr.pyr"   r"   <   s     """  r0   r"   F)totalc                 *   SSK Jn  U" U 5      (       a  U R                  5       n U R                  5       n[        R
                  " S/US//5      n[        R                  " USS USS :g  5      S   S-   nUSSS2==   USSS2   -  ss'   [        U5      $ )a  
Converts given binary mask of shape `(height, width)` to the run-length encoding (RLE) format.

Args:
    mask (`torch.Tensor` or `numpy.array`):
        A binary mask tensor of shape `(height, width)` where 0 denotes background and 1 denotes the target
        segment_id or class_id.
Returns:
    `List`: Run-length encoded list of the binary mask. Refer to COCO API for more information about the RLE
    format.
r   )is_torch_tensorr      N   )utilsr4   numpyflattennpconcatenatewherelist)maskr4   pixelsrunss       r1   binary_mask_to_rlerB   J   s     )tzz|\\^F^^aS&1#./F88F12J&"+-.q1A5DAJ$ss)J:r0   c                     [         R                  " U 5      n/ nU H9  n[         R                  " X:H  SS5      n[        U5      nUR	                  U5        M;     U$ )av  
Converts given segmentation map of shape `(height, width)` to the run-length encoding (RLE) format.

Args:
    segmentation (`torch.Tensor` or `numpy.array`):
        A segmentation map of shape `(height, width)` where each value denotes a segment or class id.
Returns:
    `list[List]`: A list of lists, where each list is the run-length encoding of a segment / class id.
r5   r   )torchuniquer=   rB   append)segmentationsegment_idsrun_length_encodingsidxr?   rles         r1   convert_segmentation_to_rlerL   b   sW     ,,|,K{{<.15 &##C( 
  r0   c                     U R                   S   UR                   S   s=:X  a  UR                   S   :X  d  O  [        S5      eUR                  U5      X:  -  nX   X   X%   4$ )a  
Binarize the given masks using `object_mask_threshold`, it returns the associated values of `masks`, `scores` and
`labels`.

Args:
    masks (`torch.Tensor`):
        A tensor of shape `(num_queries, height, width)`.
    scores (`torch.Tensor`):
        A tensor of shape `(num_queries)`.
    labels (`torch.Tensor`):
        A tensor of shape `(num_queries)`.
    object_mask_threshold (`float`):
        A number between 0 and 1 used to binarize the masks.
Raises:
    `ValueError`: Raised when the first dimension doesn't match in all input tensors.
Returns:
    `tuple[`torch.Tensor`, `torch.Tensor`, `torch.Tensor`]`: The `masks`, `scores` and `labels` without the region
    < `object_mask_threshold`.
r   z1mask, scores and labels must have the same shape!)shape
ValueErrorne)masksscoreslabelsobject_mask_threshold
num_labelsto_keeps         r1   remove_low_and_no_objectsrW   w   s^    ( KKNfll1o@a@LMMii
#v'EFG>6?FO;;r0   c                     X:H  nUR                  5       nX   U:  R                  5       nUS:  =(       a    US:  nU(       a  Xg-  n	U	R                  5       U:  d  SnX4$ )Nr   F)sumitem)
mask_labels
mask_probskmask_thresholdoverlap_mask_area_thresholdmask_kmask_k_areaoriginal_areamask_exists
area_ratios
             r1   check_segment_validityre      sj    F**,K  ]n499;M/7ma&7K  0
 #>>Kr0   r^   r_   label_ids_to_fusetarget_sizec                    Uc  U R                   S   OUS   nUc  U R                   S   OUS   n[        R                  " Xx4[        R                  U R                  S9n	/ n
Ub2  [
        R                  R                  U R                  S5      USSS9S   n SnXR                  SSS5      -  n U R                  S5      n0 n[        UR                   S   5       H  nX.   R                  5       nX;   n[        XXU5      u  nnU(       d  M2  X;   a  X   nOUS-  nXU'   [        X   R                  5       S	5      nU
R                  UUUUS
.5        U(       d  M  XU'   M     X4$ )Nr5   r   r7   dtypedevicebilinearFsizemodealign_cornersr6      )idlabel_id	was_fusedscore)rN   rD   zerosint32rk   r   r   interpolate	unsqueezeviewargmaxrangerZ   re   roundrF   )r\   pred_scorespred_labelsr^   r_   rf   rg   heightwidthrG   segmentscurrent_segment_idr[   stuff_memory_listr]   
pred_classshould_fuserc   r`   segment_scores                       r1   compute_segmentsr      s    %0$7Za [^F#.#6JQKNE;;ekk*J[J[\LH]]..  #+JV[ / 


  ""2q!,,J##A&K )+;$$Q'( ^((*
 5 5Q8S
V ;.%6%B""a'" $6 !+."5"5"7;MOO, *!,*	 {0B*-7 ): !!r0   r   r   rk   returnc                     SSK Jn  / nU  H  nUR                  XaU5      nUR	                  U5      n[        UR                  5      S:  a  US   n[        R                  " U[        R                  US9n[        R                  " USS9nUR                  U5        M     U(       a  [        R                  " USS9nU$ [        R                  " SX4[        R                  US9nU$ ! [         a    [        S5      ef = f)	a  
Convert a COCO polygon annotation to a mask.

Args:
    segmentations (`list[list[float]]`):
        List of polygons, each polygon represented by a list of x-y coordinates.
    height (`int`):
        Height of the mask.
    width (`int`):
        Width of the mask.
r   )r?   z1Pycocotools is not installed in your environment.r   ).Nri   r7   )axis)pycocotoolsr?   ImportErrorfrPyObjectsdecodelenrN   rD   	as_tensoruint8anyrF   stackrv   )	segmentationsr   r   rk   	coco_maskrQ   polygonsrlesr?   s	            r1   convert_coco_poly_to_maskr      s    O1 E!$$Xu=%tzz?Q	?Dt5;;vFyyA&T " E* L Q.ekk&QL#  OMNNOs   C$ $C:return_segmentation_masksinput_data_formatc                 $   U R                  5       SS u  pEUS   n[        R                  " U/[        R                  U R                  S9nUS   n/ n/ n	/ n
/ nU Hl  nSU;  d  US   S:X  d  M  UR                  US   5        U	R                  US	   5        U
R                  US
   5        SU;   d  MX  UR                  US   5        Mn     [        R                  " U[        R                  U R                  S9n[        R                  " U	[        R                  U R                  S9n	[        R                  " U[        R                  U R                  S9n[        R                  " U
[        R                  U R                  S9R                  SS5      n
U
SS2SS24==   U
SS2SS24   -  ss'   U
SS2SSS24   R                  SUS9U
SS2SSS24'   U
SS2SSS24   R                  SUS9U
SS2SSS24'   U
SS2S4   U
SS2S4   :  U
SS2S4   U
SS2S4   :  -  nUX   X   X   X   [        R                  " [        U5      [        U5      /[        R                  U R                  S9S.nU(       a_  [        R                  " U[        R                  U R                  S9nX   nUR                  S   nU(       a  UR                  S5      OUnXS'   U(       a1  U Vs/ s H  oS   PM	     nn[        UXEU R                  S9nUU   US'   U$ s  snf )zE
Convert the target in COCO format into the format expected by DETR.
Nimage_idri   annotationsiscrowdr   category_idareabbox	keypointsr6      r7   )minmaxr5   r   )r   class_labelsboxesr   r   	orig_size)r6   r   rG   rk   rQ   )rn   rD   r   int64rk   rF   float32
zeros_likereshapeclipintrN   r   )imagetargetr   r   image_heightimage_widthr   r   classesr   r   r   objr   keep
new_targetnum_keypointssegmentation_masksrQ   s                      r1   !prepare_coco_detection_annotationr     s    !&

RS 1Lj!HzU\\RH 'KGDEIC3y>Q#6NN3}-.KKF$LLV%c!  [!12  oogU[[NG??4u}}U\\JDwekk%,,OGOOEu||LTTUWYZ[E	!QR%LE!RaR%L L1add7^((QK(@E!QTT'N1add7^((QL(AE!QTT'N!Q$K%1+%%1+ad*CDD 
=__c,&7[9I%JRWR]R]fkfrfrsJ OOIU]]5<<X	O	!*2?I%%g.Y	"+; =HI[c.1[I)*<l`e`l`lm#Dk
7	 Js   LrQ   c           	         U R                  5       S:X  a  [        R                  " SU R                  S9$ U R                  SS u  p[        R
                  " SU[        R                  U R                  S9n[        R
                  " SU[        R                  U R                  S9n[        R                  " X4SS9u  p4U [        R                  " US5      -  nUR                  UR                  S   S	5      R                  S	5      S   n[        R                  " XR                  S5      [        R                  " S
U R                  S95      R                  U R                  S   S	5      R                  S	5      S   nU [        R                  " US5      -  nUR                  UR                  S   S	5      R                  S	5      S   n	[        R                  " XR                  S5      [        R                  " S
U R                  S95      R                  U R                  S   S	5      R                  S	5      S   n
[        R                  " XzXi/S5      $ )z
Compute the bounding boxes around the provided panoptic segmentation masks.

Args:
    masks: masks in format `[number_masks, height, width]` where N is the number of masks

Returns:
    boxes: bounding boxes in format `[number_masks, 4]` in xyxy format
r   )r   r   r   r   Nri   ij)indexingr6   g    חAr5   )numelrD   rv   rk   rN   aranger   meshgridry   rz   r   r=   tensorr   r   )rQ   hwyxx_maskx_maxx_miny_masky_maxy_mins              r1   masks_to_boxesr   E  s    {{}{{6%,,77;;rsDAQu||DAQu||DA>>!.DAU__Q**FKKQ,004Q7EE;;q>5<<ELL+QRWWX]XcXcdeXfhjkooprstuv 
 U__Q**FKKQ,004Q7EE;;q>5<<ELL+QRWWX]XcXcdeXfhjkooprstuv 
 ;;e3Q77r0   c                    [        U [        R                  5      (       a  [        U R                  5      S:X  ai  U R
                  [        R                  :X  a  U R                  [        R                  5      n U SS2SS2S4   SU SS2SS2S4   -  -   SU SS2SS2S4   -  -   $ [        U S   SU S   -  -   SU S   -  -   5      $ )z"
Converts RGB color to unique ID.
r   Nr      r5   i   r7   )

isinstancerD   Tensorr   rN   rj   r   torw   r   )colors    r1   	rgb_to_idr   j  s     %&&3u{{+;q+@;;%++%HHU[[)EQ1W~eAq!Gn 44y5Aq>7QQQuQx#a.(9uQx+??@@r0   r   r   
masks_pathreturn_masksc                    [        XS9u  pV[        R                  " U5      US   -  n0 n[        R                  " SU;   a  US   OUS   /[        R
                  U R                  S9US'   [        R                  " XV/[        R
                  U R                  S9US'   [        R                  " XV/[        R
                  U R                  S9US'   SU;   Ga  [        U5      R                  S	S
S5      R                  [        R                  U R                  S9n	[        U	5      n	[        R                  " US    V
s/ s H  oS   PM	     sn
U R                  S9nXSS2SS4   :H  n	U	R                  [        R                  5      n	U(       a  XS'   [        U	5      US'   [        R                  " US    V
s/ s H  oS   PM	     sn
[        R
                  U R                  S9US'   [        R                  " US    V
s/ s H  oS   PM	     sn
[        R
                  U R                  S9US'   [        R                  " US    V
s/ s H  oS   PM	     sn
[        R                  U R                  S9US'   U$ s  sn
f s  sn
f s  sn
f s  sn
f )z.
Prepare a coco panoptic annotation for DETR.
)channel_dim	file_namer   rr   ri   rn   r   segments_infor5   r7   r   r   NrQ   r   r   r   r   r   )r   pathlibPathrD   r   r   rk   r   permuter   rw   r   r.   r   r   )r   r   r   r   r   r   r   annotation_pathr   rQ   segment_infoidss               r1    prepare_coco_panoptic_annotationr   u  sG    !/u TLll:.1DDOJ"__)V3
	FekkbgbnbnJz ,)DEKK`e`l`lmJv#oo|.IQVQ\Q\ejeqeqrJ{& ?+33Aq!<??ekkZ_ZfZf?g% oof_F]^F]lD1F]^glgsgstQd]++$"'w,U3
7%*__=CO=TU=T\-(=TU++<<&

>"
 !&9?9PQ9P)$9PQ++<<!

9
 #__6<_6MN6Ml&!6MN--<<

6 - _ V
 R
 Os   9I22I79I< Jc            $         ^  \ rS rSr\r\R                  r\	r
\r\R                  rSrSrSrSrSSS.rSrSS/rS	\\   S
S4U 4S jjr    SAS\R4                  S\S\S-  S\S-  S\\R>                  -  S-  S\\ -  S-  S
\4S jjr! SBS\R4                  S\"S\#S   S
\R4                  4U 4S jjjr$S\RJ                  4S\\\&4   S\'\(\(4   S\'\(\(4   S\)S\#S   4
U 4S jjjr*S\S\'\(\(4   S
\4S jr+S\S \'\(\(4   S!\'\(\(4   S
\4S" jr,   SCS\R4                  S#\'\(\(4   S\\\&4   S-  S$\S%\(4
S& jjr-\.   SDS'\/S(\0\1\0   -  S-  S\S-  S\\R>                  -  S-  S	\\   S
\24U 4S) jjj5       r3S'\1S*   S(\0\1\0   -  S-  S\S\\R>                  -  S-  S+\S\"SS,S-\S.\)S/\S0\S1\)\1\)   -  S-  S2\)\1\)   -  S-  S3\S4\"S-  S\\-  S-  S5\\4-  S-  S
\24$S6 jr5 SES\)S7\4\1\'   -  S-  4S8 jjr6SBS7\1\'\(\(4      S-  4S9 jjr7     SFS\)S:\)S;\)S7\1\'\(\(4      S-  S<\S-  S
\1\   4S= jjr8     SGS\)S:\)S;\)S>\9\(   S-  S7\1\'\(\(4      S-  S
\1\   4S? jjr:S@r;U =r<$ )HDetrImageProcessori  T   5  shortest_edgelongest_edgeFpixel_values
pixel_maskkwargsr   Nc                   > UR                  SUR                  SU R                  5      5        UR                  SS 5      nUc  S OUR                  SS5      nUb  UOSSS.n[        X#SS	9US'   UR	                  S
5      nUR	                  S5      nUc$  [        U S
S 5      c  Ub  UOU R                  U l        [        TU ]$  " S0 UD6  g )Ndo_padpad_and_return_pixel_maskrn   max_sizer   r   r   F)r   default_to_squarer%   do_normalizer&   )

setdefaultpopr   r   getgetattrr   r%   super__init__)selfr   rn   r   r%   r   	__class__s         r1   r   DetrImageProcessor.__init__  s    (FJJ/JDKK$XYzz&$'<4VZZ
D-I'tsTX-Y&tRWXv "(,D!Ezz.1!)gd<TVZ.[.c:F:R,X\XiXiD'"6"r0   r   r   r$   r   r   r   c                     Ub  UOU R                   nU[        R                  :X  a  Uc  SOUn[        XXFS9nU$ U[        R                  :X  a  Uc  SOUn[        UUUUUS9nU$ [        SU S35      e)z4
Prepare an annotation for feeding into DETR model.
F)r   T)r   r   r   zFormat z is not supported.)r$   r   COCO_DETECTIONr   COCO_PANOPTICr   rO   )r   r   r   r$   r   r   r   s          r1   prepare_annotation%DetrImageProcessor.prepare_annotation  s     "-4;;%4441J1RXq%68F  '5550I0QWp%5%6"3F  wvh.@ABBr0   rn   resamplez0PILImageResampling | tvF.InterpolationMode | intc                 $  > UR                   (       a@  UR                  (       a/  [        UR                  SS UR                   UR                  5      nOUR                  (       a@  UR
                  (       a/  [        UR                  SS UR                  UR
                  5      nOJUR                  (       a*  UR                  (       a  UR                  UR                  4nO[        SU S35      e[        TU ],  " U4[        US   US   S9US.UD6nU$ )	a  
Resize the image to the given size. Size can be `min_size` (scalar) or `(height, width)` tuple. If size is an
int, smaller edge of the image will be matched to this number.

Args:
    image (`torch.Tensor`):
        Image to resize.
    size (`SizeDict`):
        Size of the image's `(height, width)` dimensions after resizing. Available options are:
            - `{"height": int, "width": int}`: The image will be resized to the exact size `(height, width)`.
                Do NOT keep the aspect ratio.
            - `{"shortest_edge": int, "longest_edge": int}`: The image will be resized to a maximum size respecting
                the aspect ratio and keeping the shortest edge less or equal to `shortest_edge` and the longest edge
                less or equal to `longest_edge`.
            - `{"max_height": int, "max_width": int}`: The image will be resized to the maximum size respecting the
                aspect ratio and keeping the height less or equal to `max_height` and the width less or equal to
                `max_width`.
    resample (`PILImageResampling | tvF.InterpolationMode | int`, *optional*, defaults to `PILImageResampling.BILINEAR`):
        Resampling filter to use if resizing the image.
r   Nz\Size must contain 'height' and 'width' keys or 'shortest_edge' and 'longest_edge' keys. Got .r   r5   r   r   rn   r  )r   r   r   rN   
max_height	max_widthr   r   r   rO   r   resizer   )r   r   rn   r  r   new_sizer   s         r1   r  DetrImageProcessor.resize  s    6 $"3"3 2%++bc2BDDVDVX\XiXijH__:5;;rs;KT__^b^l^lmH[[TZZTZZ0Hnosnttuv  
 8A;GRZ
^d
 r0         ?
annotationr   rg   	thresholdc                 ~  > [        X25       VVs/ s H	  u  pgXg-  PM     snnu  p0 n
X:S'   UR                  5        H  u  pUS:X  a;  UnU[        R                  " XX/[        R                  UR
                  S9-  nXS'   MF  US:X  a  UnXU-  -  nUU
S'   M\  US:X  a  USS2S4   nU Vs/ s H$  n[        [        U ]#  U[        US   US   S	9US
9PM&     nn[        R                  " U5      R                  [        R                  5      nUSS2S4   U:  nUU
S'   M  US:X  a  X:S'   M  XU'   M     U
$ s  snnf s  snf )ak  
Resizes an annotation to a target size.

Args:
    annotation (`dict[str, Any]`):
        The annotation dictionary.
    orig_size (`tuple[int, int]`):
        The original size of the input image.
    target_size (`tuple[int, int]`):
        The target size of the image, as returned by the preprocessing `resize` step.
    threshold (`float`, *optional*, defaults to 0.5):
        The threshold used to binarize the segmentation masks.
    resample (`PILImageResampling | tvF.InterpolationMode | int`, defaults to `tvF.InterpolationMode.NEAREST_EXACT`):
        The resampling filter to use when resizing the masks.
rn   r   ri   r   rQ   Nr   r5   r  r  )zipitemsrD   r   r   rk   r   r   r  r   r   r   )r   r  r   rg   r  r  r   origratio_heightratio_widthnew_annotationkeyvaluer   scaled_boxesr   scaled_arearQ   r?   r   s                      r1   resize_annotation$DetrImageProcessor.resize_annotation  s}   . HK;Gb$cGb|vV]Gb$c!!,v$**,JCg~$u JRWR_R_hmhtht(   +7w'"L&@A)4v&ag
 !&	 !& ,d:8;q>UV#Xck ;  !&	   E*--emm<adi/*/w')4v&&+s#3 -6 A %d$s   D4++D:
image_sizec                     Uu  p40 nUR                  5        HU  u  pgUS:X  aF  Un[        U5      nU[        R                  " XCXC/[        R                  UR
                  S9-  nXU'   MQ  XuU'   MW     U$ )Nr   ri   )r  r   rD   r   r   rk   )	r   r  r  r   r   norm_annotationr  r  r   s	            r1   normalize_annotation'DetrImageProcessor.normalize_annotationL  s    $.!$**,JCg~07 JRWR_R_hmhtht  (-$',$ - r0   input_image_sizeoutput_image_sizec                 ^   0 nX6S'   S [        X25       5       u  pxUR                  5        H~  u  pU	S:X  a*  U
n[        R                  " UUSS9n[	        US5      nXS'   M5  U	S:X  a3  U(       a,  U
nU[
        R                  " XX/UR                  S9-  nXS'   Mn  U	S:X  a  X6S'   Mz  XU	'   M     U$ )	z+
Update the annotation for a padded image.
rn   c              3   .   #    U  H  u  pX!-  v   M     g 7fNr&   ).0outputinputs      r1   	<genexpr>IDetrImageProcessor._update_annotation_for_padded_image.<locals>.<genexpr>h  s     $rIqU^Iqs   rQ   r   fillr5   r   r   )r  r  tvFpadr   rD   r   rk   )r   r  r   r!  paddingupdate_bboxesr  r  r  r  r  rQ   r   s                r1   #_update_annotation_for_padded_image6DetrImageProcessor._update_annotation_for_padded_image[  s     !2v$rM^Iq$r!$**,JCg~
 %UA.*/w'M+[)_hmhthtuu*/w'):v&&+s## -$ r0   padded_sizer/  r+  c                    UR                  5       SS  nUS   US   -
  nUS   US   -
  nUS:  d  US:  a  [        SU SU S35      eXb:w  a0  SSX/n	[        R                  " XUS9nUb  U R	                  X6X)U5      n[
        R                  " U[
        R                  UR                  S9n
SU
S US   2S US   24'   XU4$ )	Nr   r   r5   zzPadding dimensions are negative. Please make sure that the padded size is larger than the original size. Got padded size: z, original size: r  r*  ri   )	rn   rO   r,  r-  r0  rD   rv   r   rk   )r   r   r2  r  r/  r+  original_sizepadding_bottompadding_rightr.  r   s              r1   r-  DetrImageProcessor.pad~  s    

RS)$Q-*::#Aq)99A!233>-?PQ^P__`b  '!];GGGE6E%!EE{]

 [[EKKU
=>
%]1%%'9q)9'99:*,,r0   imagesr   c                 (   > [         TU ]  " XX440 UD6$ )a  
annotations (`AnnotationType` or `list[AnnotationType]`, *optional*):
    Annotations to transform according to the padding that is applied to the images.
return_segmentation_masks (`bool`, *optional*, defaults to `self.return_segmentation_masks`):
    Whether to return segmentation masks.
masks_path (`str` or `pathlib.Path`, *optional*):
    Path to the directory containing the segmentation masks.
)r   
preprocess)r   r8  r   r   r   r   r   s         r1   r:  DetrImageProcessor.preprocess  s    " w!&7Pg`fggr0   ztorch.Tensor	do_resizez7PILImageResampling | tvF.InterpolationMode | int | None
do_rescalerescale_factorr   r%   
image_mean	image_stdr   pad_sizereturn_tensorsc           
         Ub  [        U[        5      (       a  U/nUb<  [        U5      [        U5      :w  a$  [        S[        U5       S[        U5       S35      e[	        U5      nUb  [        U[        U5        UbQ  U[        R                  :X  a=  [        U[        R                  [        45      (       d  [        S[        U5       S35      e0 n/ n/ n/ n[        Xb  UOS/[        U5      -  5       H  u  nnUb"  U R                  UUUUU[        R                  S9nU(       aH  U R!                  UXgS9nUb3  U R#                  UUR%                  5       S	S UR%                  5       S	S S
9nUnU R'                  UXXU5      nU(       a-  Ub*  U R)                  U[+        U[        R                  5      5      nUR-                  U5        UR-                  U5        M     UnUb  UOSnU(       GaB  Ub  UR.                  UR0                  4nO[3        U5      n/ n/ n[        Xb  UOS/[        U5      -  5       H  u  nnUUR%                  5       S	S :X  aa  UR-                  U5        UR-                  [4        R6                  " U[4        R8                  UR:                  S95        UR-                  U5        M~  U R=                  UUUUS9u  nnnUR-                  U5        UR-                  U5        UR-                  U5        M     UnUb  UOSnUR?                  S[4        R@                  " USS905        UR?                  S[4        R@                  " USS905        [C        UUS9nUb  U Vs/ s H  n[C        UUS9PM     snUS'   U$ s  snf )zO
Preprocess an image or a batch of images so that it can be used by the model.
NzThe number of images (z) and annotations (z) do not match.zxThe path to the directory containing the mask PNG files should be provided as a `pathlib.Path` or string object, but is z	 instead.)r   r   r   r  r   )r   rg   ri   )r  r/  r   r   dimr   )tensor_typerS   )"r   dictr   rO   r   r   SUPPORTED_ANNOTATION_FORMATSr   r   r   r,   typer  r   r   FIRSTr  r  rn   rescale_and_normalizer  r   rF   r   r   r   rD   onesr   rk   r-  updater   r
   )r   r8  r   r   r   r<  rn   r  r=  r>  r   r%   r?  r@  r   rA  r$   rB  r   dataprocessed_imagesprocessed_annotationspixel_masksr   r  resized_imager2  padded_imagespadded_annotationsr   encoded_inputss                                  r1   _preprocessDetrImageProcessor._preprocess  s   0 "z+t'D'D&-K"s6{c+6F'F(V5H[IYHZZij  "&)" )E{S "*888zGLL#+>??<<@<L;MYX 
  "!$V<S[Z^Y_beflbmYm!nE:&!44.G)&6&<&< 5 
  $E P*!%!7!7""'**,rs"3$1$6$6$8$= "8 "J
 &..ujR^luvE%+*A!66z>RWYiYoYoCpq
##E*!((47 "o8 "/:/F+D#'?26:M!#%(@W^b]cfijpfq]q%r!z%**,rs"33!((/&&uzz+U[[Y^YeYe'fg&--j904;:Mc 19 1-z: $$U+"))*5"":. &s #F0;0G,TKKKu{{;A'FGH^U[[Q%?@A%dG"Wb(WbZ^DWb(N8$ (s   %Ntarget_sizesc                    UR                   UR                  pTUb#  [        U5      [        U5      :w  a  [        S5      e[        R
                  R                  US5      nUSSS24   R                  S5      u  px[        U5      n	Ub  [        U[        5      (       aS  [        R                  " U V
s/ s H  oS   PM	     sn
5      n[        R                  " U V
s/ s H  oS   PM	     sn
5      nOUR                  S5      u  p[        R                  " XX/SS9R                  U	R                   5      nXSS2SSS24   -  n	/ n[#        XxU	5       H1  u  nnnXU:     nUX:     nUX:     nUR%                  UUUS.5        M3     U$ s  sn
f s  sn
f )	a  
Converts the raw output of [`DetrForObjectDetection`] into final bounding boxes in (top_left_x, top_left_y,
bottom_right_x, bottom_right_y) format. Only supports PyTorch.

Args:
    outputs ([`DetrObjectDetectionOutput`]):
        Raw outputs of the model.
    threshold (`float`, *optional*):
        Score threshold to keep object detection predictions.
    target_sizes (`torch.Tensor` or `list[tuple[int, int]]`, *optional*):
        Tensor of shape `(batch_size, 2)` or list of tuples (`tuple[int, int]`) containing the target size
        `(height, width)` of each image in the batch. If unset, predictions will not be resized.
Returns:
    `list[Dict]`: A list of dictionaries, each dictionary containing the scores, labels and boxes for an image
    in the batch as predicted by the model.
NTMake sure that you pass in as many target sizes as the batch dimension of the logitsr6   .r   r5   rD  )rR   rS   r   )logits
pred_boxesr   rO   r   r   softmaxr   r   r   r>   rD   r   unbindr   r   rk   r  rF   )r   outputsr  rX  
out_logitsout_bboxprobrR   rS   r   iimg_himg_w	scale_fctresultsslbru   labelboxs                        r1   post_process_object_detection0DetrImageProcessor.post_process_object_detection$  s}   &  '~~w/A/AH#:#l"33 j  }}$$Z4c3B3h++B/ )2 #,--L%ALqdL%ABL%ALqdL%AB+2215U5$@aHKKELLYIaqj11E651GAq!)m$Eam$EAM"CNNeusKL	 2  &B%As   /FF	c                 X   UR                   nUR                  nUR                  SS9SSS24   nUR                  5       n[        R
                  " SXV5      nUR                  S   nUb  U[        U5      :w  a  [        S5      e/ n	[        U5       HW  n
[        R                  R                  Xz   R                  SS9X*   SS	S
9nUS   R                  SS9nU	R                  U5        MY     U	$ UR                  SS9n	[        U	R                  S   5       Vs/ s H  oU   PM	     n	nU	$ s  snf )a  
Converts the output of [`DetrForSegmentation`] into semantic segmentation maps. Only supports PyTorch.

Args:
    outputs ([`DetrForSegmentation`]):
        Raw outputs of the model.
    target_sizes (`list[tuple[int, int]]`, *optional*):
        A list of tuples (`tuple[int, int]`) containing the target size (height, width) of each image in the
        batch. If unset, predictions will not be resized.
Returns:
    `list[torch.Tensor]`:
        A list of length `batch_size`, where each item is a semantic segmentation map of shape (height, width)
        corresponding to the target_sizes entry (if `target_sizes` is specified). Each entry of each
        `torch.Tensor` correspond to a semantic class id.
r6   rD  .Nzbqc, bqhw -> bchwr   rZ  rl   Frm   r5   )r[  
pred_masksr]  sigmoidrD   einsumrN   r   rO   r|   r   r   rx   ry   r{   rF   )r   r_  rX  class_queries_logitsmasks_queries_logitsmasks_classesmasks_probsrG   
batch_sizesemantic_segmentationrJ   resized_logitssemantic_maprc  s                 r1   "post_process_semantic_segmentation5DetrImageProcessor.post_process_semantic_segmentationY  s_      '~~&11 -444<S#2#XF*224 ||$7T)//2
 #S.. j  %'!Z(!#!:!: %//A/6\=NU_ot "; "  .a077A7>%,,\: ) %$ %1$7$7A$7$>!GLMbMhMhijMkGl$mGl!1%=Gl!$m$$ %ns   D'r^   r_   return_coco_annotationc                    UR                   nUR                  nUR                  S   n	UR                  S   S-
  n
UR                  5       n[        R
                  R                  USS9R                  S5      u  p/ n[        U	5       H  n[        X   X   X   X*5      u  nnnUR                  S   S::  aJ  Ub  X_   OUR                  SS u  nn[        R                  " UU45      S-
  nUR                  U/ S.5        Mx  Ub  X_   OSn[        UUUUU/ US9u  nnU(       a  [        U5      nUR                  UUS.5        M     U$ )a  
Converts the output of [`DetrForSegmentation`] into instance segmentation predictions. Only supports PyTorch.

Args:
    outputs ([`DetrForSegmentation`]):
        Raw outputs of the model.
    threshold (`float`, *optional*, defaults to 0.5):
        The probability score threshold to keep predicted instance masks.
    mask_threshold (`float`, *optional*, defaults to 0.5):
        Threshold to use when turning the predicted masks into binary values.
    overlap_mask_area_threshold (`float`, *optional*, defaults to 0.8):
        The overlap mask area threshold to merge or discard small disconnected parts within each binary
        instance mask.
    target_sizes (`list[Tuple]`, *optional*):
        List of length (batch_size), where each list item (`tuple[int, int]]`) corresponds to the requested
        final size (height, width) of each prediction. If unset, predictions will not be resized.
    return_coco_annotation (`bool`, *optional*):
        Defaults to `False`. If set to `True`, segmentation maps are returned in COCO run-length encoding (RLE)
        format.
Returns:
    `list[Dict]`: A list of dictionaries, one per image, each dictionary containing two keys:
    - **segmentation** -- A tensor of shape `(height, width)` where each pixel represents a `segment_id` or
      `list[List]` run-length encoding (RLE) of the segmentation map if return_coco_annotation is set to
      `True`. Set to `None` if no mask if found above `threshold`.
    - **segments_info** -- A dictionary that contains additional information on each segment.
        - **id** -- An integer representing the `segment_id`.
        - **label_id** -- An integer representing the label / semantic class id corresponding to `segment_id`.
        - **score** -- Prediction score of segment with `segment_id`.
r   r6   r5   rD  NrG   r   r\   r~   r   r^   r_   rf   rg   )r[  rp  rN   rq  r   r   r]  r   r|   rW   rD   rv   rF   r   rL   )r   r_  r  r^   r_   rX  r}  rs  rt  rw  rU   r\   r~   r   rg  rc  mask_probs_itempred_scores_itempred_labels_itemr   r   rG   rg   r   s                           r1   "post_process_instance_segmentation5DetrImageProcessor.post_process_instance_segmentation  sp   L  '~~&11)//2
)//3a7
)113
 $&==#8#89MSU#8#V#Z#Z[]#^  02z"AB[{~{~yC?O-/?
 $$Q'1,3?3KQ`QfQfghgiQj${{FE?;a?rRS .:-E,/4K%5*,,-,G"$'&"L( &:<HNNL8TU9 #: r0   rf   c                    Uc  [         R                  S5        [        5       nUR                  nUR                  nUR
                  S   n	UR
                  S   S-
  n
UR                  5       n[        R                  R                  USS9R                  S5      u  p/ n[        U	5       H  n[        X   X   X   X*5      u  nnnUR
                  S   S::  aJ  Ub  Xo   OUR
                  SS u  nn[        R                  " UU45      S-
  nUR                  U/ S.5        Mx  Ub  Xo   OSn[!        UUUUUUUS9u  nnUR                  UUS.5        M     U$ )	am  
Converts the output of [`DetrForSegmentation`] into image panoptic segmentation predictions. Only supports
PyTorch.

Args:
    outputs ([`DetrForSegmentation`]):
        The outputs from [`DetrForSegmentation`].
    threshold (`float`, *optional*, defaults to 0.5):
        The probability score threshold to keep predicted instance masks.
    mask_threshold (`float`, *optional*, defaults to 0.5):
        Threshold to use when turning the predicted masks into binary values.
    overlap_mask_area_threshold (`float`, *optional*, defaults to 0.8):
        The overlap mask area threshold to merge or discard small disconnected parts within each binary
        instance mask.
    label_ids_to_fuse (`Set[int]`, *optional*):
        The labels in this state will have all their instances be fused together. For instance we could say
        there can only be one sky in an image, but several persons, so the label ID for sky would be in that
        set, but not the one for person.
    target_sizes (`list[Tuple]`, *optional*):
        List of length (batch_size), where each list item (`tuple[int, int]]`) corresponds to the requested
        final size (height, width) of each prediction in batch. If unset, predictions will not be resized.
Returns:
    `list[Dict]`: A list of dictionaries, one per image, each dictionary containing two keys:
    - **segmentation** -- a tensor of shape `(height, width)` where each pixel represents a `segment_id` or
      `None` if no mask if found above `threshold`. If `target_sizes` is specified, segmentation is resized to
      the corresponding `target_sizes` entry.
    - **segments_info** -- A dictionary that contains additional information on each segment.
        - **id** -- an integer representing the `segment_id`.
        - **label_id** -- An integer representing the label / semantic class id corresponding to `segment_id`.
        - **was_fused** -- a boolean, `True` if `label_id` was in `label_ids_to_fuse`, `False` otherwise.
          Multiple instances of the same class / label were fused and assigned a single `segment_id`.
        - **score** -- Prediction score of segment with `segment_id`.
Nz5`label_ids_to_fuse` unset. No instance will be fused.r   r6   r5   rD  r  r  )loggerwarning_oncesetr[  rp  rN   rq  r   r   r]  r   r|   rW   rD   rv   rF   r   )r   r_  r  r^   r_   rf   rX  rs  rt  rw  rU   r\   r~   r   rg  rc  r  r  r  r   r   rG   rg   r   s                           r1   "post_process_panoptic_segmentation5DetrImageProcessor.post_process_panoptic_segmentation  s   V $ WX #&~~&11)//2
)//3a7
)113
 $&==#8#89MSU#8#V#Z#Z[]#^  02z"AB[{~{~yC?O-/?
 $$Q'1,3?3KQ`QfQfghgiQj${{FE?;a?rRS .:-E,/4K%5*,,-,G"3'&"L( NNL8TU1 #2 r0   )r%   )NNNNr$  )NTr   )NNN)r  N)r  r  皙?NF)r  r  r  NN)=r'   r(   r)   r*   r"   valid_kwargsr   BILINEARr  r   r?  r   r@  r   r   r$   r<  r=  r   r   rn   r   model_input_namesr   r   rD   r   rG  r.   r,   r   r   r   r   r   r   r  NEARESTr   tupler   floatr  r  r0  r-  r   r   r   r>   r
   r:  r   rV  rm  r{  r  r  r  r/   __classcell__)r   s   @r1   r   r     s@   +L!**H&J$I,,FIJLF $7D'6#(@!A #d #* +/1504;?||  !4'	
 $($; ',,&- !11D8 
H RV	+||+ + MN	+ 
+ +d QcQkQk7cN7 c?7 38_	7
 7 MN7 7rt sCx UY !!  S/! !c?	! 
!N -1"-||- 38_- cNT)	-
 - -<  EI1504hh $d>&::TAh $($;	h
 ',,&-h 12h 
h h$r^$r $d>&::TAr $(	r
 ',,&-r r r Lr r r r !%r DK'$.r 4;&-r r  T/!r" &&-#r$ j(4/%r( 
)rl `d3"'3=G$u+=UX\=\3j-%USVX[S[_H]`dHd -%d  #-059.3Q Q 	Q
 &+Q 5c?+d2Q !%tQ 
dQl  #-0-159V V 	V
 &+V s8d?V 5c?+d2V 
dV Vr0   r   )r  r  )r  r  NN)FN)TN)Ir+   r   typingr   r   r9   r;   rD   r   torchvision.ior   torchvision.transforms.v2r   r,  image_processing_backendsr	   image_processing_utilsr
   r   image_transformsr   r   r   r   image_utilsr   r   r   r   r   r   r   r   r   r   r   r   processing_utilsr   r   r8   r   r   r    
get_loggerr'   r  r   r   rH  r"   rB   rL   rW   re   r  r  r   r  r   rk   r   r   r.   r,   r   r   r   rG  r   r   r   __all__r&   r0   r1   <module>r     s   &       % 7 ; A     5  
		H	% 0 ? ?AQA_A_` !|5 !0 *<8,  ),)-*.9" 	9"
 "'9" 3x$9" sCx4'9"zS  ell _d_k_k L ',7;	=  $= (#-4	=@8%,, 85<< 8JA 04.<<.. gll". 	.
 (#-. 
.b J
+ J
 J
Z  
 r0   