
    Z jF                         S SK r S SKJr  S SKrS SKJs  Js  Jr  SSK	J
r
  SSKJr  SSKJrJrJr  SSKJrJrJrJr  SSKJrJr  SS	KJrJr  \(       a  S
SKJr  S rS rS r S r!\ " S S\
5      5       r"S/r#g)    N)TYPE_CHECKING   )TorchvisionBackend)BatchFeature)center_to_corners_formatgroup_images_by_shapereorder_images)OPENAI_CLIP_MEANOPENAI_CLIP_STDPILImageResamplingSizeDict)ImagesKwargsUnpack)
TensorTypeauto_docstring   )Owlv2ObjectDetectionOutputc                 ,   U R                  5       (       a@  U R                  [        R                  [        R                  4;   a  U $ U R                  5       $ U R                  [        R                  [        R                  4;   a  U $ U R                  5       $ )N)	is_floating_pointdtypetorchfloat32float64floatint32int64int)ts    ځ/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/owlv2/image_processing_owlv2.py_upcastr    '   sc    GGu}}==qL1779LGGU[[99qFquuwF    c                 f    [        U 5      n U SS2S4   U SS2S4   -
  U SS2S4   U SS2S4   -
  -  $ )a  
Computes the area of a set of bounding boxes, which are specified by its (x1, y1, x2, y2) coordinates.

Args:
    boxes (`torch.FloatTensor` of shape `(number_of_boxes, 4)`):
        Boxes for which the area will be computed. They are expected to be in (x1, y1, x2, y2) format with `0 <= x1
        < x2` and `0 <= y1 < y2`.
Returns:
    `torch.FloatTensor`: a tensor containing the area for each box.
N   r   r   r   )r    )boxess    r   box_arear%   /   sB     ENE!Q$K%1+%%1+ad*CDDr!   c                 V   [        U 5      n[        U5      n[        R                  " U S S 2S S S24   US S 2S S24   5      n[        R                  " U S S 2S SS 24   US S 2SS 24   5      nXT-
  R	                  SS9nUS S 2S S 2S4   US S 2S S 2S4   -  nUS S 2S 4   U-   U-
  nXx-  n	X4$ )Nr#   r   minr   )r%   r   maxr(   clamp)
boxes1boxes2area1area2left_topright_bottomwidth_heightinterunionious
             r   box_iour5   >   s    VEVEyy4!,fQUm<H99VAtQRK0&AB-@L +22q29LAq!LAq$99E!T'NU"U*E
-C:r!   c                 (   [        U[        [        45      (       aS  [        R                  " U Vs/ s H  o"S   PM	     sn5      n[        R                  " U Vs/ s H  o"S   PM	     sn5      nO>[        U[        R
                  5      (       a  UR                  S5      u  p4O[        S5      e[        R                  " X45      n[        R                  " XUXU/SS9nUR                  S5      R                  U R                  5      nX-  n U $ s  snf s  snf )a  
Scale batch of bounding boxes to the target sizes.

Args:
    boxes (`torch.Tensor` of shape `(batch_size, num_boxes, 4)`):
        Bounding boxes to scale. Each box is expected to be in (x1, y1, x2, y2) format.
    target_sizes (`list[tuple[int, int]]` or `torch.Tensor` of shape `(batch_size, 2)`):
        Target sizes to scale the boxes to. Each target size is expected to be in (height, width) format.

Returns:
    `torch.Tensor` of shape `(batch_size, num_boxes, 4)`: Scaled bounding boxes.
r   r   z4`target_sizes` must be a list, tuple or torch.Tensordim)
isinstancelisttupler   tensorTensorunbind	TypeErrorr)   stack	unsqueezetodevice)r$   target_sizesiimage_heightimage_widthmax_sizescale_factors          r   _scale_boxesrJ   N   s     ,u..||<$@<aqT<$@All,#?,QaD,#?@	L%,,	/	/$0$7$7$:!kNOO yy3H;;HGQOL))!,//=L EL %A#?s   D
Dc                     ^  \ rS rSr\R
                  r\r\	r
SSS.rSrSrSrSrSrSrSrS/rSrSrS\\   4U 4S	 jjr  S'S
SS\S\\\   -  S-  4S jjrS(S jrS)SSS\SS4S jjr S)S\S   S\ S-  S\S\S   4S jjr!  S*SSS\"S\ SS4U 4S jjjr#S\S   S\ S\"SSS\ S\ S \S!\ S"\\\   -  S-  S#\\\   -  S-  S\ S-  S$\$\-  S-  S\%4S% jr&S&r'U =r($ )+Owlv2ImageProcessorm   i  )heightwidthTNpixel_valuesgp?kwargsc                 \   > SU;   a  UR                  S5      nX!S'   [        TU ]  " S0 UD6  g )Nrescale
do_rescale )popsuper__init__)selfrQ   rescale_val	__class__s      r   rX   Owlv2ImageProcessor.__init__~   s3      **Y/K#.< "6"r!   outputsr   	thresholdrD   c                    UR                   UR                  pT[        U5      nUb  [        U5      U:w  a  [        S5      e[        R
                  " USS9n[        R                  " UR                  5      nUR                  n	[        U5      nUb  [        XS5      n/ n
[        XU5       H*  u  pnX:  nX   nX   nX   nU
R                  XUS.5        M,     U
$ )a  
Converts the raw output of [`Owlv2ForObjectDetection`] into final bounding boxes in (top_left_x, top_left_y,
bottom_right_x, bottom_right_y) format.

Args:
    outputs ([`Owlv2ObjectDetectionOutput`]):
        Raw outputs of the model.
    threshold (`float`, *optional*, defaults to 0.1):
        Score threshold to keep object detection predictions.
    target_sizes (`torch.Tensor` or `list[tuple[int, int]]`, *optional*):
        Tensor of shape `(batch_size, 2)` or list of tuples (`tuple[int, int]`) containing the target size
        `(height, width)` of each image in the batch. If unset, predictions will not be resized.

Returns:
    `list[Dict]`: A list of dictionaries, each dictionary containing the following keys:
    - "scores": The confidence scores for each predicted box on the image.
    - "labels": Indexes of the classes predicted by the model on the image.
    - "boxes": Image bounding boxes in (top_left_x, top_left_y, bottom_right_x, bottom_right_y) format.
z9Make sure that you pass in as many target sizes as imagesr7   scoreslabelsr$   )logits
pred_boxeslen
ValueErrorr   r)   sigmoidvaluesindicesr   rJ   zipappend)rY   r]   r^   rD   batch_logitsbatch_boxes
batch_sizebatch_class_logitsbatch_scoresbatch_labelsresultsrb   rc   r$   keeps                  r   post_process_object_detection1Owlv2ImageProcessor.post_process_object_detection   s    2 %,NNG4F4Fk&
#L(9Z(GXYY #YY|<}}%7%>%>?)11 /{; #&{AK%([%Q!FE%D\F\FKENNfOP &R r!   c                    UR                   UR                  peUb#  [        U5      [        U5      :w  a  [        S5      eUb  UR                  S   S:w  a  [        S5      e[
        R                  " USS9n[
        R                  " UR                  5      n[        U5      nUS:  a  [        UR                  S	   5       Hj  n	[
        R                  " X   * 5       HJ  n
X   U
   (       d  M  [        Xi   U
SS24   R                  S	5      Xi   5      S	   S	   nS
X'   SX   X:  '   ML     Ml     Ub  [        Xd5      n/ n[
        R                  " U5      n[        UR                  S	   5       H  n	X   nUR!                  5       R#                  5       (       d  M,  SXU:  '   [
        R                  " U5      S-   nXS-  -
  US-  -  n[
        R$                  " USS5      nUX'   X   S	:  nX   U   nXi   U   nUR'                  USUS.5        M     U$ )a2  
Converts the output of [`Owlv2ForObjectDetection.image_guided_detection`] into the format expected by the COCO
api.

Args:
    outputs ([`Owlv2ImageGuidedObjectDetectionOutput`]):
        Raw outputs of the model.
    threshold (`float`, *optional*, defaults to 0.0):
        Minimum confidence threshold to use to filter out predicted boxes.
    nms_threshold (`float`, *optional*, defaults to 0.3):
        IoU threshold for non-maximum suppression of overlapping boxes.
    target_sizes (`torch.Tensor`, *optional*):
        Tensor of shape (batch_size, 2) where each entry is the (height, width) of the corresponding image in
        the batch. If set, predicted normalized bounding boxes are rescaled to the target sizes. If left to
        None, predictions will not be unnormalized.

Returns:
    `list[Dict]`: A list of dictionaries, each dictionary containing the scores, labels and boxes for an image
    in the batch as predicted by the model. All labels are set to None as
    `Owlv2ForObjectDetection.image_guided_detection` perform one-shot object detection.
NzTMake sure that you pass in as many target sizes as the batch dimension of the logitsr   r#   zTEach element of target_sizes must contain the size (h, w) of each image of the batchr`   r7   g      ?r   g              gư>皙?g?ra   )rd   target_pred_boxesrf   rg   shaper   r)   rh   ri   r   rangeargsortr5   rA   rJ   
zeros_likenonzeronumelcliprl   )rY   r]   r^   nms_thresholdrD   rd   target_boxesprobsrb   idxrE   iousrs   alphasquery_scores	max_scorequery_alphasmask
box_scoresr$   s                       r   #post_process_image_guided_detection7Owlv2ImageProcessor.post_process_image_guided_detection   s   ,  '~~w/H/H#Fs<7H(Hstt#(:(:1(=(Bstt		&b)u||, 0= 3\//23|4A!;q> "<#4QT#:#D#DQ#GIZ[\]^_`aD"DG8;FK 45 5 4 #'CL !!&)++A./C!;L'')//11 69L	12 		,/$6I(O<SQL ::lC=L&FK;?DT*J %d+ENNjD5QR' 0* r!   imagesztorch.Tensorconstant_valuereturnc                     UR                   SS u  p4[        X45      nXS-
  nXT-
  nSSXv4n[        R                  " XUS9n	U	$ )z,
Pad an image with zeros to the given size.
Nr   )fill)r{   r)   tvFpad)
rY   r   r   rN   rO   size
pad_bottom	pad_rightpaddingpadded_images
             r   _pad_imagesOwlv2ImageProcessor._pad_images
  sP     RS)6!]
L	a/wwv^Dr!   disable_groupingc                     [        XS9u  pV0 nUR                  5        H  u  pU R                  U	US9n	XU'   M     [        Xv5      n
U
$ )zx
Unlike the Base class `self.pad` where all images are padded to the maximum image size,
Owlv2 pads an image to square.
r   )r   )r   itemsr   r	   )rY   r   r   r   rQ   grouped_imagesgrouped_images_indexprocessed_images_groupedr{   stacked_imagesprocessed_imagess              r   r   Owlv2ImageProcessor.pad  sg     0EV/o,#% %3%9%9%;!E!--- . N /=U+ &< **BYr!   imager   anti_aliasingc                   > UR                   UR                  4nUR                  n[        R                  " USS 5      R                  UR                  5      [        R                  " U5      R                  UR                  5      -  nU(       Ga#  Uc  US-
  S-  R                  SS9nO[        R                  " U5      [        R                  " U5      -  n[        R                  " US:  5      (       a  [        S5      e[        R                  " US:  US:*  -  5      (       a  [        R                  " S5        [        R                  " US:H  5      (       a  Un	O[S[        R                  " SU-  5      R                  5       -  S-   n
[         R"                  " XS   U
S   4UR%                  5       S	9n	OUn	[&        TU ]Q  XS
S9$ )a  
Resize an image as per the original implementation.

Args:
    image (`Tensor`):
        Image to resize.
    size (`dict[str, int]`):
        Dictionary containing the height and width to resize the image to.
    anti_aliasing (`bool`, *optional*, defaults to `True`):
        Whether to apply anti-aliasing when downsampling the image.
    anti_aliasing_sigma (`float`, *optional*, defaults to `None`):
        Standard deviation for Gaussian kernel when downsampling the image. If `None`, it will be calculated
        automatically.
r#   Nr   r   r'   zFAnti-aliasing standard deviation must be greater than or equal to zerozWAnti-aliasing standard deviation greater than zero but not down-sampling along all axesr   )sigmaF)r   	antialias)rN   rO   r{   r   r<   rB   rC   r*   
atleast_1d	ones_likeanyrg   warningswarnceilr   r   gaussian_blurtolistrW   resize)rY   r   r   r   anti_aliasing_sigmarQ   output_shapeinput_shapefactorsfilteredkernel_sizesr[   s              r   r   Owlv2ImageProcessor.resize/  s   , TZZ0kk ,,{12/225<<@5<<P\C]C`C`afamamCnn"*(/!q'8&?&?A&?&F#&+&6&67J&Keoo^eNf&f#9901455$%mnnYY 3a 7GqLIJJMMq yy,122  5::a2E.E#F#J#J#LLqP,,O\!_=EXE_E_Ea
 Hw~hU~CCr!   	do_resizeresamplezPILImageResampling | Nonedo_padrT   rescale_factordo_normalize
image_mean	image_stdreturn_tensorsc           	          [        XS9u  p0 nUR                  5        H  u  nnU R                  UXgSX5      nUUU'   M!     [        UU5      nU(       a  U R	                  USUS9n[        UUS9u  p0 nUR                  5        H$  u  nnU(       d  M  U R                  UX4S9nUUU'   M&     [        UU5      n[        UUS9u  p0 nUR                  5        H  u  nnU R                  USXxX5      nUUU'   M!     [        UU5      n[        SU0US9$ )Nr   Frx   )r   r   )r   r   r   rP   )datatensor_type)r   r   rescale_and_normalizer	   r   r   r   )rY   r   r   r   r   r   rT   r   r   r   r   r   r   rQ   r   r   r   r{   r   r   resized_images_groupedresized_stackresized_imagess                          r   _preprocessOwlv2ImageProcessor._preprocesse  sb   " 0EV/o,#% %3%9%9%;!E>!77
E:N /=$U+ &< **BDXY#xx(8_oxp/D/?0
, "$%3%9%9%;!E>y $.t _0=&u- &< ((>@TU 0E^fv/w,#% %3%9%9%;!E>!77~ZN /=$U+ &< **BDXY.2B!CQ_``r!   rU   )ry   N)rx   g333333?N)rx   )TN))__name__
__module____qualname____firstlineno__r   BILINEARr   r
   r   r   r   r   default_to_square	crop_sizer   do_center_croprT   r   do_convert_rgbmodel_input_namesr   r   r   r   rX   r   r   r:   r;   ru   r   r   boolr   r   r   strr   r   __static_attributes____classcell__)r[   s   @r   rL   rL   m   s	   !**H!JIC(DIINJLN'(NF#!5 # 8<	3-3 3 !4;.5	3jKZ. % R` " !$	 ^$  +  	  
n	 8 # 4D4D 4D 	4D 
4D 4Dl7a^$7a 7a 	7a
 .7a 7a 7a 7a 7a DK'$.7a 4;&-7a +7a j(4/7a 
7a 7ar!   rL   )$r   typingr   r   $torchvision.transforms.v2.functional
transformsv2
functionalr   image_processing_backendsr   image_processing_utilsr   image_transformsr   r   r	   image_utilsr
   r   r   r   processing_utilsr   r   utilsr   r   modeling_owlv2r   r    r%   r5   rJ   rL   __all__rU   r!   r   <module>r      sy   *     2 2 ; 2 _ _ Z Z 4 / :GE > na, na nab	 !
!r!   