
    Z j*                         S r SSKJr  SSKrSSKJr  SSKJr  SSKJ	r	J
r
Jr  SSKJrJr  SS	KJrJrJr  \(       a  S
SKJr  \R*                  " \5      rS rS rS rS r\ " S S\5      5       rS/rg)z Image processor class for OwlViT    )TYPE_CHECKINGN   )TorchvisionBackend)center_to_corners_format)OPENAI_CLIP_MEANOPENAI_CLIP_STDPILImageResampling)ImagesKwargsUnpack)
TensorTypeauto_docstringlogging   )OwlViTObjectDetectionOutputc                 ,   U R                  5       (       a@  U R                  [        R                  [        R                  4;   a  U $ U R                  5       $ U R                  [        R                  [        R                  4;   a  U $ U R                  5       $ )N)	is_floating_pointdtypetorchfloat32float64floatint32int64int)ts    ڃ/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/owlvit/image_processing_owlvit.py_upcastr   "   sc    GGu}}==qL1779LGGU[[99qFquuwF    c                    [        U[        [        45      (       aS  [        R                  " U Vs/ s H  o"S   PM	     sn5      n[        R                  " U Vs/ s H  o"S   PM	     sn5      nO>[        U[        R
                  5      (       a  UR                  S5      u  p4O[        S5      e[        R                  " XCXC/SS9nUR                  S5      R                  U R                  5      nX-  n U $ s  snf s  snf )a  
Scale batch of bounding boxes to the target sizes.

Args:
    boxes (`torch.Tensor` of shape `(batch_size, num_boxes, 4)`):
        Bounding boxes to scale. Each box is expected to be in (x1, y1, x2, y2) format.
    target_sizes (`list[tuple[int, int]]` or `torch.Tensor` of shape `(batch_size, 2)`):
        Target sizes to scale the boxes to. Each target size is expected to be in (height, width) format.

Returns:
    `torch.Tensor` of shape `(batch_size, num_boxes, 4)`: Scaled bounding boxes.
r   r   z4`target_sizes` must be a list, tuple or torch.Tensordim)
isinstancelisttupler   tensorTensorunbind	TypeErrorstack	unsqueezetodevice)boxestarget_sizesiimage_heightimage_widthscale_factors         r   _scale_boxesr3   *   s     ,u..||<$@<aqT<$@All,#?,QaD,#?@	L%,,	/	/$0$7$7$:!kNOO;;;U[\]L))!,//=L EL %A#?s   C4C9c                 f    [        U 5      n U SS2S4   U SS2S4   -
  U SS2S4   U SS2S4   -
  -  $ )a  
Computes the area of a set of bounding boxes, which are specified by its (x1, y1, x2, y2) coordinates.

Args:
    boxes (`torch.FloatTensor` of shape `(number_of_boxes, 4)`):
        Boxes for which the area will be computed. They are expected to be in (x1, y1, x2, y2) format with `0 <= x1
        < x2` and `0 <= y1 < y2`.
Returns:
    `torch.FloatTensor`: a tensor containing the area for each box.
N   r   r   r   )r   )r-   s    r   box_arear6   F   sB     ENE!Q$K%1+%%1+ad*CDDr   c                 V   [        U 5      n[        U5      n[        R                  " U S S 2S S S24   US S 2S S24   5      n[        R                  " U S S 2S SS 24   US S 2SS 24   5      nXT-
  R	                  SS9nUS S 2S S 2S4   US S 2S S 2S4   -  nUS S 2S 4   U-   U-
  nXx-  n	X4$ )Nr5   r   )minr   )r6   r   maxr8   clamp)
boxes1boxes2area1area2left_topright_bottomwidth_heightinterunionious
             r   box_iourE   U   s    VEVEyy4!,fQUm<H99VAtQRK0&AB-@L +22q29LAq!LAq$99E!T'NU"U*E
-C:r   c            	          ^  \ rS rSr\R
                  r\r\	r
SSS.rSrSSS.rSrSrSrSrSrS/rS\\   4U 4S jjr  SS
SS\S\\\   -  S	-  4S jjrSS jrSrU =r$ )OwlViTImageProcessore   i   )heightwidthTFpixel_valueskwargsc                 \   > SU;   a  UR                  S5      nX!S'   [        TU ]  " S0 UD6  g )Nrescale
do_rescale )popsuper__init__)selfrL   rescale_val	__class__s      r   rS   OwlViTImageProcessor.__init__t   s3      **Y/K#.< "6"r   Noutputsr   	thresholdr.   c                    UR                   UR                  pT[        U5      nUb  [        U5      U:w  a  [        S5      e[        R
                  " USS9n[        R                  " UR                  5      nUR                  n	[        U5      nUb  [        XS5      n/ n
[        XU5       H*  u  pnX:  nX   nX   nX   nU
R                  XUS.5        M,     U
$ )a  
Converts the raw output of [`OwlViTForObjectDetection`] into final bounding boxes in (top_left_x, top_left_y,
bottom_right_x, bottom_right_y) format.

Args:
    outputs ([`OwlViTObjectDetectionOutput`]):
        Raw outputs of the model.
    threshold (`float`, *optional*, defaults to 0.1):
        Score threshold to keep object detection predictions.
    target_sizes (`torch.Tensor` or `list[tuple[int, int]]`, *optional*):
        Tensor of shape `(batch_size, 2)` or list of tuples (`tuple[int, int]`) containing the target size
        `(height, width)` of each image in the batch. If unset, predictions will not be resized.

Returns:
    `list[Dict]`: A list of dictionaries, each dictionary containing the following keys:
    - "scores": The confidence scores for each predicted box on the image.
    - "labels": Indexes of the classes predicted by the model on the image.
    - "boxes": Image bounding boxes in (top_left_x, top_left_y, bottom_right_x, bottom_right_y) format.
z9Make sure that you pass in as many target sizes as imagesr    scoreslabelsr-   )logits
pred_boxeslen
ValueErrorr   r9   sigmoidvaluesindicesr   r3   zipappend)rT   rX   rY   r.   batch_logitsbatch_boxes
batch_sizebatch_class_logitsbatch_scoresbatch_labelsresultsr]   r^   r-   keeps                  r   post_process_object_detection2OwlViTImageProcessor.post_process_object_detection~   s    2 %,NNG4F4Fk&
#L(9Z(GXYY #YY|<}}%7%>%>?)11 /{; #&{AK%([%Q!FE%D\F\FKENNfOP &R r   c                    UR                   UR                  peUb#  [        U5      [        U5      :w  a  [        S5      eUb  UR                  S   S:w  a  [        S5      e[
        R                  " USS9n[
        R                  " UR                  5      n[        U5      nUS:  a  [        UR                  S	   5       Hj  n	[
        R                  " X   * 5       HJ  n
X   U
   (       d  M  [        Xi   U
SS24   R                  S	5      Xi   5      S	   S	   nS
X'   SX   X:  '   ML     Ml     Ub  [        Xd5      n/ n[
        R                  " U5      n[        UR                  S	   5       H  n	X   nUR!                  5       R#                  5       (       d  M,  SXU:  '   [
        R                  " U5      S-   nXS-  -
  US-  -  n[
        R$                  " USS5      nUX'   X   S	:  nX   U   nXi   U   nUR'                  USUS.5        M     U$ )a5  
Converts the output of [`OwlViTForObjectDetection.image_guided_detection`] into the format expected by the COCO
api.

Args:
    outputs ([`OwlViTImageGuidedObjectDetectionOutput`]):
        Raw outputs of the model.
    threshold (`float`, *optional*, defaults to 0.0):
        Minimum confidence threshold to use to filter out predicted boxes.
    nms_threshold (`float`, *optional*, defaults to 0.3):
        IoU threshold for non-maximum suppression of overlapping boxes.
    target_sizes (`torch.Tensor`, *optional*):
        Tensor of shape (batch_size, 2) where each entry is the (height, width) of the corresponding image in
        the batch. If set, predicted normalized bounding boxes are rescaled to the target sizes. If left to
        None, predictions will not be unnormalized.

Returns:
    `list[Dict]`: A list of dictionaries, each dictionary containing the scores, labels and boxes for an image
    in the batch as predicted by the model. All labels are set to None as
    `OwlViTForObjectDetection.image_guided_detection` perform one-shot object detection.
NzTMake sure that you pass in as many target sizes as the batch dimension of the logitsr   r5   zTEach element of target_sizes must contain the size (h, w) of each image of the batchr[   r    g      ?r   g              gư>皙?g?r\   )r_   target_pred_boxesra   rb   shaper   r9   rc   rd   r   rangeargsortrE   r*   r3   
zeros_likenonzeronumelcliprg   )rT   rX   rY   nms_thresholdr.   r_   target_boxesprobsr]   idxr/   iousrn   alphasquery_scores	max_scorequery_alphasmask
box_scoresr-   s                       r   #post_process_image_guided_detection8OwlViTImageProcessor.post_process_image_guided_detection   s   ,  '~~w/H/H#Fs<7H(Hstt#(:(:1(=(Bstt		&b)u||, 0= 3\//23|4A!;q> "<#4QT#:#D#DQ#GIZ[\]^_`aD"DG8;FK 45 5 4 #'CL !!&)++A./C!;L'')//11 69L	12 		,/$6I(O<SQL ::lC=L&FK;?DT*J %d+ENNjD5QR' 0* r   rP   )rt   N)rs   g333333?N)__name__
__module____qualname____firstlineno__r	   BICUBICresampler   
image_meanr   	image_stdsizedefault_to_square	crop_size	do_resizedo_center_croprO   do_normalizedo_convert_rgbmodel_input_namesr   r
   rS   r   r   r#   r$   rp   r   __static_attributes____classcell__)rV   s   @r   rG   rG   e   s    !))H!JIC(D-IINJLN'(#!5 # 8<	3.3 3 !4;.5	3jK Kr   rG   )__doc__typingr   r   image_processing_backendsr   image_transformsr   image_utilsr   r   r	   processing_utilsr
   r   utilsr   r   r   modeling_owlvitr   
get_loggerr   loggerr   r3   r6   rE   rG   __all__rP   r   r   <module>r      s    '    ; 8 P P 4 8 8 < 
		H	%G8E  X- X Xv "
"r   