
    Z j(                        S SK JrJrJr  SSKJrJrJrJrJ	r	  SSK
JrJr  \" 5       (       a  S SKJr  SSKJrJr  \" 5       (       a  S SKrS S	KJr  SS
KJr  \R.                  " \5      r\" \" SS95       " S S\5      5       rg)    )AnyUnionoverload   )add_end_docstringsis_torch_availableis_vision_availableloggingrequires_backends   )ChunkPipelinebuild_pipeline_init_args)Image)
load_imagevalid_imagesN)BaseModelOutput)2MODEL_FOR_ZERO_SHOT_OBJECT_DETECTION_MAPPING_NAMEST)has_image_processorc                     ^  \ rS rSrSrSrSrSrSrU 4S jr	\
S\\S4   S\\\   -  S	\S
\\\\4      4S j5       r\
S\\\\4      S	\S
\\\\\4         4S j5       r SS\\S\\\\4      4   S\\\   -  S-  S	\S
\\\\4      \\\\\4         -  4U 4S jjjrS rSS jrS rSS jrSSS
\\\4   4S jrSrU =r$ )ZeroShotObjectDetectionPipeline   aN  
Zero shot object detection pipeline using `OwlViTForObjectDetection`. This pipeline predicts bounding boxes of
objects when you provide an image and a set of `candidate_labels`.

Example:

```python
>>> from transformers import pipeline

>>> detector = pipeline(model="google/owlvit-base-patch32", task="zero-shot-object-detection")
>>> detector(
...     "http://images.cocodataset.org/val2017/000000039769.jpg",
...     candidate_labels=["cat", "couch"],
... )
[{'score': 0.287, 'label': 'cat', 'box': {'xmin': 324, 'ymin': 20, 'xmax': 640, 'ymax': 373}}, {'score': 0.254, 'label': 'cat', 'box': {'xmin': 1, 'ymin': 55, 'xmax': 315, 'ymax': 472}}, {'score': 0.121, 'label': 'couch', 'box': {'xmin': 4, 'ymin': 0, 'xmax': 642, 'ymax': 476}}]

>>> detector(
...     "https://huggingface.co/datasets/Narsil/image_dummy/raw/main/parrots.png",
...     candidate_labels=["head", "bird"],
... )
[{'score': 0.119, 'label': 'bird', 'box': {'xmin': 71, 'ymin': 170, 'xmax': 410, 'ymax': 508}}]
```

Learn more about the basics of using a pipeline in the [pipeline tutorial](../pipeline_tutorial)

This object detection pipeline can currently be loaded from [`pipeline`] using the following task identifier:
`"zero-shot-object-detection"`.

See the list of available models on
[huggingface.co/models](https://huggingface.co/models?filter=zero-shot-object-detection).
FTc                 h   > [         TU ]  " S0 UD6  [        U S5        U R                  [        5        g )Nvision )super__init__r   check_model_typer   )selfkwargs	__class__s     ڂ/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/pipelines/zero_shot_object_detection.pyr   (ZeroShotObjectDetectionPipeline.__init__=   s,    "6"$)PQ    imagezImage.Imagecandidate_labelsr   returnc                     g Nr   )r   r$   r%   r   s       r!   __call__(ZeroShotObjectDetectionPipeline.__call__C   s      #r#   c                     g r(   r   )r   r$   r   s      r!   r)   r*   H   s    ber#   Nc           	      \  > SU;   a  UR                  S5      n[        U[        [        R                  45      (       a  XS.nOX[        U[        [
        45      (       a:  [        U5      (       a*  [	        [        TU ]   " S [        X5       5       40 UD65      $  Un[        TU ]   " U40 UD6nU$ )a$  
Detect objects (bounding boxes & classes) in the image(s) passed as inputs.

Args:
    image (`str`, `PIL.Image` or `list[dict[str, Any]]`):
        The pipeline handles three types of images:

        - A string containing an http url pointing to an image
        - A string containing a local path to an image
        - An image loaded in PIL directly

        You can use this parameter to send directly a list of images, or a dataset or a generator like so:

        ```python
        >>> from transformers import pipeline

        >>> detector = pipeline(model="google/owlvit-base-patch32", task="zero-shot-object-detection")
        >>> detector(
        ...     [
        ...         {
        ...             "image": "http://images.cocodataset.org/val2017/000000039769.jpg",
        ...             "candidate_labels": ["cat", "couch"],
        ...         },
        ...         {
        ...             "image": "http://images.cocodataset.org/val2017/000000039769.jpg",
        ...             "candidate_labels": ["cat", "couch"],
        ...         },
        ...     ]
        ... )
        [[{'score': 0.287, 'label': 'cat', 'box': {'xmin': 324, 'ymin': 20, 'xmax': 640, 'ymax': 373}}, {'score': 0.25, 'label': 'cat', 'box': {'xmin': 1, 'ymin': 55, 'xmax': 315, 'ymax': 472}}, {'score': 0.121, 'label': 'couch', 'box': {'xmin': 4, 'ymin': 0, 'xmax': 642, 'ymax': 476}}], [{'score': 0.287, 'label': 'cat', 'box': {'xmin': 324, 'ymin': 20, 'xmax': 640, 'ymax': 373}}, {'score': 0.254, 'label': 'cat', 'box': {'xmin': 1, 'ymin': 55, 'xmax': 315, 'ymax': 472}}, {'score': 0.121, 'label': 'couch', 'box': {'xmin': 4, 'ymin': 0, 'xmax': 642, 'ymax': 476}}]]
        ```


    candidate_labels (`str` or `list[str]` or `list[list[str]]`):
        What the model should recognize in the image.

    threshold (`float`, *optional*, defaults to 0.1):
        The probability necessary to make a prediction.

    top_k (`int`, *optional*, defaults to None):
        The number of top predictions that will be returned by the pipeline. If the provided number is `None`
        or higher than the number of predictions available, it will default to the number of predictions.

    timeout (`float`, *optional*, defaults to None):
        The maximum time in seconds to wait for fetching images from the web. If None, no timeout is set and
        the call may block forever.


Return:
    A list of lists containing prediction results, one list per input image. Each list contains dictionaries
    with the following keys:

    - **label** (`str`) -- Text query corresponding to the found object.
    - **score** (`float`) -- Score corresponding to the object (between 0 and 1).
    - **box** (`dict[str,int]`) -- Bounding box of the detected object in image's original size. It is a
      dictionary with `x_min`, `x_max`, `y_min`, `y_max` keys.
text_queriesr$   r%   c              3   .   #    U  H  u  pXS .v   M     g7f)r.   Nr   ).0imglabelss      r!   	<genexpr>;ZeroShotObjectDetectionPipeline.__call__.<locals>.<genexpr>   s     pSoKCs?Sos   )
pop
isinstancestrr   listtupler   r   r)   zip)r   r$   r%   r   inputsresultsr    s         r!   r)   r*   K   s    ~ V#%zz.9ec5;;/00$KFe}--,u2E2E pSVW\Sop  F'"64V4r#   c                 h    0 nSU;   a  US   US'   0 nSU;   a  US   US'   SU;   a  US   US'   U0 U4$ )Ntimeout	thresholdtop_kr   )r   r   preprocess_paramspostprocess_paramss       r!   _sanitize_parameters4ZeroShotObjectDetectionPipeline._sanitize_parameters   sc    +1)+<i(& .4[.A{+f*0/w' "&888r#   c              #     #    [        US   US9nUS   n[        U[        5      (       a  UR                  S5      n[        R
                  " UR                  UR                  //[        R                  S9n[        U5       H[  u  pgU R                  USS9nU R                  USS9n	U	R                  U R                  5      n	U[        U5      S-
  :H  UUS	.UEU	Ev   M]     g 7f)
Nr$   )r>   r%   ,)dtypept)return_tensorsr   )is_lasttarget_sizecandidate_label)r   r6   r7   splittorchtensorheightwidthint32	enumerate	tokenizerimage_processortorG   len)
r   r;   r>   r$   r%   rK   irL   text_inputsimage_featuress
             r!   
preprocess*ZeroShotObjectDetectionPipeline.preprocess   s     6'?G<!"45&,,/55c:llU\\5;;$?#@T"+,<"=A...NK!11%1MN+..tzz:N$4 5 99*#2 	
 ! 	 #>s   CC c                     UR                  S5      nUR                  S5      nUR                  S5      nU R                  " S0 UD6nX#US.UEnU$ )NrK   rL   rJ   )rK   rL   rJ   r   )r5   model)r   model_inputsrK   rL   rJ   outputsmodel_outputss          r!   _forward(ZeroShotObjectDetectionPipeline._forward   s[    "&&}5&**+<=""9-**,|,(3dkwovwr#   c                 l   / nU H  nUS   n[        U5      nU R                  R                  XRUS   S9S   nUS   R                  5        HI  nUS   U   R	                  5       n	U R                  US   U   S   5      n
XU
S.nUR                  U5        MK     M     [        US S	S
9nU(       a  US U nU$ )NrL   rK   )r`   r?   target_sizesr   scoresboxes)scorelabelboxc                     U S   $ )Nrh   r   )xs    r!   <lambda>=ZeroShotObjectDetectionPipeline.postprocess.<locals>.<lambda>   s    '
r#   T)keyreverse)r   rU   post_process_object_detectionnonzeroitem_get_bounding_boxappendsorted)r   ra   r?   r@   r<   model_outputri   r`   indexrh   rj   results               r!   postprocess+ZeroShotObjectDetectionPipeline.postprocess   s    )L !23E*<8L**HH$UbHc I G !*224)%0557,,WW-=e-DQ-GH#(Ev& 5 * &:DIfuoGr#   rj   ztorch.Tensorc                 V    UR                  5       R                  5       u  p#pEUUUUS.nU$ )z
Turns list [xmin, xmax, ymin, ymax] into dict { "xmin": xmin, ... }

Args:
    box (`torch.Tensor`): Tensor containing the coordinates in corners format.

Returns:
    bbox (`dict[str, int]`): Dict containing the coordinates in corners format.
)xminyminxmaxymax)inttolist)r   rj   r}   r~   r   r   bboxs          r!   rt   1ZeroShotObjectDetectionPipeline._get_bounding_box   s7     "%!1!1!3D	
 r#   r   r(   )g?N)__name__
__module____qualname____firstlineno____doc___load_processor_load_image_processor_load_feature_extractor_load_tokenizerr   r   r   r7   r8   r   dictr)   rC   r[   rb   rz   r   rt   __static_attributes____classcell__)r    s   @r!   r   r      sw   @ O #OR #3-.#BES	/#]`#	d38n	# # ed4S>2eced4PTUXZ]U]P^K_F`e e
 48VS-d38n)==>V S	/D0V 	V
 
d38n	T$sCx.%9 :	:V Vp	9&,^ S#X  r#   r   )typingr   r   r   utilsr   r   r	   r
   r   baser   r   PILr   image_utilsr   r   rN   transformers.modeling_outputsr   models.auto.modeling_autor   
get_loggerr   loggerr   r   r#   r!   <module>r      sq    ' ' k k 9 6=^			H	% ,FG[m [ H[r#   