
    Z jo                        S r SSKJr  SSKrSSKJr  SSKJr  SSK	J
r
JrJr  SSKJrJrJrJr  SS	KJr  \R(                  " \5      r\" 5       (       a  SSKrS
 rS rS rS rS rS rS r\" SS9\ " S S\5      5       5       rS/r g)z
Processor class for SAM3.
    )deepcopyN   )
ImageInput)ProcessorMixin)BatchEncodingPreTokenizedInput	TextInput)
TensorTypeauto_docstringis_torch_availablelogging)requiresc                     U R                  S5      u  pp4USU-  -
  USU-  -
  USU-  -   USU-  -   /n[        R                  " USS9$ N      ?dimunbindtorchstackxx_cy_cwhbs         y/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/sam3/processing_sam3.pybox_cxcywh_to_xyxyr!   #   sR    XXb\NCa
a-3q=C#'MS37]LA;;qb!!    c                 v    U R                  S5      u  pp4USU-  -
  USU-  -
  X4/n[        R                  " USS9$ r   r   r   s         r    box_cxcywh_to_xywhr$   )   s@    XXb\NCa
a-3q=A4A;;qb!!r"   c                 f    U R                  S5      u  pp#XX-   X-   /n[        R                  " USS9$ Nr   r   r   r   yr   r   r   s        r    box_xywh_to_xyxyr)   /   4    "JA!
AEQU$A;;qb!!r"   c                 v    U R                  S5      u  pp#U SU-  -   USU-  -   X#/n[        R                  " USS9$ r   r   r'   s        r    box_xywh_to_cxcywhr,   5   s@    "JA!
cAg+S1W0A;;qb!!r"   c                 f    U R                  S5      u  pp#XX -
  X1-
  /n[        R                  " USS9$ r&   r   )r   r(   XYr   s        r    box_xyxy_to_xywhr0   ;   r*   r"   c                 |    U R                  S5      u  pp4X-   S-  X$-   S-  X1-
  XB-
  /n[        R                  " USS9$ )Nr      r   r   )r   x0y0x1y1r   s         r    box_xyxy_to_cxcywhr7   A   sB    XXb\NBB
'QA27<A;;qb!!r"   c                 <    U R                  S5      u  pp4X1-
  XB-
  -  $ )z
Batched version of box area. Boxes should be in [x0, y0, x1, y1] format.

Inputs:
- boxes: Tensor of shape (..., 4)

Returns:
- areas: Tensor of shape (...,)
r   )r   )boxesr3   r4   r5   r6   s        r    box_arear:   G   s%     \\"%NBBG  r"   )r   )backendsc                   &  ^  \ rS rSr S"S\S-  S\4U 4S jjjr\       S#S\S-  S\\	-  \
\   -  \
\	   -  S-  S\S-  S	\
\
\
\         \R                  -  S-  S
\
\
\
\         \R                  -  S-  S\
\
\      \R                  -  S-  S\\-  S-  S\4S jj5       rS$S%S jjrS rS&S jrS rS'S jrS(S jrS rS r S'S\R                  \R6                  -  \
-  S\S\S\S\S-  S\
4S jjrS)S jrS*S jrS+S jr   S,S  jr S!r!U =r"$ )-Sam3ProcessorU   Ntarget_sizepoint_pad_valuec                    > [         TU ]  " X40 UD6  X@l        Ub  X0l        gU R                  R                  S   U l        g)z
target_size (`int`, *optional*):
    The target size (target_size, target_size) to which the image will be resized.
point_pad_value (`int`, *optional*, defaults to -10):
    The value used for padding input boxes.
Nheight)super__init__r@   image_processorsizer?   )selfrE   	tokenizerr?   r@   kwargs	__class__s         r    rD   Sam3Processor.__init__X   sA     	>v>.*5*A;tG[G[G`G`aiGjr"   imagestextsegmentation_mapsinput_boxesinput_boxes_labelsoriginal_sizesreturn_tensorsreturnc                    Sn	Ub  U R                   " U4UUS.UD6n	O[UbJ  [        U[        R                  5      (       a  UR	                  5       R                  5       n[        SU0US9n	OUb  [        S5      eU R                  X$5      nUb(  U R                  X'SSS9n
U	b  U	R                  U
5        OU
n	UGb=  U	S   nU R                  US	S
SSS9nU R                  USSSS9nUb  Uc  U R                  U5      nUb  U R                  U5      SS nUb  U R                  U5      SS nUb  Ub  WW:w  a  [        S5      eUbi  U R                  UWS/-   5      n[        R                  " U[        R                   S9nU R#                  UUSSS9  [%        U5      nU	R                  SU05        UbH  U R                  UW5      n[        R                  " U[        R&                  S9nU	R                  SU05        U	$ )a  
images (`ImageInput`, *optional*):
    The image(s) to process.
text (`str`, `list[str]`, `list[list[str]]`, *optional*):
    The text to process.
segmentation_maps (`ImageInput`, *optional*):
    The segmentation maps to process.
input_boxes (`list[list[list[float]]]`, `torch.Tensor`, *optional*):
    The bounding boxes to process.
input_boxes_labels (`list[list[int]]`, `torch.Tensor`, *optional*):
    The labels for the bounding boxes.
original_sizes (`list[list[float]]`, `torch.Tensor`, *optional*):
    The original sizes of the images.

Returns:
    A [`BatchEncoding`] with the following fields:
    - `pixel_values` (`torch.Tensor`): The processed image(s).
    - `original_sizes` (`list[list[float]]`): The original sizes of the images.
    - `labels` (`torch.Tensor`): The processed segmentation maps (if provided).
    - `input_boxes_labels` (`torch.Tensor`): The processed labels for the bounding boxes.
    - `input_boxes` (`torch.Tensor`): The processed bounding boxes.
N)rN   rR   rQ   )tensor_typezKEither images or original_sizes must be provided if input_boxes is not None
max_length    )rR   paddingrV   r   r9   z)[image level, box level, box coordinates]   )expected_depth
input_nameexpected_formatexpected_coord_sizer2   labelsz[image level, box level])rZ   r[   r\   zaInput boxes and labels have inconsistent dimensions. Please ensure they have the same dimensions.)dtypeT)is_bounding_boxpreserve_paddingrO   rP   )rE   
isinstancer   Tensorcputolistr   
ValueError_resolve_text_promptsrH   update_validate_single_input_generate_default_box_labels_get_nested_dimensions_pad_nested_listtensorfloat32_normalize_tensor_coordinatesr7   int64)rG   rL   rM   rN   rO   rP   rQ   rR   rI   encodingtext_inputsprocessed_boxesprocessed_boxes_labelsboxes_max_dimsboxes_labels_max_dimspadded_boxesfinal_boxespadded_boxes_labelsfinal_boxes_labelss                      r    __call__Sam3Processor.__call__e   sl   D ++"3- 	H '.%,,77!/!3!3!5!<!<!>$&6%GUcdH$jkk))$<..Vboq.rK#,& "%&67N"99 " K$% : O &*%@%@" # :	 &A &" */E/M)-)J)J?)[& *!%!<!<_!Mbq!Q%1(,(C(CDZ([\^]^(_% */E/Q!%::${ 
 *#44_nXYWZFZ[#ll<u}}M22X\ 3  1= <=%1&*&;&;<RTi&j#%*\\2EU[[%Y"!57I JKr"   c                     Uu  pE[        U5      R                  5       nU(       a  UR                  SSS5      nUS   U-  US'   US   U-  US'   U(       a  UR                  SS5      nU$ )a  
Expects a numpy array of length 2 in the final dimension. Requires the original image size in (H, W) format.

Args:
    target_size (`int`):
        The target size of the image.
    coords (`torch.Tensor`):
        The coordinates to be normalized.
    original_size (`tuple`):
        The original size of the image.
    is_bounding_box (`bool`, *optional*, defaults to `False`):
        Whether the coordinates are bounding boxes.
r   r2   ).r   ).   rY   )r   floatreshape)rG   coordsoriginal_sizer`   old_hold_ws         r    _normalize_coordinates$Sam3Processor._normalize_coordinates   so     %&!'')^^B1-F%/v%/v^^B*Fr"   c                 T    U Vs/ s H  o"c  SOS/[        U5      -  PM     sn$ s  snf )z[Generate default box labels: `point_pad_value` for None (padded) entries, 1 for real boxes.Nr~   )len)rG   rs   image_boxess      r    rj   *Sam3Processor._generate_default_box_labels   s0    [jk[jK+!s;7G1GG[jkkks   %c           	         Uc  g[        U[        R                  5      (       ad  X2S-
  :X  d  [        UR                  5      S::  a  UR                  5       R                  5       $ U Vs/ s H  o@R                  XBUS-   5      PM     sn$ [        U[        R                  5      (       aV  X2S-
  :X  d  [        UR                  5      S::  a  UR                  5       $ U Vs/ s H  o@R                  XBUS-   5      PM     sn$ [        U[        5      (       a2  X2:X  a  U$ U Vs/ s H  nUb  U R                  XBUS-   5      OSPM     sn$ [        U[        [        45      (       a  U$ [        S[        U5       35      es  snf s  snf s  snf )af  
Recursively convert various input formats (tensors, numpy arrays, lists) to nested lists.
Preserves None values within lists.

Args:
    data: Input data in any format (may be None or contain None values)
    expected_depth: Expected nesting depth
    current_depth: Current depth in recursion

Returns:
    Nested list representation of the data (or None)
Nr2   r~   zUnsupported data type: )rb   r   rc   r   shapenumpyre   _convert_to_nested_listnpndarraylistintr   rf   type)rG   datarZ   current_depthitems        r    r   %Sam3Processor._convert_to_nested_list   sq    < dELL)) 22c$**o6Jzz|**,,jnojnbf44T=[\K\]jnoobjj)) 22c$**o6J{{}$jnojnbf44T=[\K\]jnood##.
 !% $ ^b]mD00}WXGXYsww $  sEl++K6tDzlCDD' p
 ps   'E8E=$Fc                 R   Uc  U(       a  S$ S$ [        U[        [        45      (       d  U$ [        U5      nU(       a<  [        U5      [        U5      :w  a$  [	        S[        U5       S[        U5       S35      e[        U5       H  u  p4Ub  M
  U(       d  M  X#   c  M  SX'   M      U$ )zA
Resolve text prompts by setting defaults based on prompt types.
NvisualzEThe number of text prompts must match the number of input boxes. Got z text prompts and z input boxes.)rb   r   tupler   rf   	enumerate)rG   rM   rO   i
text_values        r    rg   #Sam3Processor._resolve_text_prompts  s    
 <*844$u..K Dz3t9K(884yk!3C4D3E]T  't_MA!kkkn6P" - r"   c                    Uc  / n[        U[        5      (       d  U$ [        U5      S:X  a  UR                  [        U5      5        O[	        US   [        U5      5      US'   [        U5      S:  a  U H  nUc  M  [        U[        5      (       d  M  U R                  U5      n[        U5       H@  u  pVUS-   [        U5      :  a  UR                  U5        M*  [	        X%S-      U5      X%S-   '   MB     M     U$ )aH  
Get the maximum dimensions at each level of nesting, skipping None values.

Args:
    nested_list (`list`):
        Nested list structure (may contain None values).
    max_dims (`list`, *optional*):
        Current maximum dimensions (for recursion).

Returns:
    `list`: A list of maximum dimensions for each nesting level.
r   r~   )rb   r   r   appendmaxrk   r   )rG   nested_listmax_dimsr   sub_dimsr   r   s          r    rk   $Sam3Processor._get_nested_dimensions6  s     H+t,,Ox=AOOC,-hqk3{+;<HQK{a#<dD))#::4@H"+H"5q5CM1$OOC0.1(q5/3.GHUO	 #6 $ r"   c                 |   Uc  U R                   nU[        U5      :  a  U$ [        U[        5      (       d  U/n[        U5      nX#   nU[        U5      S-
  :X  a  UR	                  U/Xe-
  -  5        OUS:  am  U[        U5      S-
  :  a  X#S-   S nU R                  Xt5      nOU/X#S-      -  nUR	                  [        Xe-
  5       V	s/ s H  n	[        U5      PM     sn	5        OLX#S-   S nU R                  Xt5      nUR	                  [        U5       V	s/ s H  n	[        U5      PM     sn	5        U[        U5      S-
  :  ao  [        [        U5      5       HW  n
X   c  X#S-   S nU R                  Xt5      X'   M$  [        X   [        5      (       d  M=  U R                  X   X#S-   U5      X'   MY     U$ s  sn	f s  sn	f )a  
Recursively pad a nested list to match target dimensions. Replaces None values with padded structures.

Args:
    nested_list (`list`):
        Nested list to pad (may contain None values).
    target_dims (`list`):
        Target dimensions for each level.
    current_level (`int`, *optional*, defaults to 0):
        Current nesting level.
    pad_value (`int`, *optional*):
        Value to use for padding.

Returns:
    `list`: The padded nested list.
Nr~   r   r2   )	r@   r   rb   r   extend_create_empty_nested_structureranger   rl   )rG   r   target_dimscurrent_level	pad_valuecurrent_sizer?   template_dimstemplate_r   s              r    rl   Sam3Processor._pad_nested_list^  s   " ,,IC,, +t,,&-K ;'!0 C,q00	{k.HIJ a 3{#3a#77$/0A0C$DM#BB=\H !*{[9J-KKH""kF`@a#b@a1HX$6@a#bc !,A,=,? @>>}X""k@R#S@R1HX$6@R#ST 3{+a//3{+,>)$/0A0C$DM%)%H%H%bKN55%)%:%:;>;hiXikt%uKN - # $c
 $Ts   F4F9c                     [        U5      S:X  a	  U/US   -  $ [        US   5       Vs/ s H  o0R                  USS U5      PM     sn$ s  snf )z
Create an empty nested structure with given dimensions filled with pad_value.

Args:
    dims (`list`):
        The dimensions of the nested structure.
    pad_value (`int`):
        The value to fill the structure with.
r~   r   N)r   r   r   )rG   dimsr   r   s       r    r   ,Sam3Processor._create_empty_nested_structure  sX     t9>;a((V[\`ab\cVdeVdQR77QR)LVdeees   A
c                    [        U[        5      (       a3  [        U5      S:X  a  gU H  nUc  M  SU R                  U5      -   s  $    g[        U[        R
                  [        R                  45      (       a  [        UR                  5      $ g)z
Get the nesting level of a list structure, skipping None values.

Args:
    input_list (`list`):
        The list to get the nesting level of.
r   r~   )	rb   r   r   _get_nesting_levelr   r   r   rc   r   )rG   
input_listr   s      r    r    Sam3Processor._get_nesting_level  sy     j$'':!#"#t66t<<< # 
RZZ$>??z''((r"   r   rZ   r[   r\   r]   c                    Uc  g[        U[        R                  [        R                  45      (       a{  UR
                  U:w  a"  [        SU SU SU SUR
                   S3	5      eUb5  UR                  S   U:w  a"  [        SU SU SUR                  S    S	35      eU R                  X5      $ [        U[        5      (       a?  U R                  U5      nXb:w  a  [        SU S
U SU SU S3	5      eU R                  X5      $ g)aW  
        Validate a single input by ensuring proper nesting and raising an error if the input is not valid.

        Args:
            data (`torch.Tensor`, `np.ndarray`, or `list`):
                Input data to process.
            expected_depth (`int`):
                Expected nesting depth.
            input_name (`str`):
                Name of the input for error messages.
            expected_format (`str`):
                The expected format of the input.
            expected_coord_size (`int`, *optional*):
                Expected coordinate size (4 for boxes, None for labels).
.
NzInput z must be a tensor/array with z, dimensions. The expected nesting format is z. Got z dimensions.r   z as the last dimension, got .z must be a nested list with z( levels. The expected nesting format is z levels.)rb   r   rc   r   r   ndimrf   r   r   r   r   )rG   r   rZ   r[   r\   r]   r   s          r    ri   $Sam3Processor._validate_single_input  s   0 < dU\\2::677yyN* ZL(EnEU  VB  CR  BS  SY  Z^  Zc  Zc  Yd  dp  q  %0::b>%88$ ,IJ]I^^z{  |F  |F  GI  |J  {K  KL  M  //EE dD!! 33D9M. ZL(D^DTT|  ~M  }N  NT  Ub  Tc  ck  l  //EE "r"   c                 ~   U(       a  XR                   :g  nUR                  SSS9n[        [        U5      5       H  nXqR                  S   :  d  M  U[        U5      :  a  X'   OUS   nU R                  X   XS9n	U(       a4  WU   n
[        R                  " U
R                  X   5      XU   5      X'   M|  XU'   M     g)a  
Helper method to normalize coordinates in a tensor across multiple images.

Args:
    tensor (`torch.Tensor`):
        Input tensor with coordinates.
    original_sizes (`list`):
        Original image sizes.
    is_bounding_box (`bool`, *optional*, defaults to `False`):
        Whether coordinates are bounding boxes.
    preserve_padding (`bool`, *optional*, defaults to `False`):
        Whether to preserve padding values (for boxes).
r   T)r   keepdimr   )r`   N)	r@   allr   r   r   r   r   where	expand_as)rG   rm   rQ   r`   ra   mask
coord_maskimg_idxr   normalized_coordsimg_masks              r    ro   +Sam3Processor._normalize_tensor_coordinates  s     111Db$7JS01Ga(;BSEX;X 7^lmn^o$($?$?O] %@ %! $)'2H&+kk **6?;=NW^P_'FO '87O 2r"   c                 :    U R                   R                  XU5      $ )a  
Converts the output of [`Sam3Model`] into semantic segmentation maps.

Args:
    outputs ([`Sam3ImageSegmentationOutput`]):
        Raw outputs of the model containing semantic_seg.
    target_sizes (`list[tuple]` of length `batch_size`, *optional*):
        List of tuples corresponding to the requested final size (height, width) of each prediction. If unset,
        predictions will not be resized.
    threshold (`float`, *optional*, defaults to 0.5):
        Threshold for binarizing the semantic segmentation masks.

Returns:
    semantic_segmentation: `list[torch.Tensor]` of length `batch_size`, where each item is a semantic
    segmentation map of shape (height, width) corresponding to the target_sizes entry (if `target_sizes` is
    specified). Each entry is a binary mask (0 or 1).
)rE   "post_process_semantic_segmentation)rG   outputstarget_sizes	thresholds       r    r   0Sam3Processor.post_process_semantic_segmentation  s    $ ##FFw^ghhr"   c                 :    U R                   R                  XU5      $ )a  
Converts the raw output of [`Sam3Model`] into final bounding boxes in (top_left_x, top_left_y,
bottom_right_x, bottom_right_y) format. This is a convenience wrapper around the image processor method.

Args:
    outputs ([`Sam3ImageSegmentationOutput`]):
        Raw outputs of the model containing pred_boxes, pred_logits, and optionally presence_logits.
    threshold (`float`, *optional*, defaults to 0.3):
        Score threshold to keep object detection predictions.
    target_sizes (`list[tuple[int, int]]`, *optional*):
        List of tuples (`tuple[int, int]`) containing the target size `(height, width)` of each image in the
        batch. If unset, predictions will not be resized.

Returns:
    `list[dict]`: A list of dictionaries, each dictionary containing the following keys:
        - **scores** (`torch.Tensor`): The confidence scores for each predicted box on the image.
        - **boxes** (`torch.Tensor`): Image bounding boxes in (top_left_x, top_left_y, bottom_right_x,
          bottom_right_y) format.

Example:

```python
>>> from transformers import AutoModel, AutoProcessor
>>> from PIL import Image
>>> import httpx
>>> from io import BytesIO

>>> model = AutoModel.from_pretrained("facebook/sam3-base")
>>> processor = AutoProcessor.from_pretrained("facebook/sam3-base")

>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> with httpx.stream("GET", url) as response:
...     image = Image.open(BytesIO(response.read()))
>>> inputs = processor(images=image, text="cat", return_tensors="pt")
>>> outputs = model(**inputs)

>>> # Post-process to get bounding boxes
>>> results = processor.post_process_object_detection(outputs, threshold=0.3, target_sizes=[image.size[::-1]])
>>> boxes = results[0]["boxes"]
>>> scores = results[0]["scores"]
```
)rE   post_process_object_detection)rG   r   r   r   s       r    r   +Sam3Processor.post_process_object_detection.  s    V ##AA'Vbccr"   c                 :    U R                   R                  XX45      $ )a)  
Converts the raw output of [`Sam3Model`] into instance segmentation predictions with bounding boxes and masks.
This is a convenience wrapper around the image processor method.

Args:
    outputs ([`Sam3ImageSegmentationOutput`]):
        Raw outputs of the model containing pred_boxes, pred_logits, pred_masks, and optionally
        presence_logits.
    threshold (`float`, *optional*, defaults to 0.3):
        Score threshold to keep instance predictions.
    mask_threshold (`float`, *optional*, defaults to 0.5):
        Threshold for binarizing the predicted masks.
    target_sizes (`list[tuple[int, int]]`, *optional*):
        List of tuples (`tuple[int, int]`) containing the target size `(height, width)` of each image in the
        batch. If unset, predictions will not be resized.

Returns:
    `list[dict]`: A list of dictionaries, each dictionary containing the following keys:
        - **scores** (`torch.Tensor`): The confidence scores for each predicted instance on the image.
        - **boxes** (`torch.Tensor`): Image bounding boxes in (top_left_x, top_left_y, bottom_right_x,
          bottom_right_y) format.
        - **masks** (`torch.Tensor`): Binary segmentation masks for each instance, shape (num_instances,
          height, width).

Example:

```python
>>> from transformers import AutoModel, AutoProcessor
>>> from PIL import Image
>>> import httpx
>>> from io import BytesIO

>>> model = AutoModel.from_pretrained("facebook/sam3-base")
>>> processor = AutoProcessor.from_pretrained("facebook/sam3-base")

>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> with httpx.stream("GET", url) as response:
...     image = Image.open(BytesIO(response.read()))
>>> inputs = processor(images=image, text="cat", return_tensors="pt")
>>> outputs = model(**inputs)

>>> # Post-process to get instance segmentation
>>> results = processor.post_process_instance_segmentation(
...     outputs, threshold=0.3, target_sizes=[image.size[::-1]]
... )
>>> masks = results[0]["masks"]
>>> boxes = results[0]["boxes"]
>>> scores = results[0]["scores"]
```
)rE   "post_process_instance_segmentation)rG   r   r   mask_thresholdr   s        r    r   0Sam3Processor.post_process_instance_segmentation[  s#    r ##FF
 	
r"   )r@   r?   )Ni)NNNNNNN)F)r   torch.TensorrS   r   )r   )N)r   N)FF)Nr   )333333?N)r   r   N)#__name__
__module____qualname____firstlineno__r   rD   r   r   r	   r   r   r   r   rc   strr
   r   r{   r   rj   r   rg   rk   rl   r   r   r   r   ri   ro   r   r   r   __static_attributes____classcell__)rJ   s   @r    r=   r=   U   s    bek7:Tzk[^k k  %)ae/3EIJNBF26jT!j ++d9o=EV@WWZ^^j &,	j
 $tE{+,u||;dBj !d3i1ELL@4Gj T%[)ELL84?j j(4/j 
j jX6l(ET6&P@Df8 +/0FllRZZ'$.0F 0F 	0F
 0F !4Z0F 
0Fd!8Fi(+d` ;
 ;
r"   r=   )!__doc__copyr   r   r   image_utilsr   processing_utilsr   tokenization_utils_baser   r   r	   utilsr
   r   r   r   utils.import_utilsr   
get_loggerr   loggerr   r!   r$   r)   r,   r0   r7   r:   r=   __all__ r"   r    <module>r      s      % . R R L L * 
		H	%""""""! 
:
N 
  
D 
r"   