    Z j\                     R    S r SSKJr  SSKJr  SSKJr  \ " S S\5      5       rS/rg)z(
Image/Text processor class for CLIPSeg
   )ProcessorMixin)BatchEncoding)auto_docstringc                   @   ^  \ rS rSrSU 4S jjr\SS j5       rSrU =r$ )CLIPSegProcessor   c                 $   > [         TU ]  X5        g )N)super__init__)selfimage_processor	tokenizerkwargs	__class__s       /root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/clipseg/processing_clipseg.pyr   CLIPSegProcessor.__init__   s    4    c                    Uc  Uc  Uc  [        S5      eUb  Ub  [        S5      eU R                  " U R                  4SU R                  R                  0UD6nUb  U R                  " U4SU0US   D6nUb  U R
                  " U4SU0US   D6nUb  U R
                  " U4SU0US   D6n	Ub  Ub  W	R                  WR                  S.nU$ Ub  Ub  W	R                  WS'   U$ Ub  W$ Ub  S	WR                  0nU$ [        [        S0 W	D6US
9$ )a  
visual_prompt (`PIL.Image.Image`, `np.ndarray`, `torch.Tensor`, `list[PIL.Image.Image]`, `list[np.ndarray]`, `list[torch.Tensor]`):
    The visual prompt image or batch of images to be prepared. Each visual prompt image can be a PIL image,
    NumPy array or PyTorch tensor. In case of a NumPy array/PyTorch tensor, each image should be of shape
    (C, H, W), where C is a number of channels, H and W are image height and width.

Returns:
    [`BatchEncoding`]: A [`BatchEncoding`] with the following fields:

    - **input_ids** -- List of token ids to be fed to a model. Returned when `text` is not `None`.
    - **attention_mask** -- List of indices specifying which tokens should be attended to by the model (when
      `return_attention_mask=True` or if *"attention_mask"* is in `self.model_input_names` and if `text` is not
      `None`).
    - **pixel_values** -- Pixel values to be fed to a model. Returned when `images` is not `None`.
z9You have to specify either text, visual prompt or images.zMYou have to specify exactly one type of prompt. Either text or visual prompt.tokenizer_init_kwargsreturn_tensorstext_kwargsimages_kwargs)pixel_valuesconditional_pixel_valuesr   r   )datatensor_type )	
ValueError_merge_kwargsvalid_processor_kwargsr   init_kwargsr   r   r   dict)
r   textimagesvisual_promptr   r   output_kwargsencodingprompt_featuresimage_featuress
             r   __call__CLIPSegProcessor.__call__   sx   " <M1fnXYY 9lmm**''
?C~~?Y?Y
]c
 ~~dj>j][hMijH$"22.<@Mo@^O !11'59F9WN $); . ; ;,;,H,HH O&"4'5'B'BH^$OO&*O,H,HH O d&<^&<.YYr   r   )NN)NNNN)	__name__
__module____qualname____firstlineno__r   r   r*   __static_attributes____classcell__)r   s   @r   r   r      s    5 8Z 8Zr   r   N)	__doc__processing_utilsr   tokenization_utils_baser   utilsr   r   __all__r   r   r   <module>r7      s>    / 4 # =Z~ =Z =Z@ 
r   
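# Usage sketch (added for illustration; not part of the upstream module). It shows
# the two mutually exclusive prompt modes handled by `__call__` above. The
# checkpoint name and image file paths are illustrative assumptions:
#
#     from PIL import Image
#     from transformers import CLIPSegProcessor
#
#     processor = CLIPSegProcessor.from_pretrained("CIDAS/clipseg-rd64-refined")
#     image = Image.open("scene.jpg")
#
#     # Text prompts: one image per prompt; the encoding carries input_ids,
#     # attention_mask and pixel_values.
#     inputs = processor(text=["a cat", "a remote"], images=[image] * 2, return_tensors="pt")
#
#     # Visual prompt: the encoding carries pixel_values plus
#     # conditional_pixel_values (the processed prompt image).
#     prompt = Image.open("prompt.jpg")
#     inputs = processor(images=image, visual_prompt=prompt, return_tensors="pt")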