
    Z j"                         S SK Jr  S SKJr  S SKJrJrJrJrJ	r	  S SK
JrJr  S SKJrJr  SSKJr   " S S	\S
S9r " S S\S
S9r\ " S S\5      5       rS/rg)   )BatchFeature)
ImageInput)ImagesKwargsMultiModalDataProcessingKwargsProcessorMixinUnpack)PreTokenizedInput	TextInput)
TensorTypeauto_docstring   )AutoTokenizerc                   8    \ rS rSr% Sr\\S'   \\S'   \\S'   Srg)AriaImagesKwargs   a  
split_image (`bool`, *optional*, defaults to `False`):
    Whether to split large images into multiple crops. When enabled, images exceeding the maximum size are
    divided into overlapping crops that are processed separately and then combined. This allows processing
    of very high-resolution images that exceed the model's input size limits.
max_image_size (`int`, *optional*, defaults to `980`):
    Maximum image size (in pixels) for a single image crop. Images larger than this will be split into
    multiple crops when `split_image=True`, or resized if splitting is disabled. This parameter controls
    the maximum resolution of individual image patches processed by the model.
min_image_size (`int`, *optional*):
    Minimum image size (in pixels) for a single image crop. Images smaller than this will be upscaled to
    meet the minimum requirement. If not specified, images are processed at their original size (subject
    to the maximum size constraint).
split_imagemax_image_sizemin_image_size N)	__name__
__module____qualname____firstlineno____doc__bool__annotations__int__static_attributes__r       y/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/aria/processing_aria.pyr   r      s     r    r   F)totalc                   L    \ rS rSr% \\S'   SSS.SSS.\R                  S.rSr	g	)
AriaProcessorKwargs1   images_kwargsF)paddingreturn_mm_token_type_ids  )r   r   )text_kwargsr&   return_tensorsr   N)
r   r   r   r   r   r   r   PYTORCH	_defaultsr   r   r    r!   r$   r$   1   s4    ## (-

 " 
 %,,
Ir    r$   c            
          ^  \ rS rSr    SS\\-  S\S-  S\\\-  \4   S-  4U 4S jjjr	\
 SS\\-  \\   -  \\   -  S\S-  S	\\   S
\4S jj5       rSS jr\S 5       rSrU =r$ )AriaProcessorA   N	tokenizerchat_templatesize_conversionc                 ,  > Uc  SSS.nUR                  5        VVs0 s H  u  pV[        U5      U_M     snnU l        UR                  U l        UR                  U l        Ub  UR
                  c  UR                  Ul        [        TU ]!  XUS9  gs  snnf )z`
size_conversion (`Dict`, *optional*):
    A dictionary indicating size conversions for images.
N      )i  r)   )r2   )	itemsr   r3   image_tokenimage_token_id	pad_token	unk_tokensuper__init__)selfimage_processorr1   r2   r3   kv	__class__s          r!   r=   AriaProcessor.__init__C   s     "$'c2O6E6K6K6MN6MdaA	6MN$00'66 Y%8%8%@"+"5"5I=Q  Os   Btextimageskwargsreturnc                 b   U R                   " [        4SU R                  R                  0UD6n[	        U[
        5      (       a  U/nO8[	        U[        5      (       d#  [	        US   [
        5      (       d  [        S5      eUb  U R                  " U40 US   D6nU R                  UR                  R                  S      n/ nUR                  S5      U-  nU HQ  n	U	R                  U R                  R                  U R                  R                  U-  5      n	UR                  U	5        MS     O0 nUnUS   R                  S	S5      n
US   R                  S
S5      nU R                  " U40 US   DS	S0D6nU R!                  X|S/S9  U(       a  U R#                  US   5      US'   [%        0 UEUEU
S9$ )a[  
Returns:
    [`BatchFeature`]: A [`BatchFeature`] with the following fields:
    - **input_ids** -- List of token ids to be fed to a model. Returned when `text` is not `None`.
    - **attention_mask** -- List of indices specifying which tokens should be attended to by the model (when
    `return_attention_mask=True` or if *"attention_mask"* is in `self.model_input_names` and if `text` is not
    `None`).
    - **pixel_values** -- Pixel values to be fed to a model. Returned when `images` is not `None`.
    - **pixel_mask** -- Pixel mask to be fed to a model. Returned when `images` is not `None`.
tokenizer_init_kwargs    zAInvalid input text. Please provide a string, or a list of stringsNr&   r   	num_cropsr*   r+   r(   Fimage)
modalities	input_idsmm_token_type_ids)datatensor_type)_merge_kwargsr$   r1   init_kwargs
isinstancestrlist	TypeErrorr?   r3   pixel_valuesshapepopreplacer8   append_check_special_mm_tokenscreate_mm_token_type_idsr   )r>   rD   rE   rF   output_kwargsimage_inputstokens_per_imageprompt_stringsrK   sampler+   r(   text_inputss                r!   __call__AriaProcessor.__call__Y   s   " **
"&.."<"<
 
 dC  6DD$''
47C0H0H_``//Y-:XYL#33L4M4M4S4STU4VWN$((58HHI(B(BDNND^D^ajDjk%%f- 
 L!N&}599:JDQ#0#?#C#CD^`e#f nn^i}]7Sidhi%%nwi%X#/3/L/L[YdMe/fK+,!@K!@<!@n]]r    c                    0 nUb  [         R                  R                  S0 5      nUR                  U5        UR                  SS5      =(       d    U R                  R
                  nU Vs/ s H!  nU R                  R                  " / UQUP76 PM#     nnU Vs/ s H  oR                  U   U-  PM     n	nUR                  XS.5        [        S0 UD6$ s  snf s  snf )ay  
Computes the number of placeholder tokens needed for multimodal inputs with the given sizes.
Args:
    image_sizes (`list[list[int]]`, *optional*):
        The input sizes formatted as (height, width) per each image.
Returns:
    `MultiModalData`: A `MultiModalData` object holding number of tokens per each of the provided
    input modalities, along with other useful data.
Nr&   r   )num_image_tokensnum_image_patchesr   )	r$   r-   getupdater?   r   get_number_of_image_patchesr3   r   )
r>   image_sizesrF   vision_datar&   max_size
image_sizeri   num_patchesrh   s
             r!   _get_num_multimodal_tokens(AriaProcessor._get_num_multimodal_tokens   s     "/99==orRM  ($(()94@gDDXDXDgDgH #.!"-J $$@@\*\m\"-  ! arr`qQ\ 4 4X > L`qr4Dmn,,,!  ss   *(CCc                     U R                   R                  nU R                  R                  nU Vs/ s H  o3S:w  d  M
  UPM     nn[        [        R                  X-   5      5      $ s  snf )NrK   )r1   model_input_namesr?   rV   dictfromkeys)r>   tokenizer_input_namesimage_processor_input_namesnames       r!   ru   AriaProcessor.model_input_names   sb     $ @ @&*&:&:&L&L# 9T&k8S_jWjt8S#&kDMM"7"UVWW 'ls
   	A&A&)r8   r9   r3   )NNNN)N)r   r   r   r   r   rU   rv   floatr   r=   r   r   r
   rV   r   r	   r$   r   re   rr   propertyru   r   __classcell__)rB   s   @r!   r/   r/   A   s     )-$(9=R !3&R Tz	R
 eck3./$6R R,  %)0^++d9o=EV@WW0^ T!0^ ,-	0^
 
0^ 0^d-4 X Xr    r/   N)image_processing_utilsr   image_utilsr   processing_utilsr   r   r   r   r	   tokenization_pythonr
   r   utilsr   r   autor   r   r$   r/   __all__r   r    r!   <module>r      se   ( 3 % f f ? /  |5 **%   lXN lX lX^ 
r    