
    Z j$                        S r SSKrSSKJr  SSKJrJr  SSKJ	r	J
r
JrJr  SSKJrJr  SSKJrJrJr  SS	KJr  \" 5       (       a  S
SKJr  \R0                  " \5      r " S S\
SS9rS\4S jrS r\\" SS9 " S S\5      5       5       rS/r g)z
Processor class for Pixtral.
    N   )BatchFeature)
ImageInputis_valid_image)MultiModalDataProcessingKwargsProcessorMixinUnpack)PreTokenizedInput	TextInput)auto_docstringis_vision_availablelogging)requires   )get_resize_output_image_sizec                   (    \ rS rSrSSS.SS0S.rSrg)	PixtralProcessorKwargs(   F)paddingreturn_mm_token_type_idsreturn_tensorspt)text_kwargscommon_kwargs N)__name__
__module____qualname____firstlineno__	_defaults__static_attributes__r       /root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/pixtral/processing_pixtral.pyr   r   (   s"     (-

 d
Ir#   r   F)totalreturnc                 R    [        U [        5      =(       a    U R                  S5      $ )Nhttp)
isinstancestr
startswith)vals    r$   is_urlr-   5   s    c3:CNN6$::r#   c                 <    [        U 5      =(       d    [        U 5      $ N)r-   r   )elems    r$   is_image_or_image_urlr1   :   s    $</>$//r#   )torchvisiontorch)backendsc            
          ^  \ rS rSr        SS\S\4U 4S jjjr\  SS\S-  S\\	-  \
\   -  \
\	   -  S\\   S	\4S
 jj5       rSS jr\S 5       rSrU =r$ )PixtralProcessor>   N
patch_sizespatial_merge_sizec	                   > [         T
U ]  XUS9  X0l        X@l        X`l        UR                  U R                  5      U l        Xpl        Xl        UR                  U R                  5      U l        UR                  U R                  5      U l	        UR                  U R                  5      U l
        U R                  U R                  U R                  /U l        g)a>  
patch_size (`int`, *optional*, defaults to 16):
    Patch size from the vision tower.
spatial_merge_size (`int`, *optional*, defaults to 1):
    The downsampling factor for the spatial merge operation.
image_token (`str`, *optional*, defaults to `"[IMG]"`):
    Special token used to denote image location.
image_break_token (`str`, *optional*, defaults to `"[IMG_BREAK]"`):
    Special token used to denote the end of a line of pixels in an image.
image_end_token (`str`, *optional*, defaults to `"[IMG_END]"`):
    Special token used to denote the end of an image input.
)chat_templateN)super__init__r8   r9   image_tokenconvert_tokens_to_idsimage_token_idimage_break_tokenimage_end_tokenimage_break_token_idimage_end_token_id	image_ids)selfimage_processor	tokenizerr8   r9   r;   r>   rA   rB   kwargs	__class__s             r$   r=   PixtralProcessor.__init__A   s    0 	=Q$"4&'==d>N>NO!2.'==d>N>NO$-$C$CDDZDZ$[!"+"A"A$BVBV"W--t/H/H$JaJabr#   imagestextrI   r&   c           	      &   U R                   " [        4S[        U R                  S0 5      0UD6nU R                  U R
                  -  nUb  XTS   S'   U R                  " U40 US   D6nO0 n[        U[        5      (       a  U/nO8[        U[        5      (       d#  [        US   [        5      (       d  [        S5      eUnUR                  S5      Gb%  [        US	   5      n/ n/ n	U GH  n
U R                  U
;   a  [        U5      u  pX-  nX-  nU R                  /U-  U R                  /-   /U-  nU VVs/ s H  nU  H  nUPM     M     nnnU R                   US
'   SR#                  U5      nU	R%                  U5        U
R'                  U R                  SS5      n
U R                  U
;   a  M  SU
;   a,  U	R)                  S5      nU
R'                  SUS5      n
SU
;   a  M,  UR%                  U
5        GM     US   R)                  SS5      nUS   R)                  SS5      nUS   R)                  SS5        U R                  " U40 US   DSS0D6nU R+                  UUS/S9  U(       a  U R-                  US   5      US'   [/        0 UEUEUS9$ s  snnf )a  
Returns:
    [`BatchFeature`]: A [`BatchFeature`] with the following fields:

    - **input_ids** -- List of token ids to be fed to a model. Returned when `text` is not `None`.
    - **attention_mask** -- List of indices specifying which tokens should be attended to by the model (when
      `return_attention_mask=True` or if *"attention_mask"* is in `self.model_input_names` and if `text` is not
    `None`).
    - **pixel_values** -- Pixel values to be fed to a model. Returned when `images` is not `None`.
tokenizer_init_kwargsinit_kwargsNimages_kwargsr8   r   zAInvalid input text. Please provide a string, or a list of stringspixel_valuesimage_sizes z<placeholder>r   r   r   r   Freturn_token_type_idsimage)
modalities	input_idsmm_token_type_ids)datatensor_type)_merge_kwargsr   getattrrH   r8   r9   rG   r)   r*   list	TypeErrorgetiterr>   nextrA   rB   joinappendreplacepop_check_special_mm_tokenscreate_mm_token_type_idsr   )rF   rL   rM   rI   output_kwargsr8   image_inputsprompt_stringsrS   replace_stringssampleheightwidthnum_height_tokensnum_width_tokensreplace_tokenssublistitemreplace_strr   r   text_inputss                         r$   __call__PixtralProcessor.__call__f   s   $ **"
")$..-"L
 
 __t'>'>>
;E/*<8//Y-:XYLLdC  6DD$''
47C0H0H_`` N+7|M:;KN O&&&0$($5MF(.(<%',':$))*-==AWAW@XX&)&*N ;I%].wU\TdU\d.N%])-)=)=N2&"$''."9K#**;7#^^D,<,<oqQF &&&0 &/"1"5"5a"8K#^^O[!LF &/ %%f-% ( '}599:JDQ#0#?#C#CD^`e#f m$(()@$Gnn^i}]7Sidhi%%nkwi%X#/3/L/L[YdMe/fK+,!@K!@<!@n]]) &^s   5Jc                    0 nUb  [         R                  R                  S0 5      nUR                  U5        UR                  SS5      =(       d    U R                  R
                  nU R                  U R                  -  n/ nU HP  u  p[        [        R                  " XS45      US   US   4Xf4S9u  pX-  nX-  nUR                  US-   U-  5        MR     S/[        U5      -  nUR                  X~S.5        [        S	0 UD6$ )
a{  
Computes the number of placeholder tokens needed for multimodal inputs with the given sizes.

Args:
    image_sizes (`list[list[int]]`, *optional*):
        The input sizes formatted as (height, width) per each image.

Returns:
    `MultiModalData`: A `MultiModalData` object holding number of tokens per each of the provided
    input modalities, along with other useful data.
NrQ   sizer   longest_edge)r{   r8   r   )num_image_tokensnum_image_patchesr   )r   r!   ra   updaterG   r{   r8   r9   r   npzerosre   lenr   )rF   rS   rI   vision_datarQ   r{   r8   r}   ro   rp   resized_heightresized_widthrq   rr   r~   s                  r$   _get_num_multimodal_tokens+PixtralProcessor._get_num_multimodal_tokens   s    "2<<@@RTUM  ( $$VT2Od6J6J6O6OD4+B+BBJ!!,0LHHfQ/0~.^0DE *71-
 %3$@!#0#>  '')9A)=AR(RS "- "#c+&6 64Dmn,,,r#   c                 j    U R                   R                  nU R                  R                  nX-   S/-   $ )NrS   )rH   model_input_namesrG   )rF   tokenizer_input_namesimage_processor_input_namess      r$   r   "PixtralProcessor.model_input_names   s4     $ @ @&*&:&:&L&L#$Bm_TTr#   )	rA   rC   rB   rD   rE   r>   r@   r8   r9   )NN   r   Nz[IMG]z[IMG_BREAK]z	[IMG_END])NNr/   )r   r   r   r    intr=   r   r   r   r   r_   r
   r   r   rx   r   propertyr   r"   __classcell__)rJ   s   @r$   r6   r6   >   s    
 "#'##c 	#c
  #c #cJ  %)Z^I^T!I^ ++d9o=EV@WWI^ /0	I^
 
I^ I^V"-H U Ur#   r6   )!__doc__numpyr   feature_extraction_utilsr   image_utilsr   r   processing_utilsr   r   r	   r
   tokenization_utils_baser   r   utilsr   r   r   utils.import_utilsr   image_processing_pixtralr   
get_loggerr   loggerr   boolr-   r1   r6   __all__r   r#   r$   <module>r      s     4 5  D A A * F 
		H	%	-U 	;4 ;
0 	+,ZU~ ZU - ZUz 
r#   