
    Z j,                        S r SSKrSSKJr  SSKJrJr  SSKJ	r	J
r
JrJrJr  SSKJrJrJr  SSKJrJr  \R*                  " \5      rS	r\" S
5       V s/ s H
  n SU S S3PM     sn \" S5       V s/ s H
  n SU S S3PM     sn -   r " S S\5      r " S S\
SS9rS\4S jrS rS r S r!\ " S S\5      5       r"S/r#gs  sn f s  sn f )z 
Processor class for PaliGemma.
    N   )BatchFeature)
ImageInputis_valid_image)MultiModalDataProcessingKwargsProcessorMixin
TextKwargsUnpack)
AddedTokenPreTokenizedInput	TextInput)auto_docstringloggingz<image>i   z<locz0>4>   z<segz0>3c                   H    \ rS rSr% Sr\\-  \\   -  \\   -  S-  \S'   Sr	g)PaliGemmaTextKwargs'   a  
suffix (`str`, `list[str]`, `list[list[str]]`):
    The suffixes or batch of suffixes to be encoded. Only necessary for finetuning. See https://github.com/google-research/big_vision/blob/main/big_vision/configs/proj/paligemma/README.md
    for more information. If your prompt is "<image> What is on the image", the suffix corresponds to the expected prediction "a cow sitting on a bench".
Nsuffix )
__name__
__module____qualname____firstlineno____doc__r   r   list__annotations____static_attributes__r       ڃ/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/paligemma/processing_paligemma.pyr   r   '   s/     ))DO;dCT>UUX\\\r    r   c                   4    \ rS rSr% \\S'   SSS.SS0S.rSrg	)
PaliGemmaProcessorKwargs1   text_kwargsF)paddingreturn_mm_token_type_idsdata_formatchannels_first)r%   images_kwargsr   N)r   r   r   r   r   r   	_defaultsr   r   r    r!   r#   r#   1   s)    $$ (-

 +
Ir    r#   F)totalreturnc                 R    [        U [        5      =(       a    U R                  S5      $ )Nhttp)
isinstancestr
startswith)vals    r!   is_urlr4   ?   s    c3:CNN6$::r    c                 <    [        U 5      =(       d    [        U 5      $ N)r4   r   elems    r!   is_image_or_image_urlr9   D   s    $</>$//r    c                 F    [        U [        5      =(       d    [        U 5      $ r6   )r0   r1   r9   r7   s    r!   _is_str_or_imager;   H   s    dS"A&;D&AAr    c                     X2-  U-   U U  S3$ )a  
Builds a string from the input prompt and image tokens.
For example, for the call:
build_string_from_input(
    prompt="Prefix str"
    bos_token="<s>",
    image_seq_len=3,
    image_token="<im>",
)
The output will be:
"<im><im><im><s>Initial str"
Args:
    prompt (`list[Union[str, ImageInput]]`): The input prompt.
    bos_token (`str`): The beginning of sentence token.
    image_seq_len (`int`): The length of the image sequence.
    image_token (`str`): The image token.
    num_images (`int`): Number of images in the prompt.

r   prompt	bos_tokenimage_seq_lenimage_token
num_imagess        r!   build_string_from_inputrD   L   s"    & )J67	{6("MMr    c            
          ^  \ rS rSr   SU 4S jjr\  SS\S-  S\\-  \	\   -  \	\   -  S\
\   S\4S jj5       rSS	 jr\S
 5       rSrU =r$ )PaliGemmaProcessorb   Nc                   > [        US5      (       d  [        S5      eUR                  U l        [        US5      (       dK  [        [        SSS9nSU/0nUR                  U5        UR                  [        5      U l        [        U l        O"UR                  U l        UR                  U l        UR                  [        5        SUl        SUl        [        TU ]9  XUS9  g )	Nimage_seq_lengthz;Image processor is missing an `image_seq_length` attribute.rB   FT)
normalizedspecialadditional_special_tokens)chat_template)hasattr
ValueErrorrI   r   IMAGE_TOKENadd_special_tokensconvert_tokens_to_idsimage_token_idrB   
add_tokensEXTRA_TOKENSadd_bos_tokenadd_eos_tokensuper__init__)selfimage_processor	tokenizerrM   kwargsrB   tokens_to_add	__class__s          r!   rY   PaliGemmaProcessor.__init__d   s     (:;;Z[[ / @ @y-00$[UDQK8;-HM((7"+"A"A+"ND*D"+":":D(44D\*"'	"'	=Qr    imagestextr]   r-   c                    U R                   " [        4SU R                  R                  0UD6nUS   R	                  SS5      nSnUc  [        S5      eUc  [        R                  S5        Sn[        U5      (       a  U/nO)[        U[        5      (       a  [        US	   5      (       a   UGb[  UGbW  [        S
 U 5       5      (       Gd  [        R                  S5        [        U[        5      (       aQ  [        U[        5      (       a<  [        U5      [        U5      :w  a$  [        S[        U5       S[        U5       S35      e[        U5      (       a  U//nO[        U[        [        45      (       a&  [        US	   5      (       a  U Vs/ s H  ow/PM     nnOZ[        U[        [        45      (       a4  [        US	   [        [        45      (       a  [        US	   S	   5      (       d  [        S5      e[!        X!5       VV	s/ s HT  u  p[#        UU R                  R$                  U R&                  [(        [        U	[        5      (       a  [        U	5      OSS9PMV     n
nn	O/ nU H  nUR+                  [(        [(        U R&                  -  5      nUR-                  [(        5      nUS:w  a  U[        [(        5      -   OS	nUSU U R                  R$                  -   XS -   nUR/                  U5        M     U Vs/ s H  o S3PM	     n
nUb  [        U5      (       a  U/nUb)  U Vs/ s H  nUU R                  R0                  -   PM     nnU R2                  " U40 US   D6S   nUS   R	                  SS5      nUS   R	                  SS5      nU R                  " W
4UUS.US   D6nU R5                  U
US/S9  0 UESU0EnU(       aK  [6        R8                  " US   5      nSU[6        R8                  " US   5      S	:H  '   UR;                  SU05        U(       a  U R=                  US   5      US'   [?        UUS 9$ s  snf s  sn	nf s  snf s  snf )!a  
Returns:
    [`BatchFeature`]: A [`BatchFeature`] with the following fields:

    - **input_ids** -- List of token ids to be fed to a model. Returned when `text` is not `None`. If `suffix`
      is provided, the `input_ids` will also contain the suffix input ids.
    - **attention_mask** -- List of indices specifying which tokens should be attended to by the model (when
      `return_attention_mask=True` or if *"attention_mask"* is in `self.model_input_names` and if `text` is not
      `None`).
    - **pixel_values** -- Pixel values to be fed to a model. Returned when `images` is not `None`.
    - **labels** -- Labels compatible with training if `suffix` is not None
tokenizer_init_kwargsr%   r   NTzF`images` are expected as arguments to a `PaliGemmaProcessor` instance.z]You are using PaliGemma without a text prefix. It will perform as a picture-captioning model. r   c              3   4   #    U  H  n[         U;   v   M     g 7fr6   )rP   ).0samples     r!   	<genexpr>.PaliGemmaProcessor.__call__.<locals>.<genexpr>   s     @4{f,4s   aL  You are passing both `text` and `images` to `PaliGemmaProcessor`. The processor expects special image tokens in the text, as many tokens as there are images per each text. It is recommended to add `<image>` tokens in the very beginning of your text. For this call, we will infer how many images each text has and add special tokens.z	Received z images for zK prompts. Each prompt should be associated with an image or list of images.zAimages must be an image, list of images or list of list of images   r>   r=   r*   pixel_valuesreturn_tensorsr'   )	text_pairreturn_token_type_idsimage)
modalities	input_idsitoken_type_idslabelsmm_token_type_ids)datatensor_type) _merge_kwargsr#   r\   init_kwargspoprO   loggerwarning_oncer;   r0   r   anywarninglenr   tupleziprD   r@   rI   rP   replacerfindappend	eos_tokenr[   _check_special_mm_tokensnparrayupdatecreate_mm_token_type_idsr   )rZ   ra   rb   r]   output_kwargsr   rp   rq   r?   
image_listinput_stringsexpanded_samplesrh   expanded_samplebos_rfind_index	bos_indexsfxrm   rn   r'   inputsreturn_dataru   s                          r!   __call__PaliGemmaProcessor.__call__   sU   ( **$
"&.."<"<
 

 }-11(DA $>eff<o DD!!6Dd##(8a(A(A 2@4@@@< dD))j.F.F6{c$i/('F}LT  LW  X 
 "&))%hZFu66>&QR);T;T39:6%g6F:Fve}55"6!9tUm<<&vay|44$%hii /2$.?	! /@* ,%"&..":":&*&;&;$/6@T6R6R3z?XY /@  	! $& "F&,nn[+PTPePeBe&fO&5&;&;K&HOFUY[F[#k2B BabI'
3dnn6N6NNQ`akQll $ %++O< # >N N=M682=M N"26":":XF@FGcDNN444FG++FUmO6TUVde&}599:JDQ#0#?#C#CD^`d#e 
"7
 M*	
 	%%mV	%R>>> !XXf[12F>BF288F#345:;&12#/3/L/L[YdMe/fK+,.IIq ;	!( !O
 Hs   P;AQ 9Q&#Qc                     0 nUb;  U R                   /[        U5      -  nS/[        U5      -  nUR                  XES.5        [        S0 UD6$ )ax  
Computes the number of placeholder tokens needed for multimodal inputs with the given sizes.

Args:
    image_sizes (list[list[str]], *optional*):
        The input sizes formatted as (height, width) per each image.
Returns:
    `MultiModalData`: A `MultiModalData` object holding number of tokens per each of the provided
    input modalities, along with other useful data.
rk   )num_image_tokensnum_image_patchesr   )rI   r   r   r   )rZ   image_sizesr]   vision_datar   r   s         r!   _get_num_multimodal_tokens-PaliGemmaProcessor._get_num_multimodal_tokens   sZ     " $ 5 56[9II!"c+&6 64Dmn,,,r    c                 ~    U R                   R                  SS/-   nU R                  R                  n[        X-   5      $ )Nrt   ru   )r\   model_input_namesr[   r   )rZ   tokenizer_input_namesimage_processor_input_namess      r!   r   $PaliGemmaProcessor.model_input_names	  s?     $ @ @DTV^C_ _&*&:&:&L&L#)GHHr    )rI   rB   rS   )NNN)NNr6   )r   r   r   r   rY   r   r   r   r   r   r   r#   r   r   r   propertyr   r   __classcell__)r_   s   @r!   rF   rF   b   s     	R8  %)Z^tJT!tJ ++d9o=EV@WWtJ 12	tJ
 
tJ tJl-$ I Ir    rF   )$r   numpyr   feature_extraction_utilsr   image_utilsr   r   processing_utilsr   r   r	   r
   r   tokenization_utils_baser   r   r   utilsr   r   
get_loggerr   r|   rP   rangerU   r   r#   boolr4   r9   r;   rD   rF   __all__)is   0r!   <module>r      s     4 5  P O , 
		H	%).t5A$qgQ5RWX[R\8]R\Q4#waR\8]]]* ]
/u 
;4 ;
0BN, jI jI jIZ  
 Y 68]s   B?0C