
    Z jR2                         S SK JrJr  S SKJrJrJr  SSKJr  SSK	J
r
Jr  SSKJrJr  SSKJrJr  SSKJrJr  \" 5       (       a  S S	Kr\R,                  " \5      r " S
 S\SS9r " S S\5      rS/rg	)    )OptionalUnion)IMAGE_TOKENPaliGemmaProcessorbuild_string_from_input   )BatchFeature)
ImageInputmake_flat_list_of_images)ProcessingKwargsUnpack)PreTokenizedInput	TextInput)is_torch_availableloggingNc                   .    \ rS rSrSS0SSS.SS0S	.rS
rg)ColPaliProcessorKwargs!   paddinglongestchannels_firstT)data_formatdo_convert_rgbreturn_tensorspt)text_kwargsimages_kwargscommon_kwargs N)__name__
__module____qualname____firstlineno__	_defaults__static_attributes__r       |/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/colpali/modular_colpali.pyr   r   !   s,     y
 ,"
 +D1	Ir&   r   F)totalc                   R  ^  \ rS rSr     SS\S\4U 4S jjjr\S\4S j5       r  SS\S-  S	\	\
-  \\	   -  \\
   -  S
\\   S\4S jjr SS\S-  S
\\   S\4S jjrS	\	\\	   -  S
\\   S\4S jr   SS\S\S   4   S\S\S   4   S\S\S   S\S\4   SS4S jjrSrU =r$ )ColPaliProcessor.   Nvisual_prompt_prefixquery_prefixc                 :   > X@l         XPl        [        TU ]  XUS9  g)z
visual_prompt_prefix (`str`, *optional*, defaults to `"Describe the image."`):
    A string that gets tokenized and prepended to the image tokens.
query_prefix (`str`, *optional*, defaults to `"Question: "`):
    A prefix to be used for the query.
)image_processor	tokenizerchat_templateN)r,   r-   super__init__)selfr/   r0   r1   r,   r-   	__class__s         r'   r3   ColPaliProcessor.__init__/   s$     %9!(]jkr&   returnc                 .    U R                   R                  $ )zr
Return the query augmentation token.

Query augmentation buffers are used as reasoning buffers during inference.
)r0   	pad_token)r4   s    r'   query_augmentation_token)ColPaliProcessor.query_augmentation_tokenA   s     ~~'''r&   imagestextkwargsc                    U R                   " [        4SU R                  R                  0UD6nUS   R	                  SS5      nSnUc  Uc  [        S5      eUb  Ub  [        S5      eUGb|  U R                  R                  U5      n[        U5      nU R                  /[        U5      -  nU Vs/ s H  oR                  R                  U5      PM     nn[        Xq5       V	V
s/ s HT  u  p[        U	U R                  R                  U R                  [         [#        U
[$        5      (       a  [        U
5      OSS	9PMV     nn	n
U R                  " U40 US
   D6S   nUS   R'                  SS5      b  US   S==   U R                  -  ss'   U R                  " U4SU0US   D6n0 UESU0EnU(       a.  US   R)                  US   S:H  S5      nUR+                  SU05        [-        US9$ Ub  [#        U[.        5      (       a  U/nO8[#        U[$        5      (       a  [#        US   [.        5      (       d  [        S5      eUc  U R0                  S-  n/ nU H@  nU R                  R                  U R2                  -   U-   U-   S-   nUR5                  U5        MB     US   R'                  SS5      US   S'   U R                  " U4SU0US   D6nU$ gs  snf s  sn
n	f )a  
Returns:
    [`BatchFeature`]: A [`BatchFeature`] with the following fields:

    - **input_ids** -- List of token ids to be fed to a model.
    - **attention_mask** -- List of indices specifying which tokens should be attended to by the model (when
      `return_attention_mask=True` or if *"attention_mask"* is in `self.model_input_names` and if `text` is not
      `None`).
    - **pixel_values** -- Pixel values to be fed to a model. Returned when `images` is not `None`.
tokenizer_init_kwargsr   suffixNTz&Either text or images must be providedz5Only one of text or images can be processed at a time   )prompt	bos_tokenimage_seq_lenimage_token
num_imagesr   pixel_values
max_lengthreturn_token_type_ids	input_idstoken_type_idsr   ilabels)dataz*Text must be a string or a list of strings
   
2   )_merge_kwargsr   r0   init_kwargspop
ValueErrorr/   fetch_imagesr   r,   lenprocess_imagezipr   rD   image_seq_lengthr   
isinstancelistgetmasked_fillupdater	   strr:   r-   append)r4   r<   r=   r>   output_kwargsrA   rJ   	texts_docimagerC   
image_listinput_stringsrH   inputsreturn_datarM   texts_queryquerybatch_querys                      r'   __call__ColPaliProcessor.__call__J   s#     **"
"&.."<"<
 

 }-11(DA $<FNEFF 2TUU))66v>F-f5F223c&kAIMSTVE**88?VFT +.i*@	 +A&F (!"nn66"&"7"7 +2<Z2N2Ns:TU +A  	  //Y-:XYZhiL ]+//dCOm,\:d>S>SS:^^&;  .F CVB^\BK$,88@P9QUV9VX\]""Hf#56[11$$$v t,,DGS1I1I !MNN~66;%'K0043D3DDuLvUX\\""5)  :G}9U9Y9YZfhj9kM-(6..&;  .K - A U	s   .$K"AKc                 *    U R                   " SSU0UD6$ )av  
Prepare for the model one or several image(s). This method is a wrapper around the `__call__` method of the ColPaliProcessor's
[`ColPaliProcessor.__call__`].

This method forwards the `images` and `kwargs` arguments to the image processor.

Args:
    images (`PIL.Image.Image`, `np.ndarray`, `torch.Tensor`, `list[PIL.Image.Image]`, `list[np.ndarray]`, `list[torch.Tensor]`):
        The image or batch of images to be prepared. Each image can be a PIL image, NumPy array or PyTorch
        tensor. In case of a NumPy array/PyTorch tensor, each image should be of shape (C, H, W), where C is a
        number of channels, H and W are image height and width.
    return_tensors (`str` or [`~utils.TensorType`], *optional*):
        If set, will return tensors of a particular framework. Acceptable values are:

        - `'pt'`: Return PyTorch `torch.Tensor` objects.
        - `'np'`: Return NumPy `np.ndarray` objects.

Returns:
    [`BatchFeature`]: A [`BatchFeature`] with the following fields:

    - **input_ids** -- List of token ids to be fed to a model.
    - **attention_mask** -- List of indices specifying which tokens should be attended to by the model (when
      `return_attention_mask=True` or if *"attention_mask"* is in `self.model_input_names` and if `text` is not
      `None`).
    - **pixel_values** -- Pixel values to be fed to a model. Returned when `images` is not `None`.
r<   r   rl   )r4   r<   r>   s      r'   process_imagesColPaliProcessor.process_images   s    > }}5F5f55r&   c                 *    U R                   " SSU0UD6$ )a  
Prepare for the model one or several texts. This method is a wrapper around the `__call__` method of the ColPaliProcessor's
[`ColPaliProcessor.__call__`].

This method forwards the `text` and `kwargs` arguments to the tokenizer.

Args:
    text (`str`, `list[str]`, `list[list[str]]`):
        The sequence or batch of sequences to be encoded. Each sequence can be a string or a list of strings
        (pretokenized string). If the sequences are provided as list of strings (pretokenized), you must set
        `is_split_into_words=True` (to lift the ambiguity with a batch of sequences).
    return_tensors (`str` or [`~utils.TensorType`], *optional*):
        If set, will return tensors of a particular framework. Acceptable values are:

        - `'pt'`: Return PyTorch `torch.Tensor` objects.
        - `'np'`: Return NumPy `np.ndarray` objects.

Returns:
    [`BatchFeature`]: A [`BatchFeature`] with the following fields:

    - **input_ids** -- List of token ids to be fed to a model.
    - **attention_mask** -- List of indices specifying which tokens should be attended to by the model (when
      `return_attention_mask=True` or if *"attention_mask"* is in `self.model_input_names` and if `text` is not
      `None`).
r=   r   ro   )r4   r=   r>   s      r'   process_queries ColPaliProcessor.process_queries   s    < }}1$1&11r&   query_embeddingsztorch.Tensorpassage_embeddings
batch_sizeoutput_dtypeztorch.dtypeoutput_deviceztorch.devicec           	         [        U5      S:X  a  [        S5      e[        U5      S:X  a  [        S5      eUS   R                  US   R                  :w  a  [        S5      eUS   R                  US   R                  :w  a  [        S5      eUc  US   R                  n/ n[	        S[        U5      U5       GH  n/ n[
        R                  R                  R                  R                  XXs-    SSS9n	[	        S[        U5      U5       H}  n
[
        R                  R                  R                  R                  X*X-    SSS9nUR                  [
        R                  " SX5      R                  S	S
9S   R                  SS
95        M     UR                  [
        R                  " USS
9R                  U5      R                  U5      5        GM     [
        R                  " USS
9$ )a  
Compute the late-interaction/MaxSim score (ColBERT-like) for the given multi-vector
query embeddings (`qs`) and passage embeddings (`ps`). For ColPali, a passage is the
image of a document page.

Because the embedding tensors are multi-vector and can thus have different shapes, they
should be fed as:
(1) a list of tensors, where the i-th tensor is of shape (sequence_length_i, embedding_dim)
(2) a single tensor of shape (n_passages, max_sequence_length, embedding_dim) -> usually
    obtained by padding the list of tensors.

Args:
    query_embeddings (`Union[torch.Tensor, list[torch.Tensor]`): Query embeddings.
    passage_embeddings (`Union[torch.Tensor, list[torch.Tensor]`): Passage embeddings.
    batch_size (`int`, *optional*, defaults to 128): Batch size for computing scores.
    output_dtype (`torch.dtype`, *optional*, defaults to `torch.float32`): The dtype of the output tensor.
        If `None`, the dtype of the input embeddings is used.
    output_device (`torch.device` or `str`, *optional*, defaults to "cpu"): The device of the output tensor.

Returns:
    `torch.Tensor`: A tensor of shape `(n_queries, n_passages)` containing the scores. The score
    tensor is saved on the "cpu" device.
r   zNo queries providedzNo passages providedz/Queries and passages must be on the same devicez-Queries and passages must have the same dtypeT)batch_firstpadding_valuezbnd,csd->bcnsr   )dim   rB   )rW   rU   devicedtyperangetorchnnutilsrnnpad_sequencera   einsummaxsumcatto)r4   ru   rv   rw   rx   ry   scoresibatch_scoresbatch_queriesjbatch_passagess               r'   score_retrieval ColPaliProcessor.score_retrieval   s   @  A%233!"a'344A%%);A)>)E)EENOOA$$(:1(=(C(CCLMM+A.44L%'q#./<A/1L!HHNN..;; Q^4$VW < M 1c"45zB!&!3!3!@!@&1>:\] "A " ##LL-PTTYZT[\]^bbghbi	 C MM%))La8;;LILL][\ = yyQ''r&   )r-   r,   )NNNzDescribe the image.z
Question: )NN)N)   Ncpu)r    r!   r"   r#   r`   r3   propertyr:   r
   r   r   r\   r   r   r	   rl   rp   rs   r   intr   r   r%   __classcell__)r5   s   @r'   r*   r*   .   s    $9(l
 "l l l$ (# ( ( %)Z^XT!X ++d9o=EV@WWX /0	X
 
Xx %)6T!6 /06 
	6B2$y/)2 /02 
	2H 0449>(^0D DE>( ".$~2F"FG>( 	>(
 }->( ^S01>( 
>( >(r&   r*   )typingr   r   2transformers.models.paligemma.processing_paligemmar   r   r   feature_extraction_utilsr	   image_utilsr
   r   processing_utilsr   r   tokenization_utils_baser   r   r   r   r   r   
get_loggerr    loggerr   r*   __all__r   r&   r'   <module>r      sh     # w w 4 ? 8 C 0 			H	%
-U 
u() u(r r&   