
    Z j8'                        S SK rS SKrS SKJs  Jr  SSKJr  SSK	J
r
Jr  SSKJrJrJrJr  SSKJrJrJr  SSKJrJr  SSKJr  \R4                  " \5      r " S	 S
\SS9rSr\" S5       V s/ s H
  n SU S S3PM     sn \" S5       V s/ s H
  n SU S S3PM     sn -   r \\" SS9 " S S\5      5       5       r!S/r"gs  sn f s  sn f )    N   )BatchFeature)
ImageInputmake_nested_list_of_images)MultiModalDataProcessingKwargsProcessorMixinUnpack)
AddedTokenPreTokenizedInput	TextInput)auto_docstringlogging)requiresc                   *    \ rS rSrSSSS.SS0S.rS	rg
)PI0ProcessorKwargs$   
max_length0   right)paddingr   padding_sidereturn_tensorspt)text_kwargscommon_kwargs N)__name__
__module____qualname____firstlineno__	_defaults__static_attributes__r       w/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/pi0/processing_pi0.pyr   r   $   s#     $#

 +D1Ir$   r   F)totalz<image>i   z<locz0>4>   z<segz0>3)visiontorch)backendsc                   D  ^  \ rS rSrSU 4S jjr\   SS\\\   -  \\\      -  S-  S\\	-  \\   -  \\	   -  S-  S\\
R                  -  \R                  -  S-  S\\
R                  -  \R                  -  S-  S\\   S	\4S
 jj5       rSS jr\U 4S j5       rSrU =r$ )PI0Processor3   Nc                   > UR                   S   UR                   S   sU l        U l        UR                  S/ SQ5      nUR                  S/ SQ5      nUR                  S/ SQ5      nUR                  S	/ S
Q5      n[        R
                  " U5      U l        [        R
                  " U5      U l        [        R
                  " U5      U l        [        R
                  " U5      U l	        UR                  SS5      U l
        UR                  SS5      U l        [        US5      (       d  [        S5      eUR                  U l        [        US5      (       dK  [        [         SSS9n	SU	/0n
UR#                  U
5        UR%                  [         5      U l        [         U l        O"UR&                  U l        UR(                  U l        UR+                  [,        5        SUl        SUl        [2        TU ]i  XUS9  g )Nheightwidth
state_mean)ggsgr?g	h"l?gW2D@g\ AcgZd;OſgB>٬?gQI	state_std)gt$~?gL
F%u?g.!u?g/n?g6?gx?g]K=?gF%u?actions_mean)g&S?gX ?gW[재gHPsr?gg?g 	gHPsactions_std)gGz?g`"?g9#J{?gvOjM?g>yX5ͫ?g46<R?gj+?max_state_dim    
chunk_size2   image_seq_lengthz;Image processor is missing an `image_seq_length` attribute.image_tokenFT)
normalizedspecialadditional_special_tokens)chat_template)sizer0   r1   getr*   tensorr2   r3   r4   r5   r6   r8   hasattr
ValueErrorr:   r   IMAGE_TOKENadd_special_tokensconvert_tokens_to_idsimage_token_idr;   
add_tokensEXTRA_TOKENSadd_bos_tokenadd_eos_tokensuper__init__)selfimage_processor	tokenizerr?   kwargsr2   r3   r4   r5   r;   tokens_to_add	__class__s              r%   rN   PI0Processor.__init__6   s   "1"6"6x"@/BVBVW^B_TZZZ.rs
JJ{,lm	zz.2mnjj0hi,,z2i0!LL6 <<4#ZZ< **\26(:;;Z[[ / @ @y-00$[UDQK8;-HM((7"+"A"A+"ND*D"+":":D(44D\*"'	"'	=Qr$   imagestextactionsstaterR   returnc                    U R                   " [        4SU R                  R                  0UD6nUc  [        R                  S5        Sn[        U[        5      (       a  U/n[        U5      n[        U5      [        U5      :w  a$  [        S[        U5       S[        U5       S35      eUS   R                  S	S5      nUS
   R                  S	S5        / n	[        X'5       HV  u  pU R                  U R                  -  [        U5      -   U R                  R                   U
 S3n
U	R!                  U
5        MX     U R                  " U	40 US   D6n[#        S U 5       5      n[$        R&                  " [        U5      U4[$        R(                  S9n[$        R&                  " [        U5      USU R*                  U R,                  5      n[/        U5       H>  u  nnU R0                  " U4S	S0US
   D6n[        U5      nSUUSU24'   US   UUSU24'   M@     0 UEUUS.EnUb  [$        R2                  " U5      U R4                  -
  U R6                  S-   -  nUR8                  S   U R:                  :  a3  [<        R>                  " USU R:                  UR8                  S   -
  45      nURA                  SU RB                  U R:                  5      US'   Ub  [$        R2                  " U5      U RD                  -
  U RF                  S-   -  nUR8                  S   U R:                  :  a3  [<        R>                  " USU R:                  UR8                  S   -
  45      nURA                  SU R:                  5      US'   [I        UUS9$ )a  
actions (`list | np.ndarray | torch.Tensor`, *optional*):
    Actions to be predicted by the model. If provided, padding, mean and std normalization will be applied.
state (`list | np.ndarray | torch.Tensor`, *optional*):
    Robotic states to be predicted by the model. If provided, padding, mean and std normalization will be applied.

Returns:
    [`BatchFeature`]: A [`BatchFeature`] with the following fields:

    - **input_ids** -- List of token ids to be fed to a model. Returned when `text` is not `None`. If `suffix`
      is provided, the `input_ids` will also contain the suffix input ids.
    - **attention_mask** -- List of indices specifying which tokens should be attended to by the model (when
      `return_attention_mask=True` or if *"attention_mask"* is in `self.model_input_names` and if `text` is not
      `None`).
    - **pixel_values** -- Pixel values to be fed to a model. Returned when `images` is not `None`.
    - **pixel_attention_mask** -- Pixel values padding mask to be fed to a model. Returned when `images` is not `None`.
    - **state** -- Robot state compatible with model if `state` is not None
    - **actions** -- Label-actions compatible with training if `actions` is not None
tokenizer_init_kwargsNzPYou are using PI0 without a text prefix. The processor will use an empty prompt. z	Received z image samples for z\ prompts. Each prompt should be associated with one sample (with one or more camera images).r   r   images_kwargs
c              3   8   #    U  H  n[        U5      v   M     g 7fN)len).0sample_imagess     r%   	<genexpr>(PI0Processor.__call__.<locals>.<genexpr>   s     Un]c-00ns   )dtyper   r   Tpixel_values)rh   pixel_attention_maskg:0yE>r   rX   rY   )datatensor_type)%_merge_kwargsr   rQ   init_kwargsloggerwarning_once
isinstancestrr   rb   rD   popzipr;   r:   	bos_tokenappendmaxr*   zerosboolr0   r1   	enumeraterP   rB   r4   r5   shaper6   Fpadviewr8   r2   r3   r   )rO   rV   rW   rX   rY   rR   output_kwargsbatched_imagesr   prompt_stringssample
image_listtext_inputsmax_num_camerasri   padded_pixel_valuesbatchrd   	processednum_camerasreturn_datas                        r%   __call__PI0Processor.__call__X   si   8 **
6:nn6P6P
TZ
 < rsDdC  6D3F;~#d)+C/00CCI; Oe e 
 '}599:JDQo&**+;TB"%d";F##d&;&;;c*oMNt~~OgOgNhiohpprs  !!&)	 #< nn^T}]7ST UnUU${{C,?+QY^YcYcd#kk#n*=PQSWS^S^`d`j`jk$-n$= E=,,]r4rS`apSqrIm,K8< !457@7P|| 34 %>

/$8
 ||G,t/@/@@TEUEUX]E]^G}}R 4#5#55%%!T-?-?'--PRBS-S)TU%,\\"dootGYGY%ZK	"\\%(4??:t~~PU?UVE{{2!3!33ea););ekk"o)M%NO#(::b$2D2D#EK .IIr$   c                     0 nUb;  U R                   /[        U5      -  nS/[        U5      -  nUR                  XES.5        [        S0 UD6$ )ax  
Computes the number of placeholder tokens needed for multimodal inputs with the given sizes.

Args:
    image_sizes (list[list[str]], *optional*):
        The input sizes formatted as (height, width) per each image.
Returns:
    `MultiModalData`: A `MultiModalData` object holding number of tokens per each of the provided
    input modalities, along with other useful data.
   )num_image_tokensnum_image_patchesr   )r:   rb   updater   )rO   image_sizesrR   vision_datar   r   s         r%   _get_num_multimodal_tokens'PI0Processor._get_num_multimodal_tokens   sZ     " $ 5 56[9II!"c+&6 64Dmn,,,r$   c                     > [         TU ]  S/-   $ )Nri   )rM   model_input_names)rO   rT   s    r%   r   PI0Processor.model_input_names   s    w(,B+CCCr$   )r4   r5   r8   r0   r:   r;   rH   r6   r2   r3   r1   )NNNra   )r   r   r    r!   rN   r   r   listr   r   npndarrayr*   Tensorr
   r   r   r   r   propertyr   r#   __classcell__)rT   s   @r%   r-   r-   3   s    RD  bf;?9=WJT*--T*5E0FFMWJ ++d9o=EV@WWZ^^WJ 

"U\\1D8	WJ
 bjj 5<</$6WJ +,WJ 
WJ WJr-$ D Dr$   r-   )#numpyr   r*   torch.nn.functionalnn
functionalr|   feature_extraction_utilsr   image_utilsr   r   processing_utilsr   r   r	   r
   tokenization_utils_baser   r   r   utilsr   r   utils.import_utilsr   
get_loggerr   ro   r   rE   rangerJ   r-   __all__)is   0r%   <module>r      s   *     4 A X X O O , * 
		H	%)  ).t5A$qgQ5RWX[R\8]R\Q4#waR\8]] 	&'QD> QD ( QDh 
s 68]s   ,B<	C