
    Z jA                        S SK r S SKJr  S SKrS SKJs  Jr  S SKJr  SSKJ	r
  SSKJr  SSKJr  SSKJrJr  SS	KJr  SS
KJrJr  SSKJr  SSKJr  SSKJr   " S S\R:                  5      r " S S\R:                  5      r\ " S S\5      5       r S\RB                  S\4S jr"\ " S S\ 5      5       r# " S S\ 5      r$/ SQr%g)    N)Callable)nn   )initialization)Cache)create_bidirectional_mask)BaseModelOutputWithPastCausalLMOutputWithPast)PreTrainedModel)auto_docstringcan_return_tuple)maybe_autocast   )	AutoModel   )	PI0Configc                   >   ^  \ rS rSrU 4S jr\S 5       rS rSrU =r	$ )PI0TimestepEmbeddings'   c                 r   > [         TU ]  5         Xl        U R                  U5      nU R	                  SUSS9  g )Nsinusoid_freqF)
persistent)super__init__configcompute_freqsregister_buffer)selfr   r   	__class__s      u/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/pi0/modeling_pi0.pyr   PI0TimestepEmbeddings.__init__(   s8    **62_mN    c                    [         R                  " SSU R                  R                  S-  [         R                  S9nU R
                  U R                  U R
                  -  U-  -  nSU-  S-  [        R                  -  nU$ )N              ?r   dtype)	torchlinspace
dit_confighidden_sizefloat32
min_period
max_periodmathpi)r   fractionperiodr   s       r    r   #PI0TimestepEmbeddings.compute_freqs.   sr    >>#sF,=,=,I,IQ,NV[VcVcd""f&7&7&:K:K&KPX%XXfq(4772r"   c                    [        UR                  R                  [        5      (       a0  UR                  R                  S:w  a  UR                  R                  OSn[	        USS9   U R
                  S S S 24   nX1S S 2S 4   -  n[        R                  " UR                  5       UR                  5       /SS9nS S S 5        U$ ! , (       d  f       W$ = f)NmpscpuF)device_typeenabledr   dim)

isinstancedevicetypestrr   r   r(   catsincos)r   timer7   r   embtime_embedss         r    forwardPI0TimestepEmbeddings.forward5   s    *4T[[5E5Es*K*KPTP[P[P`P`diPidkk&&otUC ..tQw7Mq$w-/C))SWWY	$:BK D 	 DC s   &AC
C)r   )
__name__
__module____qualname____firstlineno__r   staticmethodr   rE   __static_attributes____classcell__r   s   @r    r   r   '   s'    O   r"   r   c                   .   ^  \ rS rSrU 4S jrS rSrU =r$ )PI0ActionTimeEmbedding>   c                 @  > [         TU ]  5         [        U5      U l        [        R
                  " UR                  UR                  R                  5      U l	        [        R
                  " UR                  UR                  R                  5      U l        [        R
                  " SUR                  R                  -  UR                  R                  5      U l        [        R
                  " UR                  R                  UR                  R                  5      U l        g )Nr   )r   r   r   sinusoid_embedsr   Linearmax_action_dimr*   r+   action_in_projmax_state_dim
state_projaction_time_mlp_inaction_time_mlp_outr   r   r   s     r    r   PI0ActionTimeEmbedding.__init__?   s    4V< ii(=(=v?P?P?\?\]))F$8$8&:K:K:W:WX"$))A0A0A0M0M,MvO`O`OlOl"m#%99V->->-J-JFL]L]LiLi#j r"   c                    U R                  U5      nU R                  U5      nU R                  U5      nUS S 2S S S 24   R                  U5      R	                  UR
                  S9n[        R                  " XV/SS9nU R                  [        R                  " U R                  U5      5      5      n[        R                  " US S 2S S S 24   U/SS9nU$ )Nr&   r   r9   r   )rX   rV   rS   	expand_astor'   r(   r?   rZ   FsilurY   )	r   statenoisetimestepstate_embedsaction_embedsrD   action_time_embedsaction_embeds_mergeds	            r    rE   PI0ActionTimeEmbedding.forwardG   s    u-++E2**84!!T1*-77FIIP]PcPcId"YY'CK!55affT=T=TUg=h6ij$yy,q$z*BDV)W]^_##r"   )rV   rY   rZ   rS   rX   )rG   rH   rI   rJ   r   rE   rL   rM   rN   s   @r    rP   rP   >   s    k
$ 
$r"   rP   c                   ^   ^  \ rS rSr% \\S'   SrSrSrS/r	Sr
SrSrSrSrSrU 4S jrS	rU =r$ )
PI0PreTrainedModelT   r   modelrb   Tpast_key_values)imagetextc                    > [         TU ]  U5        [        U[        5      (       a;  [        R
                  " UR                  UR                  UR                  5      5        g g N)	r   _init_weightsr;   r   initcopy_r   r   r   )r   moduler   s     r    rs    PI0PreTrainedModel._init_weightsb   sF    f%f344JJv++V-A-A&---PQ 5r"    )rG   rH   rI   rJ   r   __annotations__base_model_prefixmain_input_namesupports_gradient_checkpointing_skip_keys_device_placement_supports_flash_attn_supports_sdpa_supports_flex_attn_can_compile_fullgraph_supports_attention_backendinput_modalitiesrs   rL   rM   rN   s   @r    rk   rk   T   sR    O&*##4"5N!"&(R Rr"   rk   block_boundariesreturnc           
      T   ^  S[         S[         S[         S[         S[        4
U 4S jjnU$ )N	batch_idxhead_idxq_idxkv_idxr   c                 h   > [         R                  " UT5      n[         R                  " UT5      nXT:*  $ rr   )r(   	bucketize)r   r   r   r   q_blockkv_blockr   s         r    
inner_mask0blockwise_bidirectional_mask.<locals>.inner_maski   s.    //%)9:??6+;<""r"   )intbool)r   r   s   ` r    blockwise_bidirectional_maskr   h   s3    #c #S # #c #d #
 r"   c                   P  ^  \ rS rSrS\4U 4S jjrS rS rSS jr\	\
       SS\R                  S	\R                  S-  S
\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S\R                  S-  S\S-  S\4S jj5       5       rSrU =r$ )PI0Modelq   r   c                    > [         TU ]  U5        [        R                  " UR                  5      U l        [        R                  " UR                  5      U l        U R                  5         g rr   )	r   r   r   from_configr*   dit
vlm_configvlm	post_initr[   s     r    r   PI0Model.__init__s   sJ     (():):;(():):;r"   c                 6    U R                   R                  5       $ rr   )r   get_input_embeddings)r   s    r    r   PI0Model.get_input_embeddingsy   s    xx,,..r"   c                 :    U R                   R                  U5        g rr   )r   set_input_embeddings)r   values     r    r   PI0Model.set_input_embeddings|   s    %%e,r"   Nc                    UR                   S   nUR                  SS5      nU R                  R                  U5      R                  nUR                  SXVR                   S   UR                   S   5      n/ n[        U5       H  u  pXh   U	   n
UR                  U
5        M     [        R                  " USS9nUR                  5       nSXU R                  R                  R                  :H  '   U R                  R                  5       " U5      nXR                  R                  R                  :H  R                  S5      R!                  U5      R#                  UR$                  5      nUR'                  X5      nU$ )Nr   r   r   r9   )shapeflattenr   get_image_featurespooler_outputreshape	enumerateappendr(   r?   cloner   r   image_token_idr   	unsqueezer^   r_   r<   masked_scatter)r   	input_idspixel_valuespixel_attention_maskattention_maskmax_num_camerasimage_featurestotal_image_featuresr   maskunpadded_image_featuresllm_input_idsinputs_embedsspecial_image_masks                 r    embed_prefixPI0Model.embed_prefix   sK   .44Q7#++Aq144\BPP'//OEYEYZ[E\^l^r^rst^uv!()=>OI&4&?&E# ''(?@  ?  %yy)=1E!)LM4;;#9#9#H#HHI557F++00???Yr]Y}%R$$%	 	 &445G^r"   rf   r   r   r   r   position_idsr   rn   r   c	           	         Ubo  Ucl  Ub  Uc  UR                  S5      S-
  nUc  U R                  X#U5      n[        R                  " U5      SS2SS2S4   n
U R	                  UUUU
SS9R
                  nUb  UR                  S:w  a  [        S5      eS=pUb  [        R                  " UR                  S   UR                  S   UR                  UR                  S	9n[        R                  " XM/SS
9n[        R                   " USS
9S-
  SS2UR                  S   * S24   nUR                  5       n[        R                  " US-   UR                  S   S-
  /UR                  S9n[        R                   " USS
9S-
  n[        U R                   R"                  UUU[%        U5      S9nU R&                  " SUUUUS.U	D6nU$ )z
action_embeds (`torch.Tensor`, *optional*):
    The embeddings of input actions and robot states.
pixel_attention_mask (`torch.Tensor`, *optional*):
    The mask indicating padded positions in the input image.
Nr   r   r   T)r   r   r   token_type_ids	use_cacher   z:Only two-dimensional attention masks are accepted for now!r'   r<   r9   )r<   )r   r   r   rn   and_mask_function)r   r   r   rn   rx   )cumsumr   r(   
zeros_liker   rn   ndim
ValueErroronesr   r'   r<   r?   get_seq_lengthtensorr   r   r*   r   r   )r   rf   r   r   r   r   r   r   rn   kwargsr   dit_position_idsdit_attention_mask
noise_maskvlm_input_lengthblock_sizesr   bidirectional_mask
dit_outputs                      r    rE   PI0Model.forward   s   ( #(?)l.B-44R81<$ $ 1 1)K_ `"--m<Q1WEN"hh+-)- '  o  %.*=*=*BYZZ 154%##A&##A&$**%,,	J "'N+GQ!O %-?Q G! KQQ^QdQdefQgPgPiMij +99;ll$4q$8-:M:Ma:PST:T#U^k^r^rs <<;a?6;;))'-+:;KL
 XX 
'-)+	

 

 r"   )r   r   rr   )NNNNNNN)rG   rH   rI   rJ   r   r   r   r   r   r   r   r(   Tensor
LongTensorr   r	   rE   rL   rM   rN   s   @r    r   r   q   s    y /-2  *.,0.24804-1(,E||E <<$&E llT)	E
 t+E $llT1E &&-E ||d*E E 
!E  Er"   r   c                     ^  \ rS rSrSrSS0rS\4U 4S jjr\\	          SS\
R                  S	\
R                  S-  S
\
R                  S-  S\
R                  S-  S\
R                  S-  S\
R                  S-  S\
R                  S-  S\
R                  S-  S\
R                  S-  S\S-  S\
R                  S\4S jj5       5       r\
R$                  " 5           SS\
R                  S\
R                  S\
R                  S	\
R                  S-  S\
R                  S-  S\
R                  S-  S\S-  S\
R                  4S jj5       rSrU =r$ )PI0ForConditionalGeneration   z9PI0 model with action projection heads and flow matching.action_out_projcolwise_gather_outputr   c                   > [         TU ]  U5        [        U5      U l        UR                  R
                  U l        [        U5      U l        [        R                  " U R                  UR                  5      U l        U R                  5         g rr   )r   r   r   rm   r*   r+   expert_hidden_sizerP   embed_action_timer   rT   rU   r   r   r[   s     r    r   $PI0ForConditionalGeneration.__init__   sf     f%
"("3"3"?"?!7!?!yy)@)@&BWBWXr"   Nrb   rc   rd   r   r   r   r   r   r   rn   actionsr   c                    UR                   S   nUc  [        R                  " U R                  R                  [        R
                  S9n[        R                  " U R                  R                  [        R
                  S9n[        R                  R                  X5      nUR                  U45      R                  UR                  5      nUU R                  R                  -  U R                  R                  -   R                  5       nUcT  [        R                  " UU R                  R                   U R                  R"                  UR                  UR$                  S9nUb6  USS2SS4   nUU-  SU-
  U-  -   R                  UR$                  5      nX+-
  nOUnU R'                  UUU5      nU R(                  " S	UUUUUU	UU
S.UD6nUR*                  SS2U R                  R                   * S24   nU R-                  U5      nSnUb*  [.        R0                  " WUU R                  R2                  S9n[5        UUUR6                  UR8                  UR:                  S9$ )
a  
state (`torch.Tensor`, *optional*):
    Current robot state.
noise (`torch.Tensor`, *optional*):
    Random noise at current timestep that needs to be denoised
timestep (`torch.Tensor`, *optional*):
    Current denoising timestep.
pixel_attention_mask (`torch.Tensor`, *optional*):
    The mask indicating padded positions in the input image.
actions (`torch.Tensor`, *optional*):
    Input actions that need to be predicted. Used only when training to compiute loss.
r   Nr&   )r<   r'   r   )r   r   r   r   r   r   rf   rn   )	reduction)losslogitsrn   hidden_states
attentionsrx   )r   r(   r   r   time_sampling_beta_alphar,   time_sampling_beta_betadistributionsBetasampler_   r<   time_sampling_scaletime_sampling_offsetfloatrandn
chunk_sizerU   r'   r   rm   last_hidden_stater   r`   mse_lossloss_reductionr
   rn   r   r   )r   rb   rc   rd   r   r   r   r   r   r   rn   r   r   
batch_sizealpha_tbeta_tdist	time_betatime_expandednoisy_actionstarget_velocityrg   outputslast_hidden_statespredicted_velocityr   s                             r    rE   #PI0ForConditionalGeneration.forward   s!   : [[^
 ll4;;#G#Gu}}]G\\$++"E"EU]][F&&++G<DZM255ellCI!DKK$C$CCdkkFfFffmmoH =KK&&**||kkE $Qd]3M*U2a-6G75RRVVW^WdWdeM#oO!M "33E=(S** 

%)!5%',+

 

 %66q4;;;Q;Q:Q:S7ST!112DE::o/AT[[MgMghD%%#33!//))
 	
r"   	num_stepsc           
         U=(       d    U R                   R                  nUR                  S   n	UR                  n
UcM  [        R
                  " SSU	U R                   R                  U R                   R                  4UR                  U
S9nUb  UR                  S5      S-
  nU R                  R                  X#U5      nU R                  R                  UUWSSS9R                  nUR                  5       nS	U-  n[        U5       Hk  nSUU-  -   n[        R                   " U[        R"                  U
S
9R%                  U	5      nU " UUUUUUS9nUR'                  U5        XOUR(                  -  -   nMm     U$ )z0Run flow matching inference to generate actions.r   r$   r%   )meanstdsizer'   r<   r   r   T)r   r   r   r   return_dictg      r   )rb   rc   rd   r   r   rn   )r   num_inference_stepsr   r<   r(   normalr   rU   r'   r   rm   r   r   rn   r   ranger   r,   expandcropr   )r   rb   r   r   rc   r   r   r  r   r   r<   r   r   rn   prefix_lengthdtsteprB   time_tensoroutputs                       r    sample_actions*PI0ForConditionalGeneration.sample_actionsG  s~    @!@!@	__Q'
!! =LLKK**KK..
 #((
E %)004q8L

//	I]^**..')% ) 
 / 	 (668 I)$D?D,,t5==PWWXbcK$%9- /F   /..E % r"   )r   r   r   rm   )
NNNNNNNNNN)NNNN)rG   rH   rI   rJ   __doc___tp_planr   r   r   r   r(   FloatTensorr   
BoolTensorr   r   r
   rE   no_gradr   r  rL   rM   rN   s   @r    r   r      s   C!#:;Hy   +/-1)-,08<.204-1(,%)T
  T
   4'T
 ##d*	T

 <<$&T
 llT)T
 $..5T
 t+T
 &&-T
 ||d*T
 T
 ""T
 
 T
  T
l ]]_ +/.28< $=  = ##= ''	=
   4'= t+= $..5= := 
		= =r"   r   )rk   r   r   )&r/   collections.abcr   r(   torch.nn.functionalr   
functionalr`    r   rt   cache_utilsr   masking_utilsr   modeling_outputsr	   r
   modeling_utilsr   utilsr   r   utils.genericr   autor   configuration_pi0r   Moduler   rP   rk   r   r   r   r   __all__rx   r"   r    <module>r&     s   *  $     &   6 O - 5 +  (BII .$RYY $, R R R&5<< H  m! m m`c"4 cL Lr"   