
    Z jM                     d   S SK r S SKJrJr  S SKJr  S SKrS SKJr  S SK	Js  J
r  SSKJr  SSKJr  SSKJr  SSKJr  SS	KJrJr  SS
KJr  SSKJrJrJrJr  SSKJ r   SSK!J"r"  SSK#J$r$  \RJ                  " \&5      r'\\ " S S\5      5       5       r(\\ " S S\5      5       5       r) " S S\RT                  5      r+ " S S\RT                  5      r,\" S5       " S S\RT                  5      5       r- " S S\RT                  5      r. S1S\RT                  S\R^                  S\R^                  S \R^                  S!\R^                  S-  S"\0S#\0\1-  S$\\   4S% jjr2 " S& S'\RT                  5      r3 " S( S)\RT                  5      r4\ " S* S+\5      5       r5\ " S, S-\55      5       r6 " S. S/\55      r7/ S0Qr8g)2    N)CallableSequence)	dataclass   )initialization)use_kernel_forward_from_hub)FlashAttentionKwargs)BaseModelOutput)ALL_ATTENTION_FUNCTIONSPreTrainedModel)Unpack)TransformersKwargsauto_docstringcan_return_tuplelogging)merge_with_config_defaults)capture_outputs   )TimesFmConfigc                   j    \ rS rSr% SrSr\R                  S-  \S'   Sr	\R                  S-  \S'   Sr
g)TimesFmOutput,   z
loc (`torch.Tensor` of shape `(batch_size, )`):
    The mean of the time series inputs.
scale (`torch.Tensor` of shape `(batch_size,)`):
    The scale of the time series inputs.
Nlocscale )__name__
__module____qualname____firstlineno____doc__r   torchTensor__annotations__r   __static_attributes__r       }/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/timesfm/modeling_timesfm.pyr   r   ,   s/      $C	#!%E5<<$%r%   r   c                       \ rS rSr% SrSr\R                  S-  \S'   Sr	\R                  S-  \S'   Sr
\R                  \-  S-  \S'   Srg)TimesFmOutputForPrediction:   a  
mean_predictions (`torch.Tensor` of shape `(batch_size, sequence_length)`):
    The mean predictions of the time series.
full_predictions (`torch.Tensor` of shape `(batch_size, sequence_length)`):
    The full predictions of the time series including the mean and the quantiles.
loss (`torch.Tensor` of shape `(1,)`, *optional*, returned when `future_values` is provided):
    The loss of the TimesFM model.
Nmean_predictionsfull_predictionslossr   )r   r   r   r   r    r*   r!   r"   r#   r+   r,   floatr$   r   r%   r&   r(   r(   :   sI     -1ellT)0,0ellT)0(,D%,,

%,r%   r(   c                   >   ^  \ rS rSrSrS\4U 4S jjrSS jrSrU =r	$ )
TimesFmMLPK   zPax MLP in pytorch.configc                    > [         TU ]  5         UR                  nUR                  n[        R
                  " X#5      U l        [        R
                  " X25      U l        [        R                  " USS9U l	        g )Nư>)normalized_shapeeps)
super__init__hidden_sizeintermediate_sizennLinear	gate_proj	down_proj	LayerNorm
layer_norm)selfr1   r8   r9   	__class__s       r&   r7   TimesFmMLP.__init__N   sX    (("44;B#4B,,Nr%   c                     U R                  U5      nU R                  U5      n[        R                  " U5      nU R	                  U5      nUb  USUS S 2S S 2S 4   -
  -  nXQ-   $ )N      ?)r?   r<   Frelur=   )r@   xpaddingsgate_inpgateoutputss         r&   forwardTimesFmMLP.forwardW   sa    ??1%~~h'vvd|..&x1d
';!;<G{r%   )r=   r<   r?   N
r   r   r   r   r    r   r7   rL   r$   __classcell__rA   s   @r&   r/   r/   K   s    O} O r%   r/   c                   2   ^  \ rS rSrSrU 4S jrS rSrU =r$ )TimesFmResidualBlocka   zTimesFM residual block.c                   > [         TU ]  5         Xl        X l        X0l        [
        R                  " X5      U l        [
        R                  " 5       U l	        [
        R                  " X#5      U l
        [
        R                  " X5      U l        g rN   )r6   r7   
input_dimshidden_dimsoutput_dimsr:   r;   input_layerSiLU
activationoutput_layerresidual_layer)r@   rV   rW   rX   rA   s       r&   r7   TimesFmResidualBlock.__init__d   s_    $&&99Z='')IIk? ii
@r%   c                     U R                  U5      nU R                  U5      nU R                  U5      nU R                  U5      nX4-   $ rN   )rY   r[   r\   r]   )r@   rG   hiddenoutputresiduals        r&   rL   TimesFmResidualBlock.forwardo   sI    !!!$(""6*&&q)  r%   )r[   rW   rV   rY   rX   r\   r]   )	r   r   r   r   r    r7   rL   r$   rP   rQ   s   @r&   rS   rS   a   s    !	A! !r%   rS   RMSNormc                   x   ^  \ rS rSrS
S\SS4U 4S jjjrS\R                  S\R                  4S jrS r	S	r
U =r$ )TimesFmRMSNormw   r5   returnNc                    > [         TU ]  5         [        R                  " [        R
                  " U5      5      U l        X l        g)z-
TimesFmRMSNorm is equivalent to T5LayerNorm
N)r6   r7   r:   	Parameterr!   onesweightvariance_epsilon)r@   r8   r5   rA   s      r&   r7   TimesFmRMSNorm.__init__y   s/     	ll5::k#:; #r%   hidden_statesc                    UR                   nUR                  [        R                  5      nUR	                  S5      R                  SSS9nU[        R                  " X0R                  -   5      -  nU R                  UR                  U5      -  $ )N   T)keepdim)	dtypetor!   float32powmeanrsqrtrm   rl   )r@   ro   input_dtypevariances       r&   rL   TimesFmRMSNorm.forward   sw    #))%((7 $$Q',,R,>%H?T?T4T(UU{{]--k:::r%   c                 ^    [        U R                  R                  5       SU R                   3$ )Nz, eps=)tuplerl   shaperm   )r@   s    r&   
extra_reprTimesFmRMSNorm.extra_repr   s*    ))*+6$2G2G1HIIr%   )rm   rl   )r3   )r   r   r   r   r-   r7   r!   r"   rL   r   r$   rP   rQ   s   @r&   rf   rf   w   sB    $ $$ $ $;U\\ ;ell ;J Jr%   rf   c                   >   ^  \ rS rSrSrS\4U 4S jjrSS jrSrU =r	$ )TimesFmPositionalEmbedding   z6Generates position embedding for a given 1-d sequence.r1   c                   > [         TU ]  5         UR                  nUR                  nX#sU l        U l        UR                  U l        U R
                  S-  n[        R                  " [        U5      [        U5      -  5      [        US-
  S5      -  nU R                  SU[        R                  " [        R                  " U[        R                  S9U* -  5      -  5        g )Nrq   r   inv_timescalesrt   )r6   r7   min_timescalemax_timescaler8   embedding_dimsmathlogr-   maxregister_bufferr!   exparangerv   )r@   r1   r   r   num_timescaleslog_timescale_incrementrA   s         r&   r7   #TimesFmPositionalEmbedding.__init__   s    ,,,,1>.D.$00,,1"&((5+?%BV+V"WZ]^lop^prsZt"tEIIell>&W[rZr&rss	
r%   c                 T   Uc  Uc  [        S5      eUcH  [        R                  " U[        R                  U R                  R
                  S9R                  S5      nO(UR                  S:w  a  [        SUR                   35      eUR                  " / UR                  QSP76 U R                  R                  SSS5      -  n[        R                  " [        R                  " U5      [        R                  " U5      /SS9n[        R                  " USSSU R                  S-  45      nU$ )	a  Generates a Tensor of sinusoids with different frequencies.

Args:
    seq_length: an optional Python int defining the output sequence length.
      if the `position` argument is specified.
    position: [B, seq_length], optional position for each token in the
      sequence, only required when the sequence is packed.

Returns:
    [B, seqlen, D] if `position` is specified, else [1, seqlen, D]
z.Either position or seq_length must be providedrt   devicer   rq   z*position must be 2-dimensional, got shape r   rr   dim)
ValueErrorr!   r   rv   r   r   	unsqueezendimr   viewcatsincosrE   padr   )r@   
seq_lengthpositionscaled_timesignals        r&   rL   "TimesFmPositionalEmbedding.forward   s     
 2MNN||JemmDL_L_LfLfgqqrstH]]aI(..IYZ[[mm7X^^7Q7$:M:M:R:RSTVWY[:\\EIIk2EIIk4JKQRS v1a)<)<q)@ABr%   )r   r   r   NNrO   rQ   s   @r&   r   r      s    @
} 
 r%   r   modulequery_states
key_statesvalue_statesattention_maskscalingdropoutkwargsc                    [         R                  " XR                  SS5      5      U-  nUb  X-   n[        R                  R                  US[         R                  S9R                  UR                  5      n[        R                  R                  XU R                  S9n[         R                  " X5      n	U	R                  SS5      R                  5       n	X4$ )Nrq   r   rr   )r   rt   )ptrainingr   )r!   matmul	transposer:   
functionalsoftmaxrv   ru   rt   r   r   
contiguous)
r   r   r   r   r   r   r   r   attn_weightsattn_outputs
             r&   simple_eager_attention_forwardr      s     <<.B.B1a.HIGSL!#4==((2U]](SVVWcWiWijL==((6??([L,,|:K''1-88:K$$r%   c                      ^  \ rS rSrSrS\S\4U 4S jjrS\R                  S\R                  4S jr
 SS
\R                  S\R                  S	-  S\\   S\\R                  \R                  S	-  4   4S jjrSrU =r$ )TimesFmAttention   zlImplements the attention used in TimesFM. One key difference is that there is _per_dim_scaling of the query.r1   	layer_idxc                   > [         TU ]  5         Xl        SU l        UR                  U l        X l        UR                  U l        UR                  U l        UR                  U l	        U R                  U R                  -  U l
        U R                  U R                  -  U l        [        R                  " [        R                  " U R                  45      5      U l        [        R"                  " U R                  U R                  U R                  -  5      U l        [        R"                  " U R                  U R                  U R                  -  5      U l        [        R"                  " U R                  U R                  U R                  -  5      U l        [        R"                  " U R                  U R                  -  U R                  5      U l        g )NT)r6   r7   r1   	is_causalattention_dropoutr   num_attention_heads	num_headsr8   head_dimq_sizekv_sizer:   rj   r!   emptyr   r;   q_projk_projv_projo_projr@   r1   r   rA   s      r&   r7   TimesFmAttention.__init__   s/   !'!9!9"33!--nnt}}4~~5||EKK0@$ABii 0 0$..4==2PQii 0 0$..4==2PQii 0 0$..4==2PQii >@P@PQr%   queryrh   c                     [         R                  " U R                  5      R                  S[        R
                  " U R                  5      -  5      nXS S S S S 24   -  $ )Ng^$3eG?)rE   softplusr   mulr   sqrtr   )r@   r   r   s      r&   _scale_queryTimesFmAttention._scale_query   sH    

4<<(,,[499T]];S-STT4q0111r%   Nro   r   r   c                    UR                   S S n/ UQSPU R                  P7nU R                  U5      R                  U5      R	                  SS5      nU R                  U5      nU R                  U5      R                  U5      R	                  SS5      nU R                  U5      R                  U5      R	                  SS5      n[        R                  " U R                  R                  [        5      n	U	" U UUUU4U R                  (       d  SOU R                  SS.UD6u  pU
R                  " / UQSP76 R!                  5       n
U R#                  U
5      n
X4$ )Nrr   r   rq           rD   )r   r   )r   r   r   r   r   r   r   r   r   get_interfacer1   _attn_implementationr   r   r   reshaper   r   )r@   ro   r   r   input_shapehidden_shaper   r   r   attention_interfacer   r   s               r&   rL   TimesFmAttention.forward   sQ    $))#2.88b8$--8{{=166|DNNqRST((6[[/44\BLLQPQR
{{=166|DNNqRST(?(M(MKK,,.L)
 %8	%
  $}}C$2H2H	%
 	%
! "));;;;FFHkk+.((r%   )r   r1   r   r8   r   r   r   r   r   r   r   r   r   r   rN   )r   r   r   r   r    r   intr7   r!   r"   r   r   r	   r~   rL   r$   rP   rQ   s   @r&   r   r      s    vR} R R(2%,, 25<< 2 /3)||) t+) -.	)
 
u||U\\D00	1) )r%   r   c                      ^  \ rS rSrSrS\S\4U 4S jjrS\R                  S\R                  S\R                  S	\R                  4S
 jr
SrU =r$ )TimesFmDecoderLayeri  zTransformer layer.r1   r   c                    > [         TU ]  5         [        XS9U l        [	        U5      U l        [        UR                  UR                  S9U l	        g )N)r   )r5   )
r6   r7   r   	self_attnr/   mlprf   r8   rms_norm_epsinput_layernormr   s      r&   r7   TimesFmDecoderLayer.__init__  sA    )&Ff%-f.@.@fFYFYZr%   ro   r   rH   rh   c                 v    UnU R                  U5      nU R                  UUS9u  pXQ-   nU R                  XS9nU$ )N)ro   r   )rH   )r   r   r   )r@   ro   r   rH   r   rb   _s          r&   rL   TimesFmDecoderLayer.forward  sW     !,,];>>') * 
 !0 Br%   )r   r   r   )r   r   r   r   r    r   r   r7   r!   r"   rL   r$   rP   rQ   s   @r&   r   r     s\    [} [ [||  ,,	 
 r%   r   c                   |   ^  \ rS rSr% \\S'   SrS/rSrSr	Sr
\\S.r\R                  " 5       U 4S	 j5       rS
rU =r$ )TimesFmPreTrainedModeli,  r1   timesfmr   past_values)timeT)ro   
attentionsc                 8  > [         TU ]  U5        [        U[        5      (       a!  [        R
                  " UR                  5        g [        U[        5      (       a  UR                  S-  nUR                  UR                  pC[        R                  " [        U5      [        U5      -  5      [        US-
  S5      -  n[        R                  " UR                   U["        R$                  " ["        R&                  " U["        R(                  S9U* -  5      -  5        g g )Nrq   r   r   )r6   _init_weights
isinstancer   initones_r   r   r   r   r   r   r   r-   r   copy_r   r!   r   r   rv   )r@   r   r   r   r   r   rA   s         r&   r   $TimesFmPreTrainedModel._init_weights9  s    f%f.//JJv~~& :;;#22a7N+1+?+?AUAU=&*hhu]/CeMFZ/Z&[^a"A_ '# JJ%%))ELLu}}MQhPhhij <r%   r   )r   r   r   r   r   r#   base_model_prefix_no_split_modulesmain_input_nameinput_modalities_supports_sdpar   r   _can_record_outputsr!   no_gradr   r$   rP   rQ   s   @r&   r   r   ,  sP    !./#O N,&
 ]]_ r%   r   c                     ^  \ rS rSrS\4U 4S jjrS\R                  S\R                  S\\R                  \\R                  \R                  4   4   4S jr	\
\\S\R                  S	\R                  S
\R                  S\\   S\4
S j5       5       5       r\ SS\R                  S-  S\S\R(                  S\R*                  S\S\R                  S-  4S jj5       r\S\R                  S\R                  S\\R                  \R                  4   4S j5       r\S\R                  S\R                  S\R                  4S j5       rSrU =r$ )TimesFmModeliL  r1   c           	        > [         TU ]  U5        Xl        [        SUR                  -  UR
                  UR                  S9U l        [        R                  " UR                  UR
                  S9U l        [        R                  " [        UR                  5       Vs/ s H  n[        X5      PM     sn5      U l        U R                  R"                  (       a  [%        US9U l        U R)                  5         g s  snf )Nrq   rV   rX   rW   )num_embeddingsembedding_dim)r1   )r6   r7   r1   rS   patch_lengthr8   r9   input_ff_layerr:   	Embedding	freq_sizefreq_emb
ModuleListrangenum_hidden_layersr   layersuse_positional_embeddingr   position_emb	post_initr   s      r&   r7   TimesFmModel.__init__N  s     26...**00

 F4D4DTZTfTfgmmEJ6KcKcEdeEd	 3Ede
 ;;// :& ID 	 fs   C8inputspatched_padsrh   c           	         U R                  X5      u  p4[        R                  " X@R                  R                  S9nXSS2SS4   -
  USS2SS4   -  n[        R
                  " [        R                  " XR                  R                  -
  5      U R                  R                  :  [        R                  " U R                  R                  UR                  UR                  S9U5      nXSU44$ )zInput is of shape [B, N, P].minNr   )_timesfm_masked_mean_stdr!   clampr1   	tolerancewhereabspad_valtensorrt   r   )r@   r  r  musigmarK   s         r&   _forward_transformTimesFmModel._forward_transforma  s     11&G	E{{'<'<= q$}--q$}1EE++IIf{{2223dkk6K6KKLL,,GMM'..Y

 U##r%   r   past_values_paddingfreqr   c           	      Z   UR                   S   nUR                  USU R                  R                  5      nUR                  USU R                  R                  5      n[        R
                  " [        R                  " US-
  5      U R                  R                  :  [        R                  " SUR                  UR                  S9U5      n[        R
                  " [        R                  " X`R                  R                  -
  5      U R                  R                  :  [        R                  " SUR                  UR                  S9U5      nU R                  Xg5      u  phUSU-
  -  n[        R                  " Xg/SS9n	U R                  U	5      n
[        R                  " USS9S   nU R                  R                   (       aY  U R#                  U
R                   S   5      n[        R$                  " U/U
R                   S   -  SS9nU R'                  X5      nX-  n
U R)                  U5      nX-  n
U
nU R+                  UUR                   S   UR                  UR                  SS	9nU R,                  S
U R                  R.                    H  nU" U4UUS.UD6nM     [1        UUS   US   S9$ )ax  
past_values (`torch.FloatTensor` of shape `(batch_size, sequence_length)`):
    Past values of the time series that serves as input to the model.
past_values_padding (`torch.LongTensor` of shape `(batch_size, sequence_length)`):
    The padding indicator of the time series.
freq (`torch.LongTensor` of shape `(batch_size,)`):
    Frequency indices for the time series data.
r   rr   rD   r   r   r   r   T)r   sequence_lengthrt   r   r   N)r   rH   )last_hidden_stater   r   )r   r   r1   r  r!   r  r  r  r  rt   r   r  r  r   r  r  r  r  concat_timesfm_shift_padded_seqr  _prepare_4d_attention_maskr  r
  r   )r@   r   r   r!  r   bsizepatched_inputsr  statsconcat_inputsmodel_inputpatched_paddingpos_embf_embro   r   layers                    r&   rL   TimesFmModel.forwardq  ss   & !!!$$))%T[[5M5MN*//r4;;;S;STIIlS()DKK,A,AALLN$8$8AVAVW

 {{IIn{{':'::;dkk>S>SSLLL$6$6|?R?RS

 !% 7 7 U (3+=>		>"@bI))-8  ))Lb9!<;;//''(9(9!(<=GllG9{/@/@/C#CKG44_NG"Kd# $88*)//2%% '' 9 
 [[!@4;;#@#@AE!-( 	M B +a(
 	
r%   r   Nr#  rt   r   r   c                    UR                   (       a   [        R                  " U5      R                  O[        R                  " U5      R                  nU b%  U R                  U R                  S   SSS5      n X-  n U(       a\  [        R                  " [        R                  " X4X#S9U-  SS9nUR                  SSX5      nU b  [        R                  " X5      n U $ Un U $ )a  
Creates 4D attention mask and combines causal and padding masks if needed.

Args:
    attention_mask: Optional tensor of shape (batch_size, seq_length) containing padding mask
    sequence_length: Length of the sequence
    dtype: Data type of the mask
    device: Device of the mask
    is_causal: Whether to apply causal masking

Returns:
    4D attention mask of shape (batch_size, 1, seq_length, seq_length)
r   r   rr   r   )diagonal)
is_floating_pointr!   finfor  iinfor   r   triurk   minimum)r   r#  rt   r   r   	min_valuecausal_masks          r&   r'  'TimesFmModel._prepare_4d_attention_mask  s    , /4.E.EEKK&**5;;W\K]KaKa	 %+001E1Ea1H!QPRSN+7N **

O=UZ]ffK &**1aRK )!&~!K  "-r%   paddingc                 (   S[         R                  4S jn[         R                  " SU-
  SS9nU" U5      n[         R                  " U R                  S   5      nXUSS24   nXUSS24   nSU-
  n[         R                  " USS9n	[         R
                  " U	SS	9n	[         R                  " Xh-  SS9n
X-  nXkR                  S
5      -
  U-  n[         R                  " US-  SS9U	-  n[         R
                  " USS	9n[         R                  " U5      nX4$ )a  Calculates mean and standard deviation of `inputs` across axis 1.

It excludes values where `padding` is 1.

Args:
    inputs: A PyTorch tensor of shape [b, n, p].
    padding: A PyTorch tensor of shape [b, n, p] with values 0 or 1.

Returns:
    A tuple containing the mean and standard deviation.
    We return the statistics of the first patch with more than three non-padded values.
arrc                     [         R                  " U S:  R                  [         R                  5      SS9nU S:  R                  [         R                  5      R	                  SS9n[         R
                  " US:H  U R                  S   S-
  U5      $ )Nr   r   r   r   )r!   argmaxru   int32sumr  r   )r>  indicesrow_sums      r&   _get_patch_index?TimesFmModel._timesfm_masked_mean_std.<locals>._get_patch_index  sk    llC1H==#=1EGaxmmEKK0444;G;;w!|SYYq\A-=wGGr%   r   rq   r   r   NrD   r  rr   r   )r!   r"   rB  r   r   r  r   r   )r  r<  rE  pad_sumpatch_indicesbidxsr>  r   masknum_valid_elements
masked_summasked_meanmasked_centered_arr
masked_var
masked_stds                  r&   r  %TimesFmModel._timesfm_masked_mean_std  s    	H%,, 	H
 ))AKQ/(1V\\!_-M1,-]A-. 3w #YYt3"[[);E YYszq1
 5  #%:%:2%>>$FYY2A51=@RR
[[5
ZZ
+
&&r%   rJ  seqc                 l   UR                   u  p#nU S:H  nUR                  [        R                  5      R	                  SS9nSXeR                  SS9) '   [        R                  " X1R                  S9R                  SSS5      R                  USU5      nXvSS2SS4   -
  U-  nUR                  SU5      n	U	$ )zShifts rows of seq based on the first 0 in each row of the mask.

Args:
    mask: mask tensor of shape [B, N]
    seq: seq tensor of shape [B, N, P]

Returns:
    The shifted sequence.
r   r   r   rr   )r   N)r   ru   r!   rA  r@  anyr   r   r   expandgather)
rJ  rR  
batch_sizenum_seqfeature_dimnew_maskrC  	idx_rangeshifted_idxshifted_seqs
             r&   r&  &TimesFmModel._timesfm_shift_padded_seq  s     ,/99(
[%)QY ++ekk*11a18 )+!$$% LL<AA!RKRRS]_acno	 !1dD=#99WD jjK0r%   )r1   r  r  r  r  )T)r   r   r   r   r   r7   r!   r"   r~   r  r   r   r   
LongTensorr   r   r   rL   staticmethodr   rt   r   boolr'  r  r&  r$   rP   rQ   s   @r&   r   r   L  s   } &$ll$27,,$	u||U5<<#=>>	?$   F
\\F
 #--F
 ll	F

 +,F
 
F
    F
P  +t+++ {{+ 	+
 + 
	+ +Z ,' ,' ,'QVW\WcWcejeqeqWqQr ,' ,'\  5<< ELL  r%   r   c                   l  ^  \ rS rSrSrS\4U 4S jjr SS\\R                     S\\
   S-  S\
S-  S	\\R                  S
4   4S jjrS\R                  S\\R                  \R                  4   S	\R                  4S jrS\R                  S\R                  S	\R                  4S jr\\      SS\\R                     S\\R                  \
-     S-  S\
S-  S\R                  S-  S\
S-  S\S\S\\   S	\4S jj5       5       r\S\R                  S\
S	\\R                     4S j5       rSrU =r$ )TimesFmModelForPredictioni:  z/TimesFM model for quantile and mean prediction.r1   c                 B  > [         TU ]  U5        Xl        UR                  U l        UR
                  U l        [        U5      U l        [        UR                  UR
                  S[        UR                  5      -   -  UR                  S9U l        U R                  5         g )Nr   r   )r6   r7   r1   context_lengthcontext_lenhorizon_lengthhorizon_lenr   decoderrS   r8   len	quantilesr9   horizon_ff_layerr  )r@   r1   rA   s     r&   r7   "TimesFmModelForPrediction.__init__=  s     !00!00#F+ !5))--S9I9I5J1JK00!
 	r%   Nr  r!  rf  rh   .c           	      2   Uc  U R                   n/ / pTU GH  nUR                  S   n[        R                  " XpR                  -   UR
                  UR                  S9nXs:  a  X7-
  n	[        R                  " [        R                  " XR
                  UR                  S9U/SS9n[        R                  " [        R                  " XR
                  UR                  S9U/SS9nOXs:  a  Xc* S nXU R                  -   * S nUR                  U5        UR                  U5        GM     [        R                  " USS9[        R                  " USS94n
UbC  U
[        R                  " US[        U5       [        R                  S9R                  SS5      4-   n
U
$ )a  Pad/truncate input time series to `context_len` and build a padding mask.

Args:
    inputs: A list of 1d Tensors. Each Tensor is the context time series of a single forecast task.
    freq: Optional list of frequencies (returned as a tensor when provided).
    context_len: Optional context length override (defaults to `self.context_len`).

Returns:
    Tuple of (padded_inputs, padding_mask) and optionally a freq tensor.
Nr   r   r   r   rr   r   )rf  r   r!   zerosrh  rt   r   r   rk   appendstackr  rj  rA  r   )r@   r  r!  rf  input_tsinput_paddingts	input_lenr<  num_front_padresults              r&   _preprocess%TimesFmModelForPrediction._preprocessP  si    **K"$b-BIkk).>.>">bhhWYW`W`aG& + 7YYMRTR[R[ \^`aghi))UZZXXV]VdVd%egn$ouvw(&!$2B2B$B"C"EFOOB  )  ++hA.Mq0QRu||D3v;,?u{{S[[\^`abddFr%   model_outputr*  c                 
   U R                  U5      nUR                  u  pEnUR                  XEU R                  R                  [        U R                  R                  5      S-   5      nUu  pxX8SS2SSS4   -  USS2SSS4   -   $ )z*Postprocess output of stacked transformer.r   N)rl  r   r   r1   rg  rj  rk  )	r@   rz  r*  	output_tsbnr   r  r  s	            r&   _postprocess_output-TimesFmModelForPrediction._postprocess_outputu  s     )),7	 //aNN1)C)CSI^I^E_bcEcd		D$!4551dD$;N8OOOr%   predictionstargetsc                 ,   / n[        U R                  R                  5       HL  u  pEX!SU4   -
  n[        R                  " US-
  U-  XV-  5      nUR                  UR                  5       5        MN     [        R                  " U5      R                  5       $ )N.r   )	enumerater1   rk  r!   r   rp  rx   rq  )r@   r  r  lossesiqerrorsr,   s           r&   _quantile_loss(TimesFmModelForPrediction._quantile_loss  sz    dkk334DA3622F99a!ev-qz:DMM$))+& 5 {{6"''))r%   r   window_sizefuture_valuesforecast_context_lenreturn_forecast_on_contexttruncate_negativer   c           
      :	   Uc  U R                   n	OUn	US   R                  n
U Vs/ s H  oU	* S PM
     nn[        R                  " [        R                  " U Vs/ s H  n[        R                  " U5      PM     sn5      5      nUb]  / n/ n[        U5       HC  u  nnUR                  U R                  X5      5        Uc  M+  UR                  UU   /S-  5        ME     UnUb  UnUc$  [        R                  S5        S/[        U5      -  nU R                  X5      u  nnnUR                  U
5      nUR                  U
5      nUR                  U
5      nUnUR                  S   n/ nUR                  S   UR                  S   U R                  -   :w  a8  [        SUR                  S    SUR                  S    SU R                   35      eU R                   R"                  nU R                  U-   S-
  U-  n[%        U5       GH4  nUSS2SUR                  S   24   nUSS2U	* S24   nUSS2U	* S24   nU R&                  " SUUUS	.UD6nU R)                  UR*                  UR,                  UR.                  45      nU(       am  US:X  ag  USS2SS
2SU R                   R0                  2SS24   nUR3                  UR5                  S5      S
UR5                  S5      5      nUR7                  U5        USS2S
SU2S4   nUSS2S
SU2SS24   nUR7                  U5        [        R8                  " UU/S
S9nGM7     U(       aF  [        R8                  " USS9SS2SUU R                   R0                  -
  U R                  -   2SS24   nO+[        R8                  " USS9SS2SU R                  2SS24   nUSS2SS2S4   nUb*  USSS2S4   USSS2S4   -   nUSSS2S4   USSS2S4   -   nUS:  a5  U(       a.  [        R:                  " US5      n[        R:                  " US5      nSn Ub:  [<        R>                  " UU5      n!U RA                  USS2SS2SS24   U5      n"U!U"-   n [C        WR*                  URD                  URF                  UUU S9$ s  snf s  snf )a  
past_values (`torch.FloatTensor` of shape `(batch_size, sequence_length)`):
    Past values of the time series that serves as input to the model.
freq (`torch.LongTensor` of shape `(batch_size,)`):
    Frequency indices for the time series data.
window_size (`int`, *optional*):
    Window size of trend + residual decomposition. If None then we do not do decomposition.
future_values (`torch.Tensor`, *optional*):
    Optional future time series values to be used for loss computation.
forecast_context_len (`int`, *optional*):
    Optional max context length.
return_forecast_on_context (`bool`, *optional*):
    True to return the forecast on the context when available, i.e. after the first input patch.
truncate_negative (`bool`, *optional*):
    Truncate to only non-negative values if any of the contexts have non-negative values,
    otherwise do nothing.

Example:

```python
>>> from transformers import TimesFmModelForPrediction

>>> model = TimesFmModelForPrediction.from_pretrained("google/timesfm-2.0-500m-pytorch")

>>> forecast_input = [torch.linspace(0, 20, 100).sin(), torch.linspace(0, 20, 200).sin(), torch.linspace(0, 20, 400).sin()]
>>> frequency_input = torch.tensor([0, 1, 2], dtype=torch.long)

>>> # Generate
>>> with torch.no_grad():
>>>     outputs = model(past_values=forecast_input, freq=frequency_input, return_dict=True)
>>>     point_forecast_conv = outputs.mean_predictions
>>>     quantile_forecast_conv = outputs.full_predictions
```
Nr   rq   z6No frequency provided via `freq`. Default to high (0).r   z=Length of paddings must match length of input + horizon_len: z != z + )r   r   r!  rr   r   )axis.r   )r$  r   ro   r*   r+   r,   r   )$rf  r   r!   r  rq  r  extend_timesfm_moving_averageloggerinforj  rx  ru   r   rh  r   r1   rg  r	  ri  r  r$  r   r   r  r   sizerp  concatenatemaximumrE   mse_lossr  r(   r   ro   )#r@   r   r!  r  r  r  r  r  r   fcontext_lenr   rt  r  inp_min
new_inputs	new_freqsr  rr  rs  inp_freq	final_outrf  full_outputsoutput_patch_lennum_decode_patches
step_indexcurrent_paddingdecoder_outputfprop_outputsnew_full_tsnew_tsmean_outputsr,   r  quantile_losss#                                      r&   rL   !TimesFmModelForPrediction.forward  s   ^  '++L/LQ&&/:;{l]^${;))EKK(H22(HIJ"JI"6*2!!$">">r"OP#$$d1gY]3 +  F <KKPQ3V$D,0,<,<V,J)-;;v&%((0;;v&	ooa(q!Y__Q%7$:J:J%JJ!''*+4	0B/C3tGWGWFXZ   ;;55"..1AAAEJZZ 23J+Aq9??13E/E,EFO \MN!23H+A}~,=>M,0LL -$$1- 	-N !4400##^%9%9:M
 *jAo+Ass4Ndkk6N6N4NPQ,QR)11+2B2B12Er;K[K[\]K^_##K0"1b*;+;*;Q#>?F'2/@0@/@!(CDK,))9f*=BGI/ 42 & ,,\BPkDKK$<$<<t?O?OOPRSSL !,,\B1a$JZJZFZ\]C]^L#Aq!G,"'1c	2\!$Q$)5LLL'1c	2\!$Q$)5LLLa<- ==s;L ==s;L$zz,>H //Q12X0FVMm+D),>>%00(66))
 	
c <(Hs   R Rr>  c                 .   [         R                  " XS-
  S4SS5      n[        R                  " XR                  U R
                  S9U-  n[         R                  " UR                  SSS5      UR                  SSS5      5      R                  5       nX@U-
  /$ )zCCalculates the moving average using PyTorch's convolution function.r   r   constantr   rr   )	rE   r   r!   rk   rt   r   conv1dr   squeeze)r>  r  
arr_paddedkernelsmoothed_arrs        r&   r  1TimesFmModelForPrediction._timesfm_moving_average  s}     UU3q! 4j!D
KyyL{Zxx
1b 96;;q!R;PQYY[L011r%   )r1   rf  ri  rl  rh  r   )NNNNFF)r   r   r   r   r    r   r7   r   r!   r"   r   r~   rx  r  r  r   r   ra  r   r   r(   rL   r`  listr  r$   rP   rQ   s   @r&   rc  rc  :  s   9} ( lp#u||,#4<SMD4H#^adh^h#	u||S 	!#JP!LLP16u||U\\7Q1RP	P*%,, * *RWR^R^ *  59"&-1+/+0"'L
ell+L
 u||c)*T1L
 4Z	L

 ||d*L
 "DjL
 %)L
  L
 +,L
 
$L
  L
\ 2U\\ 2 2U\\HZ 2 2r%   rc  )rc  r   r   )r   )9r   collections.abcr   r   dataclassesr   r!   torch.nnr:   torch.nn.functionalr   rE    r   r   integrationsr   modeling_flash_attention_utilsr	   modeling_outputsr
   modeling_utilsr   r   processing_utilsr   utilsr   r   r   r   utils.genericr   utils.output_capturingr   configuration_timesfmr   
get_loggerr   r  r   r(   Moduler/   rS   rf   r   r"   r-   r   r   r   r   r   r   rc  __all__r   r%   r&   <module>r     s  *  . !     & 7 B / F & R R 7 5 0 
		H	% 
	&O 	&  	& 
- -  - ,!299 !, Y'JRYY J (J(+ +j %II%,,% % ,,	%
 LL4'% % S[% '(%,9)ryy 9)x")) @ _  > j) j jZk2 6 k2\ Rr%   