
    Z jPw                        S r SSKrSSKJrJr  SSKJr  SSKrSSKJ	r	  SSK
J	s  Jr  SSKJr  SSKJr  SSKJr  SS	KJrJr  SS
KJr  SSKJrJrJrJr  SSKJr  SSK J!r!  SSK"J#r#  SSK$J%r%  SSK&J'r'  \RP                  " \)5      r*\\ " S S\5      5       5       r+\\ " S S\5      5       5       r, " S S\	RZ                  5      r. " S S\	RZ                  5      r/ " S S\#5      r0 " S S\	RZ                  5      r1 " S S \	RZ                  5      r2 " S! S"\	RZ                  5      r3\ " S# S$\5      5       r4\ " S% S&\45      5       r5 " S' S(\45      r6/ S)Qr7g)*zPyTorch TimesFM model.    N)CallableSequence)	dataclass   )initialization)FlashAttentionKwargs)BaseModelOutput)ALL_ATTENTION_FUNCTIONSPreTrainedModel)Unpack)TransformersKwargsauto_docstringcan_return_tuplelogging)merge_with_config_defaults)capture_outputs   )LlamaRMSNorm)simple_eager_attention_forward   )TimesFmConfigc                   j    \ rS rSr% SrSr\R                  S-  \S'   Sr	\R                  S-  \S'   Sr
g)TimesFmOutput(   z
loc (`torch.Tensor` of shape `(batch_size, )`):
    The mean of the time series inputs.
scale (`torch.Tensor` of shape `(batch_size,)`):
    The scale of the time series inputs.
Nlocscale )__name__
__module____qualname____firstlineno____doc__r   torchTensor__annotations__r   __static_attributes__r       |/root/GenerationalWealth/GenerationalWealth/venv/lib/python3.13/site-packages/transformers/models/timesfm/modular_timesfm.pyr   r   (   s/      $C	#!%E5<<$%r'   r   c                       \ rS rSr% SrSr\R                  S-  \S'   Sr	\R                  S-  \S'   Sr
\R                  \-  S-  \S'   Srg)TimesFmOutputForPrediction6   a  
mean_predictions (`torch.Tensor` of shape `(batch_size, sequence_length)`):
    The mean predictions of the time series.
full_predictions (`torch.Tensor` of shape `(batch_size, sequence_length)`):
    The full predictions of the time series including the mean and the quantiles.
loss (`torch.Tensor` of shape `(1,)`, *optional*, returned when `future_values` is provided):
    The loss of the TimesFM model.
Nmean_predictionsfull_predictionslossr   )r   r   r    r!   r"   r,   r#   r$   r%   r-   r.   floatr&   r   r'   r(   r*   r*   6   sI     -1ellT)0,0ellT)0(,D%,,

%,r'   r*   c                   >   ^  \ rS rSrSrS\4U 4S jjrSS jrSrU =r	$ )
TimesFmMLPG   zPax MLP in pytorch.configc                    > [         TU ]  5         UR                  nUR                  n[        R
                  " X#5      U l        [        R
                  " X25      U l        [        R                  " USS9U l	        g )Ngư>)normalized_shapeeps)
super__init__hidden_sizeintermediate_sizennLinear	gate_proj	down_proj	LayerNorm
layer_norm)selfr3   r9   r:   	__class__s       r(   r8   TimesFmMLP.__init__J   sX    (("44;B#4B,,Nr'   c                     U R                  U5      nU R                  U5      n[        R                  " U5      nU R	                  U5      nUb  USUS S 2S S 2S 4   -
  -  nXQ-   $ )N      ?)r@   r=   Frelur>   )rA   xpaddingsgate_inpgateoutputss         r(   forwardTimesFmMLP.forwardS   sa    ??1%~~h'vvd|..&x1d
';!;<G{r'   )r>   r=   r@   N
r   r   r    r!   r"   r   r8   rM   r&   __classcell__rB   s   @r(   r1   r1   G   s    O} O r'   r1   c                   2   ^  \ rS rSrSrU 4S jrS rSrU =r$ )TimesFmResidualBlock]   zTimesFM residual block.c                   > [         TU ]  5         Xl        X l        X0l        [
        R                  " X5      U l        [
        R                  " 5       U l	        [
        R                  " X#5      U l
        [
        R                  " X5      U l        g rO   )r7   r8   
input_dimshidden_dimsoutput_dimsr;   r<   input_layerSiLU
activationoutput_layerresidual_layer)rA   rW   rX   rY   rB   s       r(   r8   TimesFmResidualBlock.__init__`   s_    $&&99Z='')IIk? ii
@r'   c                     U R                  U5      nU R                  U5      nU R                  U5      nU R                  U5      nX4-   $ rO   )rZ   r\   r]   r^   )rA   rH   hiddenoutputresiduals        r(   rM   TimesFmResidualBlock.forwardk   sI    !!!$(""6*&&q)  r'   )r\   rX   rW   rZ   rY   r]   r^   )	r   r   r    r!   r"   r8   rM   r&   rQ   rR   s   @r(   rT   rT   ]   s    !	A! !r'   rT   c                       \ rS rSrSrg)TimesFmRMSNorms   r   N)r   r   r    r!   r&   r   r'   r(   rf   rf   s   s    r'   rf   c                   >   ^  \ rS rSrSrS\4U 4S jjrSS jrSrU =r	$ )TimesFmPositionalEmbeddingw   z6Generates position embedding for a given 1-d sequence.r3   c                   > [         TU ]  5         UR                  nUR                  nX#sU l        U l        UR                  U l        U R
                  S-  n[        R                  " [        U5      [        U5      -  5      [        US-
  S5      -  nU R                  SU[        R                  " [        R                  " U[        R                  S9U* -  5      -  5        g )Nr   r   inv_timescalesdtype)r7   r8   min_timescalemax_timescaler9   embedding_dimsmathlogr/   maxregister_bufferr#   exparangefloat32)rA   r3   ro   rp   num_timescaleslog_timescale_incrementrB   s         r(   r8   #TimesFmPositionalEmbedding.__init__z   s    ,,,,1>.D.$00,,1"&((5+?%BV+V"WZ]^lop^prsZt"tEIIell>&W[rZr&rss	
r'   c                 T   Uc  Uc  [        S5      eUcH  [        R                  " U[        R                  U R                  R
                  S9R                  S5      nO(UR                  S:w  a  [        SUR                   35      eUR                  " / UR                  QSP76 U R                  R                  SSS5      -  n[        R                  " [        R                  " U5      [        R                  " U5      /SS9n[        R                  " USSSU R                  S-  45      nU$ )	a  Generates a Tensor of sinusoids with different frequencies.

Args:
    seq_length: an optional Python int defining the output sequence length.
      if the `position` argument is specified.
    position: [B, seq_length], optional position for each token in the
      sequence, only required when the sequence is packed.

Returns:
    [B, seqlen, D] if `position` is specified, else [1, seqlen, D]
z.Either position or seq_length must be providedrn   devicer   r   z*position must be 2-dimensional, got shape r   dim)
ValueErrorr#   rw   rx   rl   r~   	unsqueezendimshapeviewcatsincosrF   padrq   )rA   
seq_lengthpositionscaled_timesignals        r(   rM   "TimesFmPositionalEmbedding.forward   s     
 2MNN||JemmDL_L_LfLfgqqrstH]]aI(..IYZ[[mm7X^^7Q7$:M:M:R:RSTVWY[:\\EIIk2EIIk4JKQRS v1a)<)<q)@ABr'   )rq   rp   ro   NNrP   rR   s   @r(   ri   ri   w   s    @
} 
 r'   ri   c                      ^  \ rS rSrSrS\S\4U 4S jjrS\R                  S\R                  4S jr
 SS
\R                  S\R                  S	-  S\\   S\\R                  \R                  S	-  4   4S jjrSrU =r$ )TimesFmAttention   zlImplements the attention used in TimesFM. One key difference is that there is _per_dim_scaling of the query.r3   	layer_idxc                   > [         TU ]  5         Xl        SU l        UR                  U l        X l        UR                  U l        UR                  U l        UR                  U l	        U R                  U R                  -  U l
        U R                  U R                  -  U l        [        R                  " [        R                  " U R                  45      5      U l        [        R"                  " U R                  U R                  U R                  -  5      U l        [        R"                  " U R                  U R                  U R                  -  5      U l        [        R"                  " U R                  U R                  U R                  -  5      U l        [        R"                  " U R                  U R                  -  U R                  5      U l        g )NT)r7   r8   r3   	is_causalattention_dropoutr   num_attention_heads	num_headsr9   head_dimq_sizekv_sizer;   	Parameterr#   emptyscalingr<   q_projk_projv_projo_projrA   r3   r   rB   s      r(   r8   TimesFmAttention.__init__   s/   !'!9!9"33!--nnt}}4~~5||EKK0@$ABii 0 0$..4==2PQii 0 0$..4==2PQii 0 0$..4==2PQii >@P@PQr'   queryreturnc                     [         R                  " U R                  5      R                  S[        R
                  " U R                  5      -  5      nXS S S S S 24   -  $ )Ng^$3eG?)rF   softplusr   mulrr   sqrtr   )rA   r   r   s      r(   _scale_queryTimesFmAttention._scale_query   sH    

4<<(,,[499T]];S-STT4q0111r'   Nhidden_statesattention_maskkwargsc                    UR                   S S n/ UQSPU R                  P7nU R                  U5      R                  U5      R	                  SS5      nU R                  U5      nU R                  U5      R                  U5      R	                  SS5      nU R                  U5      R                  U5      R	                  SS5      n[        R                  " U R                  R                  [        5      n	U	" U UUUU4U R                  (       d  SOU R                  SS.UD6u  pU
R                  " / UQSP76 R!                  5       n
U R#                  U
5      n
X4$ )Nr   r   r           rE   )dropoutr   )r   r   r   r   	transposer   r   r   r
   get_interfacer3   _attn_implementationr   trainingr   reshape
contiguousr   )rA   r   r   r   input_shapehidden_shapequery_states
key_statesvalue_statesattention_interfaceattn_outputattn_weightss               r(   rM   TimesFmAttention.forward   sQ    $))#2.88b8$--8{{=166|DNNqRST((6[[/44\BLLQPQR
{{=166|DNNqRST(?(M(MKK,,.L)
 %8	%
  $}}C$2H2H	%
 	%
! "));;;;FFHkk+.((r'   )r   r3   r   r9   r   r   r   r   r   r   r   r   r   r   rO   )r   r   r    r!   r"   r   intr8   r#   r$   r   r   r   tuplerM   r&   rQ   rR   s   @r(   r   r      s    vR} R R(2%,, 25<< 2 /3)||) t+) -.	)
 
u||U\\D00	1) )r'   r   c                      ^  \ rS rSrSrS\S\4U 4S jjrS\R                  S\R                  S\R                  S	\R                  4S
 jr
SrU =r$ )TimesFmDecoderLayer   zTransformer layer.r3   r   c                    > [         TU ]  5         [        XS9U l        [	        U5      U l        [        UR                  UR                  S9U l	        g )N)r   )r6   )
r7   r8   r   	self_attnr1   mlprf   r9   rms_norm_epsinput_layernormr   s      r(   r8   TimesFmDecoderLayer.__init__   sA    )&Ff%-f.@.@fFYFYZr'   r   r   rI   r   c                 v    UnU R                  U5      nU R                  UUS9u  pXQ-   nU R                  XS9nU$ )N)r   r   )rI   )r   r   r   )rA   r   r   rI   r   rc   _s          r(   rM   TimesFmDecoderLayer.forward   sW     !,,];>>') * 
 !0 Br'   )r   r   r   )r   r   r    r!   r"   r   r   r8   r#   r$   rM   r&   rQ   rR   s   @r(   r   r      s\    [} [ [||  ,,	 
 r'   r   c                   |   ^  \ rS rSr% \\S'   SrS/rSrSr	Sr
\\S.r\R                  " 5       U 4S	 j5       rS
rU =r$ )TimesFmPreTrainedModeli  r3   timesfmr   past_values)timeT)r   
attentionsc                 8  > [         TU ]  U5        [        U[        5      (       a!  [        R
                  " UR                  5        g [        U[        5      (       a  UR                  S-  nUR                  UR                  pC[        R                  " [        U5      [        U5      -  5      [        US-
  S5      -  n[        R                  " UR                   U["        R$                  " ["        R&                  " U["        R(                  S9U* -  5      -  5        g g )Nr   r   rm   )r7   _init_weights
isinstancer   initones_r   ri   rq   rp   ro   rr   rs   r/   rt   copy_rl   r#   rv   rw   rx   )rA   modulery   rp   ro   rz   rB   s         r(   r   $TimesFmPreTrainedModel._init_weights  s    f%f.//JJv~~& :;;#22a7N+1+?+?AUAU=&*hhu]/CeMFZ/Z&[^a"A_ '# JJ%%))ELLu}}MQhPhhij <r'   r   )r   r   r    r!   r   r%   base_model_prefix_no_split_modulesmain_input_nameinput_modalities_supports_sdpar   r   _can_record_outputsr#   no_gradr   r&   rQ   rR   s   @r(   r   r     sP    !./#O N,&
 ]]_ r'   r   c                     ^  \ rS rSrS\4U 4S jjrS\R                  S\R                  S\\R                  \\R                  \R                  4   4   4S jr	\
\\S\R                  S	\R                  S
\R                  S\\   S\4
S j5       5       5       r\ SS\R                  S-  S\S\R(                  S\R*                  S\S\R                  S-  4S jj5       r\S\R                  S\R                  S\\R                  \R                  4   4S j5       r\S\R                  S\R                  S\R                  4S j5       rSrU =r$ )TimesFmModeli!  r3   c           	        > [         TU ]  U5        Xl        [        SUR                  -  UR
                  UR                  S9U l        [        R                  " UR                  UR
                  S9U l        [        R                  " [        UR                  5       Vs/ s H  n[        X5      PM     sn5      U l        U R                  R"                  (       a  [%        US9U l        U R)                  5         g s  snf )Nr   rW   rY   rX   )num_embeddingsembedding_dim)r3   )r7   r8   r3   rT   patch_lengthr9   r:   input_ff_layerr;   	Embedding	freq_sizefreq_emb
ModuleListrangenum_hidden_layersr   layersuse_positional_embeddingri   position_emb	post_initr   s      r(   r8   TimesFmModel.__init__#  s     26...**00

 F4D4DTZTfTfgmmEJ6KcKcEdeEd	 3Ede
 ;;// :& ID 	 fs   C8inputspatched_padsr   c           	         U R                  X5      u  p4[        R                  " X@R                  R                  S9nXSS2SS4   -
  USS2SS4   -  n[        R
                  " [        R                  " XR                  R                  -
  5      U R                  R                  :  [        R                  " U R                  R                  UR                  UR                  S9U5      nXSU44$ )zInput is of shape [B, N, P].minNr}   )_timesfm_masked_mean_stdr#   clampr3   	tolerancewhereabspad_valtensorrn   r~   )rA   r   r   musigmarL   s         r(   _forward_transformTimesFmModel._forward_transform6  s     11&G	E{{'<'<= q$}--q$}1EE++IIf{{2223dkk6K6KKLL,,GMM'..Y

 U##r'   r   past_values_paddingfreqr   c           	      Z   UR                   S   nUR                  USU R                  R                  5      nUR                  USU R                  R                  5      n[        R
                  " [        R                  " US-
  5      U R                  R                  :  [        R                  " SUR                  UR                  S9U5      n[        R
                  " [        R                  " X`R                  R                  -
  5      U R                  R                  :  [        R                  " SUR                  UR                  S9U5      nU R                  Xg5      u  phUSU-
  -  n[        R                  " Xg/SS9n	U R                  U	5      n
[        R                  " USS9S   nU R                  R                   (       aY  U R#                  U
R                   S   5      n[        R$                  " U/U
R                   S   -  SS9nU R'                  X5      nX-  n
U R)                  U5      nX-  n
U
nU R+                  UUR                   S   UR                  UR                  SS	9nU R,                  S
U R                  R.                    H  nU" U4UUS.UD6nM     [1        UUS   US   S9$ )ax  
past_values (`torch.FloatTensor` of shape `(batch_size, sequence_length)`):
    Past values of the time series that serves as input to the model.
past_values_padding (`torch.LongTensor` of shape `(batch_size, sequence_length)`):
    The padding indicator of the time series.
freq (`torch.LongTensor` of shape `(batch_size,)`):
    Frequency indices for the time series data.
r   r   rE   r   r}   r   r   T)r   sequence_lengthrn   r~   r   N)r   rI   )last_hidden_stater   r   )r   r   r3   r   r#   r  r  r   r  rn   r~   r  r  r   r   r   r   r   concat_timesfm_shift_padded_seqr   _prepare_4d_attention_maskr   r   r   )rA   r   r	  r
  r   bsizepatched_inputsr   statsconcat_inputsmodel_inputpatched_paddingpos_embf_embr   r   layers                    r(   rM   TimesFmModel.forwardF  ss   & !!!$$))%T[[5M5MN*//r4;;;S;STIIlS()DKK,A,AALLN$8$8AVAVW

 {{IIn{{':'::;dkk>S>SSLLL$6$6|?R?RS

 !% 7 7 U (3+=>		>"@bI))-8  ))Lb9!<;;//''(9(9!(<=GllG9{/@/@/C#CKG44_NG"Kd# $88*)//2%% '' 9 
 [[!@4;;#@#@AE!-( 	M B +a(
 	
r'   r   Nr  rn   r~   r   c                    UR                   (       a   [        R                  " U5      R                  O[        R                  " U5      R                  nU b%  U R                  U R                  S   SSS5      n X-  n U(       a\  [        R                  " [        R                  " X4X#S9U-  SS9nUR                  SSX5      nU b  [        R                  " X5      n U $ Un U $ )a  
Creates 4D attention mask and combines causal and padding masks if needed.

Args:
    attention_mask: Optional tensor of shape (batch_size, seq_length) containing padding mask
    sequence_length: Length of the sequence
    dtype: Data type of the mask
    device: Device of the mask
    is_causal: Whether to apply causal masking

Returns:
    4D attention mask of shape (batch_size, 1, seq_length, seq_length)
r   r   r   r}   )diagonal)
is_floating_pointr#   finfor   iinfor   r   triuonesminimum)r   r  rn   r~   r   	min_valuecausal_masks          r(   r  'TimesFmModel._prepare_4d_attention_mask  s    , /4.E.EEKK&**5;;W\K]KaKa	 %+001E1Ea1H!QPRSN+7N **

O=UZ]ffK &**1aRK )!&~!K  "-r'   paddingc                 (   S[         R                  4S jn[         R                  " SU-
  SS9nU" U5      n[         R                  " U R                  S   5      nXUSS24   nXUSS24   nSU-
  n[         R                  " USS9n	[         R
                  " U	SS	9n	[         R                  " Xh-  SS9n
X-  nXkR                  S
5      -
  U-  n[         R                  " US-  SS9U	-  n[         R
                  " USS	9n[         R                  " U5      nX4$ )a  Calculates mean and standard deviation of `inputs` across axis 1.

It excludes values where `padding` is 1.

Args:
    inputs: A PyTorch tensor of shape [b, n, p].
    padding: A PyTorch tensor of shape [b, n, p] with values 0 or 1.

Returns:
    A tuple containing the mean and standard deviation.
    We return the statistics of the first patch with more than three non-padded values.
arrc                     [         R                  " U S:  R                  [         R                  5      SS9nU S:  R                  [         R                  5      R	                  SS9n[         R
                  " US:H  U R                  S   S-
  U5      $ )Nr   r   r   r   )r#   argmaxtoint32sumr  r   )r(  indicesrow_sums      r(   _get_patch_index?TimesFmModel._timesfm_masked_mean_std.<locals>._get_patch_index  sk    llC1H==#=1EGaxmmEKK0444;G;;w!|SYYq\A-=wGGr'   r   r   r   r   NrE   r   r   r   )r#   r$   r-  rw   r   r   r   r   )r   r&  r0  pad_sumpatch_indicesbidxsr(  r   masknum_valid_elements
masked_summasked_meanmasked_centered_arr
masked_var
masked_stds                  r(   r   %TimesFmModel._timesfm_masked_mean_std  s    	H%,, 	H
 ))AKQ/(1V\\!_-M1,-]A-. 3w #YYt3"[[);E YYszq1
 5  #%:%:2%>>$FYY2A51=@RR
[[5
ZZ
+
&&r'   r5  seqc                 l   UR                   u  p#nU S:H  nUR                  [        R                  5      R	                  SS9nSXeR                  SS9) '   [        R                  " X1R                  S9R                  SSS5      R                  USU5      nXvSS2SS4   -
  U-  nUR                  SU5      n	U	$ )zShifts rows of seq based on the first 0 in each row of the mask.

Args:
    mask: mask tensor of shape [B, N]
    seq: seq tensor of shape [B, N, P]

Returns:
    The shifted sequence.
r   r   r   r   )r~   N)r   r+  r#   r,  r*  anyrw   r~   r   expandgather)
r5  r=  
batch_sizenum_seqfeature_dimnew_maskr.  	idx_rangeshifted_idxshifted_seqs
             r(   r  &TimesFmModel._timesfm_shift_padded_seq  s     ,/99(
[%)QY ++ekk*11a18 )+!$$% LL<AA!RKRRS]_acno	 !1dD=#99WD jjK0r'   )r3   r   r   r   r   )T)r   r   r    r!   r   r8   r#   r$   r   r  r   r   r   
LongTensorr   r   r   rM   staticmethodr   rn   r~   boolr  r   r  r&   rQ   rR   s   @r(   r   r   !  s   } &$ll$27,,$	u||U5<<#=>>	?$   F
\\F
 #--F
 ll	F

 +,F
 
F
    F
P  +t+++ {{+ 	+
 + 
	+ +Z ,' ,' ,'QVW\WcWcejeqeqWqQr ,' ,'\  5<< ELL  r'   r   c                   l  ^  \ rS rSrSrS\4U 4S jjr SS\\R                     S\\
   S-  S\
S-  S	\\R                  S
4   4S jjrS\R                  S\\R                  \R                  4   S	\R                  4S jrS\R                  S\R                  S	\R                  4S jr\\      SS\\R                     S\\R                  \
-     S-  S\
S-  S\R                  S-  S\
S-  S\S\S\\   S	\4S jj5       5       r\S\R                  S\
S	\\R                     4S j5       rSrU =r$ )TimesFmModelForPredictioni  z/TimesFM model for quantile and mean prediction.r3   c                 B  > [         TU ]  U5        Xl        UR                  U l        UR
                  U l        [        U5      U l        [        UR                  UR
                  S[        UR                  5      -   -  UR                  S9U l        U R                  5         g )Nr   r   )r7   r8   r3   context_lengthcontext_lenhorizon_lengthhorizon_lenr   decoderrT   r9   len	quantilesr:   horizon_ff_layerr   )rA   r3   rB   s     r(   r8   "TimesFmModelForPrediction.__init__  s     !00!00#F+ !5))--S9I9I5J1JK00!
 	r'   Nr   r
  rQ  r   .c           	      2   Uc  U R                   n/ / pTU GH  nUR                  S   n[        R                  " XpR                  -   UR
                  UR                  S9nXs:  a  X7-
  n	[        R                  " [        R                  " XR
                  UR                  S9U/SS9n[        R                  " [        R                  " XR
                  UR                  S9U/SS9nOXs:  a  Xc* S nXU R                  -   * S nUR                  U5        UR                  U5        GM     [        R                  " USS9[        R                  " USS94n
UbC  U
[        R                  " US[        U5       [        R                  S9R                  SS5      4-   n
U
$ )a  Pad/truncate input time series to `context_len` and build a padding mask.

Args:
    inputs: A list of 1d Tensors. Each Tensor is the context time series of a single forecast task.
    freq: Optional list of frequencies (returned as a tensor when provided).
    context_len: Optional context length override (defaults to `self.context_len`).

Returns:
    Tuple of (padded_inputs, padding_mask) and optionally a freq tensor.
Nr   r}   r   rm   r   r   )rQ  r   r#   zerosrS  rn   r~   r   r!  appendstackr  rU  r,  r   )rA   r   r
  rQ  input_tsinput_paddingts	input_lenr&  num_front_padresults              r(   _preprocess%TimesFmModelForPrediction._preprocess%  si    **K"$b-BIkk).>.>">bhhWYW`W`aG& + 7YYMRTR[R[ \^`aghi))UZZXXV]VdVd%egn$ouvw(&!$2B2B$B"C"EFOOB  )  ++hA.Mq0QRu||D3v;,?u{{S[[\^`abddFr'   model_outputr  c                 
   U R                  U5      nUR                  u  pEnUR                  XEU R                  R                  [        U R                  R                  5      S-   5      nUu  pxX8SS2SSS4   -  USS2SSS4   -   $ )z*Postprocess output of stacked transformer.r   N)rW  r   r   r3   rR  rU  rV  )	rA   re  r  	output_tsbnr   r  r  s	            r(   _postprocess_output-TimesFmModelForPrediction._postprocess_outputJ  s     )),7	 //aNN1)C)CSI^I^E_bcEcd		D$!4551dD$;N8OOOr'   predictionstargetsc                 ,   / n[        U R                  R                  5       HL  u  pEX!SU4   -
  n[        R                  " US-
  U-  XV-  5      nUR                  UR                  5       5        MN     [        R                  " U5      R                  5       $ )N.r   )	enumerater3   rV  r#   rt   r[  meanr\  )rA   rl  rm  lossesiqerrorsr.   s           r(   _quantile_loss(TimesFmModelForPrediction._quantile_lossY  sz    dkk334DA3622F99a!ev-qz:DMM$))+& 5 {{6"''))r'   r   window_sizefuture_valuesforecast_context_lenreturn_forecast_on_contexttruncate_negativer   c           
      :	   Uc  U R                   n	OUn	US   R                  n
U Vs/ s H  oU	* S PM
     nn[        R                  " [        R                  " U Vs/ s H  n[        R                  " U5      PM     sn5      5      nUb]  / n/ n[        U5       HC  u  nnUR                  U R                  X5      5        Uc  M+  UR                  UU   /S-  5        ME     UnUb  UnUc$  [        R                  S5        S/[        U5      -  nU R                  X5      u  nnnUR                  U
5      nUR                  U
5      nUR                  U
5      nUnUR                  S   n/ nUR                  S   UR                  S   U R                  -   :w  a8  [        SUR                  S    SUR                  S    SU R                   35      eU R                   R"                  nU R                  U-   S-
  U-  n[%        U5       GH4  nUSS2SUR                  S   24   nUSS2U	* S24   nUSS2U	* S24   nU R&                  " SUUUS	.UD6nU R)                  UR*                  UR,                  UR.                  45      nU(       am  US:X  ag  USS2SS
2SU R                   R0                  2SS24   nUR3                  UR5                  S5      S
UR5                  S5      5      nUR7                  U5        USS2S
SU2S4   nUSS2S
SU2SS24   nUR7                  U5        [        R8                  " UU/S
S9nGM7     U(       aF  [        R8                  " USS9SS2SUU R                   R0                  -
  U R                  -   2SS24   nO+[        R8                  " USS9SS2SU R                  2SS24   nUSS2SS2S4   nUb*  USSS2S4   USSS2S4   -   nUSSS2S4   USSS2S4   -   nUS:  a5  U(       a.  [        R:                  " US5      n[        R:                  " US5      nSn Ub:  [<        R>                  " UU5      n!U RA                  USS2SS2SS24   U5      n"U!U"-   n [C        WR*                  URD                  URF                  UUU S9$ s  snf s  snf )a  
past_values (`torch.FloatTensor` of shape `(batch_size, sequence_length)`):
    Past values of the time series that serves as input to the model.
freq (`torch.LongTensor` of shape `(batch_size,)`):
    Frequency indices for the time series data.
window_size (`int`, *optional*):
    Window size of trend + residual decomposition. If None then we do not do decomposition.
future_values (`torch.Tensor`, *optional*):
    Optional future time series values to be used for loss computation.
forecast_context_len (`int`, *optional*):
    Optional max context length.
return_forecast_on_context (`bool`, *optional*):
    True to return the forecast on the context when available, i.e. after the first input patch.
truncate_negative (`bool`, *optional*):
    Truncate to only non-negative values if any of the contexts have non-negative values,
    otherwise do nothing.

Example:

```python
>>> from transformers import TimesFmModelForPrediction

>>> model = TimesFmModelForPrediction.from_pretrained("google/timesfm-2.0-500m-pytorch")

>>> forecast_input = [torch.linspace(0, 20, 100).sin(), torch.linspace(0, 20, 200).sin(), torch.linspace(0, 20, 400).sin()]
>>> frequency_input = torch.tensor([0, 1, 2], dtype=torch.long)

>>> # Generate
>>> with torch.no_grad():
>>>     outputs = model(past_values=forecast_input, freq=frequency_input, return_dict=True)
>>>     point_forecast_conv = outputs.mean_predictions
>>>     quantile_forecast_conv = outputs.full_predictions
```
Nr   r   z6No frequency provided via `freq`. Default to high (0).r   z=Length of paddings must match length of input + horizon_len: z != z + )r   r	  r
  r   r   )axis.r   )r  r   r   r,   r-   r.   r   )$rQ  r~   r#   r   r\  ro  extend_timesfm_moving_averageloggerinforU  rc  r+  r   rS  r   r3   rR  r   rT  rj  r  r   r   r   r   sizer[  concatenatemaximumrF   mse_lossru  r*   r   r   )#rA   r   r
  rw  rx  ry  rz  r{  r   fcontext_lenr~   r_  r   inp_min
new_inputs	new_freqsrr  r]  r^  inp_freq	final_outrQ  full_outputsoutput_patch_lennum_decode_patches
step_indexcurrent_paddingdecoder_outputfprop_outputsnew_full_tsnew_tsmean_outputsr.   r  quantile_losss#                                      r(   rM   !TimesFmModelForPrediction.forwarda  s   ^  '++L/LQ&&/:;{l]^${;))EKK(H22(HIJ"JI"6*2!!$">">r"OP#$$d1gY]3 +  F <KKPQ3V$D,0,<,<V,J)-;;v&%((0;;v&	ooa(q!Y__Q%7$:J:J%JJ!''*+4	0B/C3tGWGWFXZ   ;;55"..1AAAEJZZ 23J+Aq9??13E/E,EFO \MN!23H+A}~,=>M,0LL -$$1- 	-N !4400##^%9%9:M
 *jAo+Ass4Ndkk6N6N4NPQ,QR)11+2B2B12Er;K[K[\]K^_##K0"1b*;+;*;Q#>?F'2/@0@/@!(CDK,))9f*=BGI/ 42 & ,,\BPkDKK$<$<<t?O?OOPRSSL !,,\B1a$JZJZFZ\]C]^L#Aq!G,"'1c	2\!$Q$)5LLL'1c	2\!$Q$)5LLLa<- ==s;L ==s;L$zz,>H //Q12X0FVMm+D),>>%00(66))
 	
c <(Hs   R Rr(  c                 .   [         R                  " XS-
  S4SS5      n[        R                  " XR                  U R
                  S9U-  n[         R                  " UR                  SSS5      UR                  SSS5      5      R                  5       nX@U-
  /$ )zCCalculates the moving average using PyTorch's convolution function.r   r   constantr}   r   )	rF   r   r#   r!  rn   r~   conv1dr   squeeze)r(  rw  
arr_paddedkernelsmoothed_arrs        r(   r  1TimesFmModelForPrediction._timesfm_moving_average  s}     UU3q! 4j!D
KyyL{Zxx
1b 96;;q!R;PQYY[L011r'   )r3   rQ  rT  rW  rS  r   )NNNNFF)r   r   r    r!   r"   r   r8   r   r#   r$   r   r   rc  rj  ru  r   r   rL  r   r   r*   rM   rK  listr  r&   rQ   rR   s   @r(   rN  rN    s   9} ( lp#u||,#4<SMD4H#^adh^h#	u||S 	!#JP!LLP16u||U\\7Q1RP	P*%,, * *RWR^R^ *  59"&-1+/+0"'L
ell+L
 u||c)*T1L
 4Z	L

 ||d*L
 "DjL
 %)L
  L
 +,L
 
$L
  L
\ 2U\\ 2 2U\\HZ 2 2r'   rN  )rN  r   r   )8r"   rr   collections.abcr   r   dataclassesr   r#   torch.nnr;   torch.nn.functional
functionalrF    r   r   modeling_flash_attention_utilsr   modeling_outputsr	   modeling_utilsr
   r   processing_utilsr   utilsr   r   r   r   utils.genericr   utils.output_capturingr   llama.modeling_llamar   (phi4_multimodal.modeling_phi4_multimodalr   configuration_timesfmr   
get_loggerr   r  r   r*   Moduler1   rT   rf   ri   r   r   r   r   rN  __all__r   r'   r(   <module>r     sM     . !     & B / F & R R 7 5 / U 0 
		H	% 
	&O 	&  	& 
- -  - ,!299 !,	\ 	+ +\9)ryy 9)x")) @ _  > j) j jZk2 6 k2\ Rr'   