
    qi(                     B   d dl Z d dlmZmZ d dlmZ d dlZd dlmZ d dl	mc m
Z ddlmZ ddlmZ ddlmZ ddlmZ dd	lmZmZ dd
lmZ ddlmZmZmZmZ ddlm Z   ejB                  e"      Z#ee G d de                    Z$ee G d de                    Z% G d dejL                        Z' G d dejL                        Z( ed       G d dejL                               Z) G d dejL                        Z*	 d/dejL                  dejV                  dejV                  dejV                  dejV                  dz  d e,d!e,d"ee   fd#Z- G d$ d%ejL                        Z. G d& d'ejL                        Z/e G d( d)e             Z0e G d* d+e0             Z1 G d, d-e0      Z2g d.Z3y)0    N)CallableSequence)	dataclass   )initialization)use_kernel_forward_from_hub)FlashAttentionKwargs)BaseModelOutput)ALL_ATTENTION_FUNCTIONSPreTrainedModel)Unpack)TransformersKwargsauto_docstringcan_return_tuplelogging   )TimesFmConfigc                   b    e Zd ZU dZdZej                  dz  ed<   dZej                  dz  ed<   y)TimesFmOutputz
    loc (`torch.Tensor` of shape `(batch_size, )`):
        The mean of the time series inputs.
    scale (`torch.Tensor` of shape `(batch_size,)`):
        The scale of the time series inputs.
    Nlocscale)	__name__
__module____qualname____doc__r   torchTensor__annotations__r        ^/opt/pipecat/venv/lib/python3.12/site-packages/transformers/models/timesfm/modeling_timesfm.pyr   r   *   s/      $C	#!%E5<<$%r    r   c                       e Zd ZU dZdZej                  dz  ed<   dZej                  dz  ed<   dZ	ej                  e
z  dz  ed<   y)TimesFmOutputForPredictiona  
    mean_predictions (`torch.Tensor` of shape `(batch_size, sequence_length)`):
        The mean predictions of the time series.
    full_predictions (`torch.Tensor` of shape `(batch_size, sequence_length)`):
        The full predictions of the time series including the mean and the quantiles.
    loss (`torch.Tensor` of shape `(1,)`, *optional*, returned when `future_values` is provided):
        The loss of the TimesFM model.
    Nmean_predictionsfull_predictionsloss)r   r   r   r   r$   r   r   r   r%   r&   floatr   r    r!   r#   r#   8   sI     -1ellT)0,0ellT)0(,D%,,

%,r    r#   c                   0     e Zd ZdZdef fdZddZ xZS )
TimesFmMLPzPax MLP in pytorch.configc                     t         |           |j                  }|j                  }t	        j
                  ||      | _        t	        j
                  ||      | _        t	        j                  |d      | _	        y )Nư>)normalized_shapeeps)
super__init__hidden_sizeintermediate_sizennLinear	gate_proj	down_proj	LayerNorm
layer_norm)selfr*   r1   r2   	__class__s       r!   r0   zTimesFmMLP.__init__L   s]    (("44;0AB#4kB,,Nr    c                     | j                  |      }| j                  |      }t        j                  |      }| j	                  |      }||d|d d d d d f   z
  z  }||z   S )N      ?)r8   r5   Frelur6   )r9   xpaddingsgate_inpgateoutputss         r!   forwardzTimesFmMLP.forwardU   sc    ??1%~~h'vvd|..&x1d
';!;<G{r    Nr   r   r   r   r   r0   rD   __classcell__r:   s   @r!   r)   r)   I   s    O} Or    r)   c                   (     e Zd ZdZ fdZd Z xZS )TimesFmResidualBlockzTimesFM residual block.c                     t         |           || _        || _        || _        t        j                  ||      | _        t        j                         | _	        t        j                  ||      | _
        t        j                  ||      | _        y rE   )r/   r0   
input_dimshidden_dimsoutput_dimsr3   r4   input_layerSiLU
activationoutput_layerresidual_layer)r9   rL   rM   rN   r:   s       r!   r0   zTimesFmResidualBlock.__init__b   sk    $&&99Z='')IIk;? ii
K@r    c                     | j                  |      }| j                  |      }| j                  |      }| j                  |      }||z   S rE   )rO   rQ   rR   rS   )r9   r?   hiddenoutputresiduals        r!   rD   zTimesFmResidualBlock.forwardm   sK    !!!$(""6*&&q)  r    )r   r   r   r   r0   rD   rG   rH   s   @r!   rJ   rJ   _   s    !	A!r    rJ   RMSNormc                   h     e Zd Zddeddf fdZdej                  dej                  fdZd Z xZ	S )	TimesFmRMSNormr.   returnNc                     t         |           t        j                  t	        j
                  |            | _        || _        y)z=
        TimesFmRMSNorm is equivalent to T5LayerNorm
        N)r/   r0   r3   	Parameterr   onesweightvariance_epsilon)r9   r1   r.   r:   s      r!   r0   zTimesFmRMSNorm.__init__w   s1     	ll5::k#:; #r    hidden_statesc                 "   |j                   }|j                  t        j                        }|j	                  d      j                  dd      }|t        j                  || j                  z         z  }| j                  |j                  |      z  S )N   T)keepdim)	dtypetor   float32powmeanrsqrtr`   r_   )r9   ra   input_dtypevariances       r!   rD   zTimesFmRMSNorm.forward   sy    #))%((7 $$Q',,R,>%Ht?T?T4T(UU{{]--k:::r    c                 ^    t        | j                  j                         d| j                   S )Nz, eps=)tupler_   shaper`   )r9   s    r!   
extra_reprzTimesFmRMSNorm.extra_repr   s*    ))*+6$2G2G1HIIr    )r,   )
r   r   r   r'   r0   r   r   rD   rq   rG   rH   s   @r!   rZ   rZ   u   s7    $ $$ $;U\\ ;ell ;Jr    rZ   c                   0     e Zd ZdZdef fdZddZ xZS )TimesFmPositionalEmbeddingz6Generates position embedding for a given 1-d sequence.r*   c           
         t         |           |j                  }|j                  }||c| _        | _        |j                  | _        | j
                  dz  }t        j                  t        |      t        |      z        t        |dz
  d      z  }| j                  d|t        j                  t        j                  |t        j                        | z        z         y )Nrc   r   inv_timescalesrf   )r/   r0   min_timescalemax_timescaler1   embedding_dimsmathlogr'   maxregister_bufferr   exparangerh   )r9   r*   rw   rx   num_timescaleslog_timescale_incrementr:   s         r!   r0   z#TimesFmPositionalEmbedding.__init__   s    ,,,,1>.D.$00,,1"&((5+?%BV+V"WZ]^lop^prsZt"tEIIell>&W[rZr&rss	
r    c                 N   ||t        d      |Jt        j                  |t        j                  | j                  j
                        j                  d      }n'|j                  dk7  rt        d|j                          |j                  g |j                  d | j                  j                  ddd      z  }t        j                  t        j                  |      t        j                  |      gd      }t        j                  |ddd| j                  dz  f      }|S )	a  Generates a Tensor of sinusoids with different frequencies.

        Args:
            seq_length: an optional Python int defining the output sequence length.
              if the `position` argument is specified.
            position: [B, seq_length], optional position for each token in the
              sequence, only required when the sequence is packed.

        Returns:
            [B, seqlen, D] if `position` is specified, else [1, seqlen, D]
        z.Either position or seq_length must be providedrf   devicer   rc   z*position must be 2-dimensional, got shape r   rd   dim)
ValueErrorr   r   rh   ru   r   	unsqueezendimrp   viewcatsincosr=   padry   )r9   
seq_lengthpositionscaled_timesignals        r!   rD   z"TimesFmPositionalEmbedding.forward   s     
 2MNN||JemmDL_L_LfLfgqqrstH]]aI(..IYZ[[#hmm7X^^7Q7$:M:M:R:RSTVWY[:\\EIIk2EIIk4JKQRS v1a)<)<q)@ABr    NNrF   rH   s   @r!   rs   rs      s    @
} 
r    rs   modulequery_states
key_statesvalue_statesattention_maskscalingdropoutkwargsc                    t        j                  ||j                  dd            |z  }|||z   }t        j                  j                  |dt         j                        j                  |j                        }t        j                  j                  ||| j                        }t        j                  ||      }	|	j                  dd      j                         }	|	|fS )Nrc   r   rd   )r   rf   )ptrainingr   )r   matmul	transposer3   
functionalsoftmaxrh   rg   rf   r   r   
contiguous)
r   r   r   r   r   r   r   r   attn_weightsattn_outputs
             r!   simple_eager_attention_forwardr      s     <<j.B.B1a.HIGSL!#n4==((2U]](SVVWcWiWijL==((6??([L,,|\:K''1-88:K$$r    c                        e Zd ZdZdedef fdZdej                  dej                  fdZ		 dd	ej                  d
ej                  dz  de
e   deej                  ej                  dz  f   fdZ xZS )TimesFmAttentionzlImplements the attention used in TimesFM. One key difference is that there is _per_dim_scaling of the query.r*   	layer_idxc                    t         |           || _        d| _        |j                  | _        || _        |j                  | _        |j                  | _        |j                  | _	        | j                  | j                  z  | _
        | j                  | j                  z  | _        t        j                  t        j                  | j                  f            | _        t        j"                  | j                  | j                  | j                  z        | _        t        j"                  | j                  | j                  | j                  z        | _        t        j"                  | j                  | j                  | j                  z        | _        t        j"                  | j                  | j                  z  | j                        | _        y )NT)r/   r0   r*   	is_causalattention_dropoutr   num_attention_heads	num_headsr1   head_dimq_sizekv_sizer3   r]   r   emptyr   r4   q_projk_projv_projo_projr9   r*   r   r:   s      r!   r0   zTimesFmAttention.__init__   s3   !'!9!9"33!--nnt}}4~~5||EKK0@$ABii 0 0$..4==2PQii 0 0$..4==2PQii 0 0$..4==2PQii >@P@PQr    queryr[   c                     t        j                  | j                        j                  dt	        j
                  | j                        z        }||d d d d d f   z  S )Ng^$3eG?)r=   softplusr   mulrz   sqrtr   )r9   r   r   s      r!   _scale_queryzTimesFmAttention._scale_query   sJ    

4<<(,,[499T]];S-STuT4q0111r    Nra   r   r   c                    |j                   d d }g |d| j                  }| j                  |      j                  |      j	                  dd      }| j                  |      }| j                  |      j                  |      j	                  dd      }| j                  |      j                  |      j	                  dd      }t        j                  | j                  j                  t              }	 |	| ||||f| j                  sdn| j                  dd|\  }
} |
j                  g |d j!                         }
| j#                  |
      }
|
|fS )Nrd   r   rc           r<   )r   r   )rp   r   r   r   r   r   r   r   r   get_interfacer*   _attn_implementationr   r   r   reshaper   r   )r9   ra   r   r   input_shapehidden_shaper   r   r   attention_interfacer   r   s               r!   rD   zTimesFmAttention.forward   sW    $))#2.88b8$--8{{=166|DNNqRST((6[[/44\BLLQPQR
{{=166|DNNqRST(?(M(MKK,,.L)
 %8	%
  $}}C$2H2H	%
 	%
!\ *k));;;;FFHkk+.L((r    rE   )r   r   r   r   r   intr0   r   r   r   r   r	   ro   rD   rG   rH   s   @r!   r   r      s    vR} R R(2%,, 25<< 2 /3)||) t+) -.	)
 
u||U\\D00	1)r    r   c                        e Zd ZdZdedef fdZ	 ddej                  dej                  dej                  de	d	e
ej                  d
z  ej                  f   f
dZ xZS )TimesFmDecoderLayerzTransformer layer.r*   r   c                     t         |           t        ||      | _        t	        |      | _        t        |j                  |j                        | _	        y )N)r   )r.   )
r/   r0   r   	self_attnr)   mlprZ   r1   rms_norm_epsinput_layernormr   s      r!   r0   zTimesFmDecoderLayer.__init__  sC    )&IFf%-f.@.@fFYFYZr    ra   r   r@   output_attentionsr[   Nc                     |}| j                  |      }| j                  |||      \  }}||z   }| j                  ||      }||fS )N)ra   r   r   )r@   )r   r   r   )r9   ra   r   r@   r   rW   scoress          r!   rD   zTimesFmDecoderLayer.forward  se     !,,]; $')/ !/ !
v
 !=0 B}$$r    )F)r   r   r   r   r   r   r0   r   r   boolro   rD   rG   rH   s   @r!   r   r   
  sy    [} [ [ #(%||% % ,,	%
  % 
u||d"ELL0	1%r    r   c                   h     e Zd ZU eed<   dZdgZdZdZdZ	 e
j                          fd       Z xZS )TimesFmPreTrainedModelr*   timesfmr   past_values)timeTc           
      "   t         |   |       t        |t              r t	        j
                  |j                         y t        |t              r|j                  dz  }|j                  |j                  }}t        j                  t        |      t        |      z        t        |dz
  d      z  }t	        j                  |j                   |t#        j$                  t#        j&                  |t"        j(                        | z        z         y y )Nrc   r   rv   )r/   _init_weights
isinstancer   initones_r   rs   ry   rx   rw   rz   r{   r'   r|   copy_ru   r   r~   r   rh   )r9   r   r   rx   rw   r   r:   s         r!   r   z$TimesFmPreTrainedModel._init_weights4  s    f%f./JJv~~& :;#22a7N+1+?+?AUAU=M&*hhu]/CeMFZ/Z&[^a"A_ '# JJ%%))ELLu}}MQhPhhij <r    )r   r   r   r   r   base_model_prefix_no_split_modulesmain_input_nameinput_modalities_supports_sdpar   no_gradr   rG   rH   s   @r!   r   r   +  sB    !./#O NU]]_ r    r   c                       e Zd Zdef fdZdej                  dej                  deej                  eej                  ej                  f   f   fdZe	e
	 	 ddej                  dej                  d	ej                  d
ededefd              Ze	 ddej                  dz  dedej"                  dej$                  dedej                  dz  fd       Zedej                  dej                  deej                  ej                  f   fd       Zedej                  dej                  dej                  fd       Z xZS )TimesFmModelr*   c           	         t         |   |       || _        t        d|j                  z  |j
                  |j                        | _        t        j                  |j                  |j
                        | _        t        j                  t        |j                        D cg c]  }t        ||       c}      | _        | j                  j"                  rt%        |      | _        | j)                          y c c}w )Nrc   rL   rN   rM   )num_embeddingsembedding_dim)r*   )r/   r0   r*   rJ   patch_lengthr1   r2   input_ff_layerr3   	Embedding	freq_sizefreq_emb
ModuleListrangenum_hidden_layersr   layersuse_positional_embeddingrs   position_emb	post_initr   s      r!   r0   zTimesFmModel.__init__I  s     26...**00

 F4D4DTZTfTfgmmEJ6KcKcEde	 3e
 ;;// :& ID 	 fs   "C9inputspatched_padsr[   c                    | j                  ||      \  }}t        j                  || j                  j                        }||ddddf   z
  |ddddf   z  }t        j
                  t        j                  || j                  j                  z
        | j                  j                  k  t        j                  | j                  j                  |j                  |j                        |      }|||ffS )zInput is of shape [B, N, P].minNr   )_timesfm_masked_mean_stdr   clampr*   	tolerancewhereabspad_valtensorrf   r   )r9   r   r   musigmarC   s         r!   _forward_transformzTimesFmModel._forward_transform\  s     11&,G	EEt{{'<'<= Bq$}--q$}1EE++IIft{{2223dkk6K6KKLL,,GMM'..Y

 U##r    r   past_values_paddingfreqr   output_hidden_statesc                    |j                   d   }|j                  |d| j                  j                        }|j                  |d| j                  j                        }	t	        j
                  t	        j                  |	dz
        | j                  j                  k  t	        j                  d|j                  |j                        |      }t	        j
                  t	        j                  || j                  j                  z
        | j                  j                  k  t	        j                  d|	j                  |	j                        |	      }	| j                  ||	      \  }}
|d|	z
  z  }t	        j                  ||	gd      }| j                  |      }t	        j                  |	d      d   }| j                  j                   r]| j#                  |j                   d         }t	        j$                  |g|j                   d   z  d      }| j'                  ||      }||z  }| j)                  |      }||z  }|}| j+                  ||j                   d   |j                  |j                  d	      }g }g }| j,                  d
| j                  j.                   D ]8  } |||||      \  }}|r|j1                  |       |s(|j1                  |       : |r|g|z   }nd
}t3        |||r|nd
|
d   |
d         S )a  
        past_values (`torch.FloatTensor` of shape `(batch_size, sequence_length)`):
            Past values of the time series that serves as input to the model.
        past_values_padding (`torch.LongTensor` of shape `(batch_size, sequence_length)`):
            The padding indicator of the time series.
        freq (`torch.LongTensor` of shape `(batch_size,)`):
            Frequency indices for the time series data.
        r   rd   r<   r   r   r   r   T)r   sequence_lengthrf   r   r   N)ra   r   r@   r   )last_hidden_statera   
attentionsr   r   )rp   r   r*   r   r   r   r   r   r   rf   r   r   r  r   r   r   r   r   concat_timesfm_shift_padded_seqr   _prepare_4d_attention_maskr   r   appendr   )r9   r   r  r  r   r  r   bsizepatched_inputsr   statsconcat_inputsmodel_inputpatched_paddingpos_embf_embra   r   all_attentionsall_hidden_stateslayerr   s                         r!   rD   zTimesFmModel.forwardl  s   ( !!!$$))%T[[5M5MN*//r4;;;S;STIIlS()DKK,A,AALLN$8$8AVAVW

 {{IInt{{':'::;dkk>S>SSLLL$6$6|?R?RS

 !% 7 7 U (3+=>		><"@bI))-8  ))Lb9!<;;//''(9(9!(<=GllG9{/@/@/C#CKG44_gNG7"Kd#u $88*)//2%% '' 9 
 [[!@4;;#@#@A 
	8E$)+-("3	%!FM !%%f-#!((7
	8  !,0A A $++):~a(
 	
r    r   Nr  rf   r   r   c                    |j                   rt        j                  |      j                  nt        j                  |      j                  }| &| j                  | j                  d   ddd      } | |z  } |rbt        j                  t        j                  ||f||      |z  d      }|j                  dd||      }| t        j                  | |      } | S |} | S )a  
        Creates 4D attention mask and combines causal and padding masks if needed.

        Args:
            attention_mask: Optional tensor of shape (batch_size, seq_length) containing padding mask
            sequence_length: Length of the sequence
            dtype: Data type of the mask
            device: Device of the mask
            is_causal: Whether to apply causal masking

        Returns:
            4D attention mask of shape (batch_size, 1, seq_length, seq_length)
        r   r   rd   r   )diagonal)
is_floating_pointr   finfor   iinfor   rp   triur^   minimum)r   r  rf   r   r   	min_valuecausal_masks          r!   r  z'TimesFmModel._prepare_4d_attention_mask  s    , /4.E.EEKK&**5;;W\K]KaKa	 %+001E1Ea1H!QPRSN+i7N **

O_=USYZ]ffK &**1a/RK )!&~{!K  "-r    paddingc                 F   dt         j                  fd}t        j                  d|z
  d      } ||      }t        j                  | j                  d         }| ||ddf   }|||ddf   }d|z
  }t        j                  |d      }	t        j
                  |	d	      }	t        j                  ||z  d      }
|
|	z  }||j                  d
      z
  |z  }t        j                  |dz  d      |	z  }t        j
                  |d	      }t        j                  |      }||fS )a  Calculates mean and standard deviation of `inputs` across axis 1.

        It excludes values where `padding` is 1.

        Args:
            inputs: A PyTorch tensor of shape [b, n, p].
            padding: A PyTorch tensor of shape [b, n, p] with values 0 or 1.

        Returns:
            A tuple containing the mean and standard deviation.
            We return the statistics of the first patch with more than three non-padded values.
        arrc                 (   t        j                  | dk\  j                  t         j                        d      }| dk\  j                  t         j                        j	                  d      }t        j
                  |dk(  | j                  d   dz
  |      S )Nr   r   r   r   )r   argmaxrg   int32sumr   rp   )r%  indicesrow_sums      r!   _get_patch_indexz?TimesFmModel._timesfm_masked_mean_std.<locals>._get_patch_index  sk    llC1H==#=1EGaxmmEKK0444;G;;w!|SYYq\A-=wGGr    r   rc   r   r   Nr<   r   rd   r   )r   r   r)  r   rp   r   r   r   )r   r#  r,  pad_sumpatch_indicesbidxsr%  r   masknum_valid_elements
masked_summasked_meanmasked_centered_arr
masked_var
masked_stds                  r!   r   z%TimesFmModel._timesfm_masked_mean_std  s    	H%,, 	H
 ))AKQ/(1V\\!_-UM1,-e]A-. 3w #YYt3"[[);E YYsTzq1
 #55  #[%:%:2%>>$FYY2A51=@RR
[[5
ZZ
+
J&&r    r0  seqc                    |j                   \  }}}| dk(  }|j                  t        j                        j	                  d      }d||j                  d       <   t        j                  ||j                        j                  ddd      j                  |d|      }||ddddf   z
  |z  }|j                  d|      }	|	S )zShifts rows of seq based on the first 0 in each row of the mask.

        Args:
            mask: mask tensor of shape [B, N]
            seq: seq tensor of shape [B, N, P]

        Returns:
            The shifted sequence.
        r   r   r   rd   )r   N)rp   rg   r   r(  r'  anyr   r   r   expandgather)
r0  r7  
batch_sizenum_seqfeature_dimnew_maskr*  	idx_rangeshifted_idxshifted_seqs
             r!   r  z&TimesFmModel._timesfm_shift_padded_seq#  s     ,/99(
G[%)QY ++ekk*11a18 )+!$$% LL<AA!RKRRS]_acno	 !71dD=#99WD jjK0r    )FF)T)r   r   r   r   r0   r   r   ro   r  r   r   
LongTensorr   r   rD   staticmethodr   rf   r   r  r   r  rG   rH   s   @r!   r   r   G  s   } &$ll$27,,$	u||U5<<#=>>	?$   #(%*V
\\V
 #--V
 ll	V

  V
 #V
 
V
  V
p  +t+++ {{+ 	+
 + 
	+ +Z ,' ,' ,'QVW\WcWcejeqeqWqQr ,' ,'\  5<< ELL  r    r   c                   `    e Zd ZdZdef fdZ	 ddeej                     dee	   dz  de	dz  de
ej                  d	f   fd
Zdej                  de
ej                  ej                  f   dej                  fdZdej                  dej                  dej                  fdZee	 	 	 	 	 	 	 	 ddeej                     deej                  e	z     dz  de	dz  dej                  dz  de	dz  dedededz  dedz  defd              Zedej                  de	deej                     fd       Z xZS )TimesFmModelForPredictionz/TimesFM model for quantile and mean prediction.r*   c                 J   t         |   |       || _        |j                  | _        |j
                  | _        t        |      | _        t        |j                  |j
                  dt        |j                        z   z  |j                        | _        | j                          y )Nr   r   )r/   r0   r*   context_lengthcontext_lenhorizon_lengthhorizon_lenr   decoderrJ   r1   len	quantilesr2   horizon_ff_layerr   )r9   r*   r:   s     r!   r0   z"TimesFmModelForPrediction.__init__G  s     !00!00#F+ !5))--S9I9I5J1JK00!
 	r    Nr   r  rI  r[   .c                 X   || j                   }g g }}|D ]  }|j                  d   }t        j                  || j                  z   |j
                  |j                        }||k  r||z
  }	t        j                  t        j                  |	|j
                  |j                        |gd      }t        j                  t        j                  |	|j
                  |j                        |gd      }n||kD  r|| d }||| j                  z    d }|j                  |       |j                  |        t        j                  |d      t        j                  |d      f}
|E|
t        j                  |dt        |       t        j                        j                  dd      fz   }
|
S )a  Pad/truncate input time series to `context_len` and build a padding mask.

        Args:
            inputs: A list of 1d Tensors. Each Tensor is the context time series of a single forecast task.
            freq: Optional list of frequencies (returned as a tensor when provided).
            context_len: Optional context length override (defaults to `self.context_len`).

        Returns:
            Tuple of (padded_inputs, padding_mask) and optionally a freq tensor.
        Nr   r   r   rv   rd   r   )rI  rp   r   zerosrK  rf   r   r   r^   r  stackr   rM  r(  r   )r9   r   r  rI  input_tsinput_paddingts	input_lenr#  num_front_padresults              r!   _preprocessz%TimesFmModelForPrediction._preprocessZ  s    **K"$b- 	*BIkk)d.>.>">bhhWYW`W`aG;& +i 7YYMRTR[R[ \^`aghi))UZZRXXV]VdVd%egn$ouvw[(&!K$2B2B$B"C"EFOOB  )	* ++hA.Mq0QRu||D3v;,?u{{S[[\^`abddFr    model_outputr  c                    | j                  |      }|j                  \  }}}|j                  ||| j                  j                  t        | j                  j                        dz         }|\  }}||dddddf   z  |dddddf   z   S )z*Postprocess output of stacked transformer.r   N)rO  rp   r   r*   rJ  rM  rN  )	r9   rZ  r  	output_tsbn_r  r  s	            r!   _postprocess_outputz-TimesFmModelForPrediction._postprocess_output  s     )),7	 //1aNN1a)C)CSI^I^E_bcEcd		E5D$!4551dD$;N8OOOr    predictionstargetsc                 *   g }t        | j                  j                        D ]M  \  }}||d|f   z
  }t        j                  |dz
  |z  ||z        }|j                  |j                                O t        j                  |      j                         S )N.r   )	enumerater*   rN  r   r|   r  rj   rR  )r9   ra  rb  lossesiqerrorsr&   s           r!   _quantile_lossz(TimesFmModelForPrediction._quantile_loss  s    dkk334 	'DAq{3622F99a!ev-q6z:DMM$))+&	' {{6"''))r    r   window_sizefuture_valuesforecast_context_lenreturn_forecast_on_contexttruncate_negativer   r  c
           
      n	   || j                   }n|}|d   j                  }|D cg c]  }|| d 
 }}t        j                  t        j                  |D cg c]  }t        j                  |       c}            }|Yg }g }t        |      D ]A  \  }}|j                  | j                  ||             |*|j                  ||   gdz         C |}||}|$t        j                  d       dgt        |      z  }|| j                  j                  }|	| j                  j                  }	| j                  ||      \  }}}|j                  |      }|j                  |      }|j                  |      }|}|j                   d   }g }|j                   d   |j                   d   | j"                  z   k7  r8t%        d|j                   d    d|j                   d    d| j"                         | j                  j&                  }| j"                  |z   dz
  |z  }t)        |      D ]/  }|ddd|j                   d   f   }|dd| df   }|dd| df   }| j+                  |||||		      }| j-                  |j.                  |j0                  |j2                  f      }|rl|dk(  rg|dddd
d| j                  j4                  ddf   }|j7                  |j9                  d      d
|j9                  d            }|j;                  |       |ddd
d|df   } |ddd
d|ddf   }|j;                  |       t        j<                  || gd
      }2 |rHt        j<                  |d      ddd|| j                  j4                  z
  | j"                  z   ddf   }n-t        j<                  |d      ddd| j"                  ddf   }|dddddf   }!|*|!ddddf   |!ddddf   z   }!|ddddf   |ddddf   z   }|dk\  r.|r,t        j>                  |!d      }!t        j>                  |d      }d}"|9tA        jB                  |!|      }#| jE                  |ddddddf   |      }$|#|$z   }"tG        j.                  |r|jH                  nd|	r|jJ                  nd|!||"      S c c}w c c}w )aa  
        past_values (`torch.FloatTensor` of shape `(batch_size, sequence_length)`):
            Past values of the time series that serves as input to the model.
        freq (`torch.LongTensor` of shape `(batch_size,)`):
            Frequency indices for the time series data.
        window_size (`int`, *optional*):
            Window size of trend + residual decomposition. If None then we do not do decomposition.
        future_values (`torch.Tensor`, *optional*):
            Optional future time series values to be used for loss computation.
        forecast_context_len (`int`, *optional*):
            Optional max context length.
        return_forecast_on_context (`bool`, *optional*):
            True to return the forecast on the context when available, i.e. after the first input patch.
        truncate_negative (`bool`, *optional*):
            Truncate to only non-negative values if any of the contexts have non-negative values,
            otherwise do nothing.
        output_attentions (`bool`, *optional*):
            Whether to output the attentions.
        output_hidden_states (`bool`, *optional*):
            Whether to output the hidden states.

        Example:

        ```python
        >>> from transformers import TimesFmModelForPrediction

        >>> model = TimesFmModelForPrediction.from_pretrained("google/timesfm-2.0-500m-pytorch")

        >>> forecast_input = [torch.linspace(0, 20, 100).sin(), torch.linspace(0, 20, 200).sin(), torch.linspace(0, 20, 400).sin()]
        >>> frequency_input = torch.tensor([0, 1, 2], dtype=torch.long)

        >>> # Generate
        >>> with torch.no_grad():
        >>>     outputs = model(past_values=forecast_input, freq=frequency_input, return_dict=True)
        >>>     point_forecast_conv = outputs.mean_predictions
        >>>     quantile_forecast_conv = outputs.full_predictions
        ```
        Nr   rc   z6No frequency provided via `freq`. Default to high (0).r   z=Length of paddings must match length of input + horizon_len: z != z + )r   r  r  r   r  rd   r   )axis.r   )r	  r
  ra   r$   r%   r&   )&rI  r   r   r   rR  rd  extend_timesfm_moving_averageloggerinforM  r*   r   r  rY  rg   rp   rK  r   rJ  r   rL  r`  r	  r   r   r   r   sizer  concatenatemaximumr=   mse_lossri  r#   r
  ra   )%r9   r   r  rj  rk  rl  rm  rn  r   r  r   fcontext_lenr   rU  r   inp_min
new_inputs	new_freqsrf  rS  rT  inp_freq	final_outrI  full_outputsoutput_patch_lennum_decode_patches
step_indexcurrent_paddingdecoder_outputfprop_outputsnew_full_tsnew_tsmean_outputsr&   rx  quantile_losss%                                        r!   rD   z!TimesFmModelForPrediction.forward  s-   j  '++L/L Q&& 0;;"l]^$;;))EKK(H22(HIJ"JI"6* 42!!$">">r;"OP#$$d1gY]34  F <KKPQ3V$D$ $ = ='#';;#C#C ,0,<,<VT,J)-;;v&%((0;;v&	ooa(q!Y__Q%7$:J:J%JJ!''*+4	0B/C3tGWGWFXZ   ;;55"..1AAAEJZZ 23 	HJ+Aq9??13E/E,EFO \MN!23H+A}~,=>M!\\$$1"3%9 * N !4400##^%9%9:M
 *jAo ,Ass4Ndkk6N6N4NPQ,QR)11+2B2B12Er;K[K[\]K^_##K0 #1b*;+;*;Q#>?F'2/@0@/@!(CDK,))9f*=BGI=	H@ & ,,\BPkDKK$<$<<t?O?OOPRSSL
 !,,\B1a$JZJZFZ\]C]^L#Aq!G,"'1c	2\!$Q$)5LLL'1c	2\!$Q$)5LLLa<- ==s;L ==s;L$zz,>H //Q12X0FVMm+D),>>4E~004:N.66TX))
 	
A <(Hs   R-R2r%  c                 4   t        j                  | |dz
  dfdd      }t        j                  || j                  | j
                        |z  }t        j                  |j                  ddd      |j                  ddd            j                         }|| |z
  gS )zCCalculates the moving average using PyTorch's convolution function.r   r   constantr   rd   )	r=   r   r   r^   rf   r   conv1dr   squeeze)r%  rj  
arr_paddedkernelsmoothed_arrs        r!   rr  z1TimesFmModelForPrediction._timesfm_moving_average=  s     UU3q! 4j!D
KsyyL{Zxx
1b 96;;q!R;PQYY[cL011r    r   )NNNNFFNN)r   r   r   r   r   r0   r   r   r   r   ro   rY  r`  ri  r   r   r   r#   rD   rD  listrr  rG   rH   s   @r!   rF  rF  D  s   9} ( lp#u||,#4<SMD4H#^adh^h#	u||S 	!#JP!LLP16u||U\\7Q1RP	P*%,, * *RWR^R^ *  59"&-1+/+0"')-,0c
ell+c
 u||c)*T1c
 4Z	c

 ||d*c
 "Djc
 %)c
  c
  $;c
 #Tkc
 
$c
  c
J 2U\\ 2 2U\\HZ 2 2r    rF  )rF  r   r   )r   )4rz   collections.abcr   r   dataclassesr   r   torch.nnr3   torch.nn.functionalr   r=    r   r   integrationsr   modeling_flash_attention_utilsr	   modeling_outputsr
   modeling_utilsr   r   processing_utilsr   utilsr   r   r   r   configuration_timesfmr   
get_loggerr   rs  r   r#   Moduler)   rJ   rZ   rs   r   r'   r   r   r   r   r   rF  __all__r   r    r!   <module>r     s  *  . !     & 7 B / F & R R 0 
		H	% 	&O 	&  	& - -  - ,!299 !, Y'JRYY J (J(+ +j %II%,,% % ,,	%
 LL4'% % % '(%,9)ryy 9)x%")) %B _  6 y) y yxB2 6 B2J Rr    