
    qiL                       d Z ddlZddlmZ ddlmZ ddlZddlmZ ddlm	Z
 ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZmZ ddlmZ ddlmZmZmZ ddlmZmZmZm Z  ddl!m"Z"  e jF                  e$      Z%	 	 dodejL                  dejN                  dejN                  dejN                  dejN                  dz  de(dz  de(dee   fdZ) G d dejL                        Z* G d dejL                        Z+	 	 	 dpdejN                  d e(d!e,dz  d"e-d#e.f
d$Z/	 	 dqdejN                  d%e,e.z  d!e,dz  d#e.fd&Z0 G d' d(ejL                        Z1 G d) d*ejL                        Z2 G d+ d,ejL                        Z3e G d- d.e             Z4 G d/ d0ejL                        Z5 G d1 d2ejL                        Z6 G d3 d4e4      Z7e ed56       G d7 d8e                    Z8e ed96       G d: d;e                    Z9e ed<6       G d= d>e                    Z:e ed?6       G d@ dAe                    Z;e edB6       G dC dDe                    Z<e edE6       G dF dGe                    Z=dHej|                  j~                  dIejN                  dJejN                  fdKZ@drdLejN                  dMejN                  dz  dJejN                  fdNZA G dO dPejL                        ZB G dQ dRejL                        ZC G dS dTejL                        ZD G dU dVejL                        ZEe G dW dXe4             ZF G dY dZejL                        ZG ed[6       G d\ d]e4             ZH G d^ d_ejL                        ZI ed`6       G da dbe4             ZJ edc6       G dd deejL                               ZK edf6       G dg dhe4             ZL G di djejL                        ZM edk6       G dl dme4             ZNg dnZOy)szPyTorch PatchTST model.    N)Callable)	dataclass)nn   )initialization)ACT2CLS)is_deepspeed_zero3_enabled)FlashAttentionKwargs)BaseModelOutput)ALL_ATTENTION_FUNCTIONSPreTrainedModel)Unpack)NegativeBinomialOutputNormalOutputStudentTOutput)ModelOutputTransformersKwargsauto_docstringlogging   )PatchTSTConfigmodulequerykeyvalueattention_maskscalingdropoutkwargsc                    ||j                  d      dz  }t        j                  ||j                  dd            |z  }|||z   }t        j
                  j                  |d      }t        j
                  j                  ||| j                        }t        j                  ||      }	|	j                  dd      j                         }	|	|fS )N         r   dim)ptrainingr   )
sizetorchmatmul	transposer   
functionalsoftmaxr   r'   
contiguous)
r   r   r   r   r   r   r   r   attn_weightsattn_outputs
             `/opt/pipecat/venv/lib/python3.12/site-packages/transformers/models/patchtst/modeling_patchtst.pyeager_attention_forwardr2   '   s     **R.D( <<s}}Q':;gEL!#n4==((2(>L==((6??([L,,|U3K''1-88:K$$    c                   (    e Zd ZdZ	 	 	 	 	 ddedededededed	edz  f fd
Z	 	 	 dde	j                  de	j                  dz  de	j                  dz  dedz  dee   dee	j                  e	j                  dz  ee	j                     dz  f   fdZ xZS )PatchTSTAttentionz=Multi-headed attention from 'Attention Is All You Need' paperN	embed_dim	num_headsr   
is_decoderbias	is_causalconfigc                 
   t         |           || _        || _        || _        ||z  | _        || _        | j
                  |z  | j                  k7  rt        d| j                   d| d      | j
                  dz  | _        || _	        || _
        t        j                  |||      | _        t        j                  |||      | _        t        j                  |||      | _        t        j                  |||      | _        y )Nz;embed_dim must be divisible by num_heads (got `embed_dim`: z and `num_heads`: ).r"   r9   )super__init__r6   r7   r   head_dimr;   
ValueErrorr   r8   r:   r   Lineark_projv_projq_projout_proj)	selfr6   r7   r   r8   r9   r:   r;   	__class__s	           r1   r@   zPatchTSTAttention.__init__G   s     	""!Y.MMI%$..8MdnnM]$YKr3  }}d*$"ii	94@ii	94@ii	94@		)YTBr3   hidden_stateskey_value_statesr   output_attentionsr   returnc                    |du}|j                   dd \  }}|r|j                   d   n|}	||d| j                  f}
||	d| j                  f} | j                  |      j                  |
 j	                  dd      }|r|n|} | j                  |      j                  | j	                  dd      } | j                  |      j                  | j	                  dd      }t        j                  | j                  j                  t              } || ||||f| j                  sdn| j                  | j                  |d|\  }}|j                  ||d      j!                         }| j#                  |      }||dfS )z#Input shape: Batch x Time x ChannelNr!   r   r#           )r   r   rL   )shaperA   rF   viewr+   rD   rE   r   get_interfacer;   _attn_implementationr2   r'   r   r   reshaper.   rG   )rH   rJ   rK   r   rL   r   is_cross_attentionbsztgt_lensrc_lenq_input_shapekv_input_shapequery_statescurrent_states
key_statesvalue_statesattention_interfacer0   r/   s                      r1   forwardzPatchTSTAttention.forwardf   s    .T9 %**3B/W/A"((+wgr4==9wDMM: 7t{{=166FPPQRTUV-?)]5T[[055~FPPQRTUV
7t{{>277HRRSTVWX(?(M(MKK,,.E)
 %8
%
  $}}C$,,LL/
%
 
%
!\ "))#w;FFHmmK0L$..r3   )rO   FTFN)NNF)__name__
__module____qualname____doc__intfloatboolr   r@   r)   Tensorr   r
   tupler`   __classcell__rI   s   @r1   r5   r5   D   s    G  (,CC C 	C
 C C C %CD 15.2).1/||1/  ,,-1/ t+	1/
  $;1/ -.1/ 
u||U\\D0%2E2LL	M1/r3   r5   c                   H     e Zd ZdZdef fdZdej                  fdZ xZ	S )PatchTSTBatchNormzP
    Compute batch normalization over the sequence length (time) dimension.
    r;   c                     t         |           t        j                  |j                  |j
                        | _        y )Neps)r?   r@   r   BatchNorm1dd_modelnorm_eps	batchnormrH   r;   rI   s     r1   r@   zPatchTSTBatchNorm.__init__   s(    FOOLr3   inputsc                 l    |j                  dd      }| j                  |      }|j                  dd      S )a  
        Parameters:
            inputs (`torch.Tensor` of shape `(batch_size, sequence_length, d_model)`):
                input for Batch norm calculation
        Returns:
            `torch.Tensor` of shape `(batch_size, sequence_length, d_model)`
        r   r#   )r+   rt   )rH   rv   outputs      r1   r`   zPatchTSTBatchNorm.forward   s7     !!!Q''1%%r3   
ra   rb   rc   rd   r   r@   r)   rh   r`   rj   rk   s   @r1   rm   rm      s&    M~ M
&ell 
&r3   rm   rv   
mask_ratiounmasked_channel_indiceschannel_consistent_masking
mask_valuec                    |dk  s|dk\  rt        d| d      | j                  \  }}}}| j                  }	t        |d|z
  z        }
|r-t	        j
                  |d||	      }|j                  d|d      }nt	        j
                  ||||	      }t	        j                  ||||	      }d|ddddd|
f<   t	        j                  |d      }t	        j                  |d      }t	        j                  |d|	      }|j                  d      j                  ddd|      }|d|dd|ddddf<   | j                  |j                         |      }||d
   fS )a  random_masking: Mask the input considering the control variables.

    Args:
        inputs (`torch.Tensor` of shape `(batch_size, num_channels, sequence_length, num_features)`):
            The input tensor to mask.
        mask_ratio (`float`):
            Masking ratio applied to mask the input data during random pretraining. It is the number between 0 and 1.
        unmasked_channel_indices (list, *optional*):
            Indices of channels that will not be masked.
        channel_consistent_masking (bool, *optional*, defaults to `False`):
            When true, masking will be same across all channels of a timeseries. Otherwise, masking positions will vary
            across channels.
        mask_value (int, *optional*, defaults to 0):
            Define the value of masked patches for pretraining.

    Returns:
        `tuple(torch.Tensor)`: inputs_mask, masked input, same shape as input Tensor and mask tensor of shape [bs x c x
        n]
    r   r   zMask ratio z has to be between 0 and 1.deviceNr!   r$   )r%   index.r   )rB   rP   r   re   r)   randrepeatonesargsortgather	unsqueezemasked_fillrg   )rv   rz   r{   r|   r}   
batch_sizenum_channelssequence_lengthnum_featuresr   len_keepnoisemaskids_shuffleids_restoreinputs_masks                   r1   random_maskingr      sQ   4 A~q;zl2MNOO>Dll;Jo|]]F?a*n56H!

:q/&IQa0 

:|_VT ::j,ODDAyy --2.K--4K<<"K8D>>"$$Q1l;D+23Q(!Q./$$TYY[*=KV$$r3   num_forecast_mask_patchesc                 P   t        |t              r|g}|D cg c]  }d }}| j                  \  }}}}	t        j                  |||| j
                        }
g }d}t        |      }t        ||      D ]H  \  }}|dk  s||k\  rt        d| d      t        ||z  |z        }|j                  |||g       ||z  }J t        |d       }||k  r|d   d   ||z
  z   |d   d<   n||kD  r|d	   d   ||z
  z   |d	   d<   d}|D ]  \  }}}||z   }d|
||d
d
| d
f<   |} t        j                  |
j                  d         }|
|   }
|
j                  d	      j                  ddd|	      }
|d|
d
d
|d
d
d
d
f<   | j                  |
j                         |      }||
d   fS c c}w )a  Forecast masking that masks the last K patches where K is from the num_forecast_mask_patches.
    If num_forecast_mask_patches is a list, samples in the batch will be randomly masked by numbers defined in the list.

    Parameters:
        inputs (`torch.Tensor`):
            Input of shape `(bs, num_channels, num_patch, patch_length)`
        num_forecast_mask_patches (`list`):
            Number of patches to be masked at the end of each batch sample. e.g. 4 or [3, 5].
        unmasked_channel_indices (`list`, *optional*):
            Indices of channels that are not masked.
        mask_value (`int`, *optional*, defaults to 0):
            Values in the masked patches will be filled by `mask_value`.

    Returns:
        `tuple(torch.Tensor)`: inputs_mask, masked input, same shape as inputs Tensor and Mask tensor of shape `(bs,
        num_channels , num_patch)` or `(bs, tsg1, tsg2, num_channels, num_patch)`
    r   r   r   znum_forecast_mask_patches z6 should be greater than 0 and less than total patches.c                     | d   S )Nr#    )xs    r1   <lambda>z"forecast_masking.<locals>.<lambda>  s
    !A$ r3   )r   r#   r!   Nr   )
isinstancere   rP   r)   zerosr   sumziprB   appendsortedrandpermr   r   r   rg   )rv   r   r{   r}   _forecast_mask_ratiosr   r   r   r   r   t_listtotal_lengthtotal_ratiopatch_lengthratiotemp_lenbatch1	patch_lenbatch2permr   s                         r1   forecast_maskingr      s   0 +S1%>$?!'@A!AAA>Dll;Jo|;;z<WDFL*+K"#<>RS !e1 ?,\N:pq  zE)K78|UH56 ! F/Fj ay|zL'@Aq	!	
	"r
1
)BCr
1F"( 	1h("./VF]A	z{*+
 >>$**Q-(D:D>>"$$Q1l;D+23Q(!Q./$$TYY[*=KV$$O Bs   	F#c                   H     e Zd ZdZdef fdZdej                  fdZ xZ	S )PatchTSTPatchifyz
    A class to patchify the time series sequence into different patches

    Returns:
        `torch.Tensor` of shape `(batch_size, num_channels, num_patches, patch_length)`
    r;   c                    t         |           |j                  | _        |j                  | _        |j
                  | _        | j                  | j                  k  r&t        d| j                   d| j                   d      t        | j                  | j                        | j                  z
  | j
                  z  dz   | _        | j                  | j
                  | j                  dz
  z  z   }| j                  |z
  | _	        y )NzSequence length (z+) has to be greater than the patch length ()r   )
r?   r@   context_lengthr   r   patch_striderB   maxnum_patchessequence_start)rH   r;   new_sequence_lengthrI   s      r1   r@   zPatchTSTPatchify.__init__6  s    %44"//"//4#4#44#D$8$8#99deievevdwwxy 
   4 4d6G6GH4K\K\\aeararruvv"//$2C2CtGWGWZ[G[2\\"225HHr3   past_valuesc                 :   |j                   d   }|| j                  k7  rt        d| d| j                   d      |dd| j                  dddf   }|j	                  d| j
                  | j                        }|j                  dd      j                         }|S )a!  
        Parameters:
            past_values (`torch.Tensor` of shape `(batch_size, sequence_length, num_channels)`, *required*):
                Input for patchification

        Returns:
            `torch.Tensor` of shape `(batch_size, num_channels, num_patches, patch_length)`
        zInput sequence length (z%) doesn't match model configuration (r=   N)	dimensionr(   step)	rP   r   rB   r   unfoldr   r   r+   r.   )rH   r   r   rx   s       r1   r`   zPatchTSTPatchify.forwardG  s     &++B/d222)/)::_`d`t`t_uuwx  Q 3 3 5q89$2C2C$J[J[\!!"b)446r3   ry   rk   s   @r1   r   r   .  s&    I~ I"5<< r3   r   c                   H     e Zd ZdZdef fdZdej                  fdZ xZ	S )PatchTSTMaskinga  
    Class to perform random or forecast masking.

    Parameters:
        config (`PatchTSTConfig`): model config
    Returns:
        x_mask (`torch.Tensor` of shape `(batch_size, num_channels, num_patches, patch_length)`)
            Masked patched input
        mask (`torch.Tensor` of shape `(batch_size, num_channels, num_patches)`)
            Bool tensor indicating True on masked points
    r;   c                 <   t         |           |j                  | _        |j                  | _        |j                  | _        |j
                  | _        |j                  | _        |j                  | _        | j                  t        | j                        | _        y y N)	r?   r@   random_mask_ratior|   	mask_typer   r{   r}   r   ru   s     r1   r@   zPatchTSTMasking.__init__k  s    !'!9!9*0*K*K')))/)I)I&(.(G(G% ++((4,243P3P,QD) 5r3   patch_inputc                 r   | j                   dk(  r<t        || j                  | j                  | j                  | j
                        \  }}nY| j                   dk(  r1t        || j                  | j                  | j
                        \  }}nt        d| j                    d      |j                         }||fS )a  
        Parameters:
            patch_input (`torch.Tensor` of shape `(batch_size, num_channels, num_patches, patch_length)`, *required*):
                Patch input

        Return:
            masked_input (`torch.Tensor` of shape `(batch_size, num_channels, num_patches, patch_length)`)
                Masked patched input
            mask (`torch.Tensor` of shape `(batch_size, num_channels, num_patches)`)
                Bool tensor indicating True on masked points

        random)rv   rz   r{   r|   r}   forecast)rv   r   r{   r}   zInvalid mask type .)
r   r   r   r{   r|   r}   r   r   rB   rg   )rH   r   masked_inputr   s       r1   r`   zPatchTSTMasking.forwardv  s     >>X%!/"11)-)F)F+/+J+J??"L$ ^^z)!1"*.*H*H)-)F)F??	"L$ 1$..1ACDD yy{T!!r3   ry   rk   s   @r1   r   r   ^  s&    
	R~ 	R!"5<< !"r3   r   c                   T     e Zd ZdZdef fdZddej                  dedz  fdZ	 xZ
S )	PatchTSTEncoderLayerz 
    PatchTST encoder layer
    r;   c           
         t         |           |j                  | _        t        |j                  |j
                  |j                  |      | _        |j                  dkD  rt        j                  |j                        nt        j                         | _        |j                  dk(  rt        |      | _        nX|j                  dk(  r1t        j                   |j                  |j"                        | _        nt%        |j                   d      | j                  r|j                  dkD  rt        j                  |j                        nt        j                         | _        |j                  dk(  rt        |      | _        nX|j                  dk(  r1t        j                   |j                  |j"                        | _        nt%        |j                   d      t        j*                  t        j,                  |j                  |j.                  |j0                        t3        |j4                            |j6                  dkD  rt        j                  |j6                        nt        j                         t        j,                  |j.                  |j                  |j0                              | _        |j                  dkD  rt        j                  |j                        nt        j                         | _        |j                  dk(  rt        |      | _        nX|j                  dk(  r1t        j                   |j                  |j"                        | _        nt%        |j                   d      |j>                  | _        y )N)r6   r7   r   r;   r   rt   	layernormro   z$ is not a supported norm layer type.r>   ) r?   r@   channel_attentionr5   rr   num_attention_headsattention_dropout	self_attnpath_dropoutr   DropoutIdentitydropout_path1	norm_typerm   norm_sublayer1	LayerNormrs   rB   dropout_path2norm_sublayer2
SequentialrC   ffn_dimr9   r   activation_function
ff_dropoutffdropout_path3norm_sublayer3pre_normru   s     r1   r@   zPatchTSTEncoderLayer.__init__  s   !'!9!9*nn00,,	
 AG@S@SVW@WRZZ(;(;<]_]h]h]j{*"3F";D,"$,,v~~6??"SD 0 011UVWW !!DJDWDWZ[D[F,?,?!@acalalanD;.&7&?#!![0&(ll6>>v&W# F$4$4#55Y!Z[[ --IIfnnfnn6;;GF../1-3->->-BBJJv(()IIfnnfnn6;;G	
 AG@S@SVW@WRZZ(;(;<]_]h]h]j{*"3F";D,"$,,v~~6??"SD 0 011UVWWr3   Nhidden_staterL   c                    |j                   \  }}}}|j                  ||z  ||      }| j                  r;| j                  | j	                  |      |      \  }}}	|| j                  |      z   }n:| j                  ||      \  }}}	| j	                  || j                  |      z         }|j                  ||||      }| j                  r|j                  dd      j                         }|j                  ||z  ||      }| j                  r;| j                  | j                  |      |      \  }}
}	|| j                  |      z   }n:| j                  ||      \  }}
}	| j                  || j                  |      z         }|j                  ||||      }|j                  dd      j                         }|j                  ||z  ||      }| j                  r3|| j                  | j                  | j                  |                  z   }n2| j                  || j                  | j                  |            z         }|j                  ||||      }|f}|r|| j                  r|
fn|fz  }|S )a  
        Parameters:
            hidden_state (`torch.Tensor` of shape `(batch_size, num_channels, sequence_length, d_model)`, *required*):
                Past values of the time series
            output_attentions (`bool`, *optional*):
                Whether or not to return the output attention of all layers
        Return:
            `torch.Tensor` of shape `(batch_size, num_channels, sequence_length, d_model)`

        )rJ   rL   r#   r   )rP   rQ   r   r   r   r   rT   r   r+   r.   r   r   r   r   r   )rH   r   rL   r   num_input_channelsr   rr   r0   r/   r   channel_attn_weightsoutputss               r1   r`   zPatchTSTEncoderLayer.forward  s    DPCUCU@
& $((6H)H/[bc==+/>>"11,?Sd ,: ,(Kq ($*<*<[*IIL ,0>>*>O ,: ,(Kq  ..|d>P>PQ\>]/]^L $++J8JO]de !!'11!Q7BBDL',,Z/-IK]_fgL}}7;~~"&"5"5l"CWh 8F 8411  ,d.@.@.MM 8<~~".BS 8F 8411  $22<$BTBTU`Ba3ab (//
OM_ahiL'11!Q7BBDL $((6H)H/[bc== ($*<*<TWWTEXEXYeEf=g*hhL  ..|d>P>PQUQXQXYeQf>g/ghL $++J8JO]de/t?U?U&:;\h[jjGr3   r   )ra   rb   rc   rd   r   r@   r)   rh   rg   r`   rj   rk   s   @r1   r   r     s3    0(~ 0(dQELL QTD[ Qr3   r   c                       e Zd ZU eed<   dZdZdZdZdZ	dZ
dZ ej                         dej                  fd       Zdd	Zy
)PatchTSTPreTrainedModelr;   modelr   )timeFTr   c                    t        |t              rZt        | j                  j                  | j                  j
                        | j                  j
                  z
  | j                  j                  z  dz   }| j                  j                  r&t        j                  |j                  d       |dz  }|j                  | j                  |      }t               rrddl}|j                  j                  |j                   d      5  |j                   j#                         dkD  r t        j$                  |j                   |       ddd       yt        j$                  |j                   |       yt        |t&        j(                  t&        j*                  f      rt        j,                  |j.                         t        j0                  |j2                         t5        |dd      ^t        j,                  |j6                         t        j0                  |j8                         t        j,                  |j:                         yyt        |t&        j<                        rct        j                  |j2                  d| j                  j>                  	       |j.                   t        j,                  |j.                         yyy# 1 sw Y   yxY w)
z$
        Initialize weights
        r   g{Gz?)stdr   N)modifier_rankrunning_meanrO   )meanr   ) r   PatchTSTPositionalEncodingr   r;   r   r   r   use_cls_tokeninitnormal_	cls_token_init_per	   	deepspeedzeroGatheredParametersposition_encnumelcopy_r   r   rq   zeros_r9   ones_weightgetattrr   running_varnum_batches_trackedrC   init_std)rH   r   r   r   r   s        r1   _init_weightsz%PatchTSTPreTrainedModel._init_weights0  s   
 f89 DKK..0H0HIDKKLdLdd))*,-.K {{((V--48q !??4;;DL)+ ^^66v7J7JZ^6_ F**002Q6

6#6#6EF F 

6..=r~~ >?KK$JJv}}%v~t4@F//0

6--.F667 A 		*LLSdkk6J6JK{{&FKK( ' +F Fs   >J>>Kc                 4    t        |t              r||_        y y r   )r   PatchTSTEncodergradient_checkpointing)rH   r   r   s      r1   _set_gradient_checkpointingz3PatchTSTPreTrainedModel._set_gradient_checkpointingT  s    f0,1F) 1r3   N)F)ra   rb   rc   r   __annotations__base_model_prefixmain_input_nameinput_modalitiessupports_gradient_checkpointing_supports_flash_attn_supports_sdpa_supports_flex_attnr)   no_gradr   Moduler  r
  r   r3   r1   r   r   %  s[    #O &+#NU]]_!)BII !) !)F2r3   r   c                   D     e Zd Zdef fdZdej                  fdZ xZS )PatchTSTEmbeddingr;   c                    t         |           |j                  | _        |j                  | _        | j                  r0t	        j
                  |j                  |j                        | _        y t	        j                         | _        t        |j                        D ]E  }| j                  j                  t	        j
                  |j                  |j                               G y r   )r?   r@   r   share_embeddingr   rC   r   rr   input_embedding
ModuleListranger   )rH   r;   r   rI   s      r1   r@   zPatchTSTEmbedding.__init__Z  s    "(";";%55#%99V-@-@&..#QD #%==?D 6445 \$$++BIIf6I6I6>>,Z[\r3   r   c                 `   |j                   d   }|| j                  k7  rt        d| j                   d| d      | j                  r| j	                  |      }|S t        |      D cg c]$  } | j                  |   |dd|ddddf         & }}t        j                  |d      }|S c c}w )a%  
        Parameters:
            patch_input (`torch.Tensor` of shape `(batch_size, num_channels, num_patches, patch_length)`, *required*):
                Patch input for embedding
        return:
            `torch.Tensor` of shape `(batch_size, num_channels, num_patches, d_model)`
        r   z&The defined number of input channels (zQ) in the config has to be the same as the number of channels in the batch input (r   Nr$   )rP   r   rB   r  r  r  r)   stack)rH   r   r   
embeddingsis        r1   r`   zPatchTSTEmbedding.forwardf  s     )..q1!8!8889P9P8Q RTTfSgghj  --k:J  UZZlTmnq1$..q1+aAqj2IJnJnZQ7J os   ')B+	ra   rb   rc   r   r@   r)   rh   r`   rj   rk   s   @r1   r  r  Y  s!    
\~ 
\5<< r3   r  c                   ~     e Zd ZdZdedef fdZedededej                  fd       Z
dej                  fdZ xZS )	r   z'
    Class for positional encoding
    r;   r   c                    t         |           |j                  | _        |j                  | _        |j                  r?t	        j
                  t        j                  ddd|j                              | _	        |dz  }| j                  ||      | _        |j                  dkD  r%t	        j                  |j                        | _        y t	        j                         | _        y )Nr   r   )r?   r@   r   r   r   	Parameterr)   r   rr   r   r   r   positional_dropoutr   r   rH   r;   r   rI   s      r1   r@   z#PatchTSTPositionalEncoding.__init__  s    #11"(";";\\%++aAv~~*NODN1K MM&+> 6<5N5NQR5RBJJv001 	XZXcXcXe 	r3   rM   c                 $   | j                   dk(  r7t        j                  t        j                  || j
                        d      }|S | j                   dk(  r#t        j                  || j
                        }t        j                  d|      j                  d      }t        j                  t        j                  d| j
                  d      t        j                  d      | j
                  z   z        }t        j                  ||z        |d d dd df<   t        j                  ||z        |d d dd df<   ||j                         z
  }||j                         d	z  z  }t        j                  |d
      }|S t!        | j                    d      )Nr   Trequires_gradsincosr   r   r#   g     @
   FzN is not a valid positional encoder. Available types are 'random' and 'sincos'.)positional_encoding_typer   r#  r)   randnrr   r   aranger   expmathlogsincosr   r   rB   )r;   r   r   positiondiv_terms        r1   r   z#PatchTSTPositionalEncoding._init_pe  sd    **h6<<K(P`deL  ,,8 ;;{FNNCL||A{3==a@Hyya!CQXHY\b\j\jHjFk!klH$)IIh.A$BLADqD!$)IIh.A$BLADqD!',*;*;*==L'<+;+;+=+BCL<<EJL
  223  4B  C r3   r   c                 x   | j                   r| j                  || j                  dd d d f   z         }| j                  | j                  d dd d f   z   }|j	                  |j
                  d   | j                  dd      }t        j                  ||fd      }|S | j                  || j                  z         }|S )Nr   r   r!   r#   r$   )	r   r$  r   r   expandrP   r   r)   cat)rH   r   r   
cls_tokensr   s        r1   r`   z"PatchTSTPositionalEncoding.forward  s    11+@Q@QRSRTVWRW@X2XYK):):2A2q5)AAI"))+*;*;A*>@W@WY[]_`J 99j+%>AFL   22;ARAR3RSLr3   )ra   rb   rc   rd   r   re   r@   staticmethodr   r#  r   r)   rh   r`   rj   rk   s   @r1   r   r   }  sX    
~ 
C 
  c bll  &5<< r3   r   c            	       j     e Zd ZdZdedef fdZ	 	 ddej                  de	dz  de	dz  d	e
fd
Z xZS )r  z
    PatchTST Encoder
    r;   r   c                 &   t         |   |       d| _        t        |      | _        t        ||      | _        t        j                  t        |j                        D cg c]  }t        |       c}      | _        | j                          y c c}w )NF)r?   r@   r	  r  embedderr   positional_encoderr   r  r  num_hidden_layersr   layers	post_init)rH   r;   r   r  rI   s       r1   r@   zPatchTSTEncoder.__init__  st     &+# *&1"<V["Qmm5QWQiQiKj$ka%9&%A$kl 	 %ls   BNr   output_hidden_statesrL   rM   c                 J   ||n| j                   j                  }||n| j                   j                  }| j                  |      }| j	                  |      }|rdnd}|rdnd}| j
                  D ]%  }|r||fz   } |||      }	|	d   }|s||	d   fz   }' t        |||      S )a  
        Parameters:
            patch_input (`torch.Tensor` of shape `(batch_size, num_channels, num_patches, patch_length)`, *required*):
                Past values of the time series
            output_hidden_states (bool, optional): Indicates if hidden states should be outputted.
            output_attentions (bool, optional): Indicates if attentions should be outputted.

        return:
            `BaseModelOutput`
        Nr   )r   rL   r   r   )last_hidden_staterJ   
attentions)r;   rL   rA  r<  r=  r?  r   )
rH   r   rA  rL   r   r   encoder_statesall_attentionsencoder_layerlayer_outputss
             r1   r`   zPatchTSTEncoder.forward  s    " 2C1N-TXT_T_TqTq$8$D $++JjJj 	
 mmK0..{;30d![[ 
	FM#!/</!A)|WhiM )+L !/=3C2E!E
	F ^hvwwr3   NN)ra   rb   rc   rd   r   re   r@   r)   rh   rg   r   r`   rj   rk   s   @r1   r  r    s_    ~ C " -1)-	)x\\)x #Tk)x  $;	)x 
)xr3   r  zG
    Base class for model's outputs, with potential hidden states.
    )custom_introc                   6   e Zd ZU dZdZej                  dz  ed<   dZe	ej                     dz  ed<   dZ
e	ej                     dz  ed<   dZej                  dz  ed<   dZej                  dz  ed<   dZej                  dz  ed<   dZej                  dz  ed	<   y)
PatchTSTModelOutputa>  
    last_hidden_state (`torch.FloatTensor` of shape `(batch_size, num_channels, num_patches, patch_length)`):
        Sequence of hidden-states at the output of the last layer of the model.
    hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
        Tuple of `torch.FloatTensor` (one for the output of the embeddings, if the model has an embedding layer, +
        one for the output of each layer) of shape `(batch_size, num_channels, height, width)`. Hidden-states of
        the model at the output of each layer plus the optional initial embedding outputs.
    mask (`torch.FloatTensor` of shape `(batch_size, num_channels, num_patches)`, *optional*):
        Bool masked tensor indicating which patches are masked
    loc (`torch.FloatTensor` of shape `(batch_size, 1, num_channels)`, *optional*):
        Mean of the input data (batch_size, sequence_length, num_channels) over the sequence_length
    scale (`torch.FloatTensor` of shape `(batch_size, 1, num_channels)`, *optional*):
        Std of the input data (batch_size, sequence_length, num_channels) over the sequence_length
    patch_input (`torch.FloatTensor` of shape `(batch_size, num_channels, num_patches, patch_length)`):
        Patched input to the Transformer
    NrC  rJ   rD  r   locscaler   )ra   rb   rc   rd   rC  r)   FloatTensorr  rJ   ri   rD  r   rM  rN  r   r   r3   r1   rL  rL    s    " 37u((4/659M5**+d2926Je''(4/6%)D%

d
")$(C		T	!(&*E5t#*,0K""T)0r3   rL  z4
    Output type of [`PatchTSTForPretraining`].
    c                       e Zd ZU dZdZej                  dz  ed<   dZej                  dz  ed<   dZ	e
ej                     dz  ed<   dZe
ej                     dz  ed<   y)PatchTSTForPretrainingOutputa  
    loss (*optional*, returned when `labels` is provided, `torch.FloatTensor` of shape `(1,)`):
        MSE loss.
    prediction_output (`torch.FloatTensor` of shape `(batch_size, sequence_length, config.vocab_size)`):
        Prediction outputs of the time series modeling heads.
    Nlossprediction_outputrJ   rD  )ra   rb   rc   rd   rR  r)   rO  r  rS  rJ   ri   rD  r   r3   r1   rQ  rQ    sh     &*D%

d
")26u((4/659M5**+d2926Je''(4/6r3   rQ  z3
    Output type of [`PatchTSTForRegression`].
    c                       e Zd ZU dZdZej                  dz  ed<   dZej                  dz  ed<   dZ	e
ej                     dz  ed<   dZe
ej                     dz  ed<   y)PatchTSTForRegressionOutputa  
    loss (*optional*, returned when `labels` is provided, `torch.FloatTensor` of shape `(1,)`):
        MSE loss.
    regression_outputs (`torch.FloatTensor` of shape `(batch_size, num_targets)`):
        Regression outputs of the time series modeling heads.
    NrR  regression_outputsrJ   rD  )ra   rb   rc   rd   rR  r)   rO  r  rV  rJ   ri   rD  r   r3   r1   rU  rU  )  sh     &*D%

d
")37))D0759M5**+d2926Je''(4/6r3   rU  z3
    Output type of [`PatchTSTForPrediction`].
    c                      e Zd ZU dZdZej                  dz  ed<   dZej                  dz  ed<   dZ	e
ej                     dz  ed<   dZe
ej                     dz  ed<   dZej                  dz  ed<   dZej                  dz  ed<   y)	PatchTSTForPredictionOutputa!  
    loss (*optional*, returned when `labels` is provided, `torch.FloatTensor` of shape `(1,)`):
        MSE loss.
    prediction_outputs (`torch.FloatTensor` of shape `(batch_size, prediction_length, -1)`):
        Prediction outputs of the time series modeling heads.
    attentions (`tuple(torch.FloatTensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
        Tuple of `torch.FloatTensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
        sequence_length)`.

        Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
        heads.
    loc: (`torch.FloatTensor` of shape `(batch_size, 1, num_channels)`, *optional*)
        Mean of the input data (batch_size, sequence_length, num_channels) over the sequence_length
    scale: (`torch.FloatTensor` of shape `(batch_size, 1, num_channels)`, *optional*)
        Std of the input data (batch_size, sequence_length, num_channels) over the sequence_length
    NrR  prediction_outputsrJ   rD  rM  rN  )ra   rb   rc   rd   rR  r)   rO  r  rY  rJ   ri   rD  rM  rN  r   r3   r1   rX  rX  =  s    " &*D%

d
")37))D0759M5**+d2926Je''(4/6$(C		T	!(&*E5t#*r3   rX  z7
    Output type of [`PatchTSTForClassification`].
    c                       e Zd ZU dZdZej                  dz  ed<   dZej                  dz  ed<   dZ	e
ej                     dz  ed<   dZe
ej                     dz  ed<   y)PatchTSTForClassificationOutputa  
    loss (*optional*, returned when `labels` is provided, `torch.FloatTensor` of shape `(1,)`):
        Total loss as the sum of the masked language modeling loss and the next sequence prediction
        (classification) loss.
    prediction_logits (`torch.FloatTensor` of shape `(batch_size, num_targets)`):
        Prediction scores of the PatchTST modeling head (scores before SoftMax).
    NrR  prediction_logitsrJ   rD  )ra   rb   rc   rd   rR  r)   rO  r  r\  rJ   ri   rD  r   r3   r1   r[  r[  ]  sh     &*D%

d
")26u((4/659M5**+d2926Je''(4/6r3   r[  z
    Base class for time series model's predictions outputs that contains the sampled values from the chosen
    distribution.
    c                   :    e Zd ZU dZdZej                  dz  ed<   y)SamplePatchTSTOutputz
    sequences (`torch.FloatTensor` of shape `(batch_size, num_samples, prediction_length, num_targets)`):
        Sampled values from the chosen distribution.
    N	sequences)ra   rb   rc   rd   r_  r)   rO  r  r   r3   r1   r^  r^  r  s    
 +/Iu  4'.r3   r^  inputtargetrM   c                 &    | j                  |       S )zc
    Computes the negative log likelihood loss from input distribution with respect to target.
    )log_prob)r`  ra  s     r1   nllrd    s     NN6"""r3   input_tensorweightsc                 P   |t        j                  |dk7  | |z  t        j                  |             }t        j                  |r|j	                  |      n|j	                         d      }|r|j	                  |      |z  S |j	                         |z  S | j                  |      S )aj  
    Computes the weighted average of a given tensor across a given `dim`, masking values associated with weight zero,
    meaning instead of `nan * 0 = nan` you will get `0 * 0 = 0`.

    Args:
        input_tensor (`torch.FloatTensor`):
            Input tensor, of which the average must be computed.
        weights (`torch.FloatTensor`, *optional*):
            Weights tensor, of the same shape as `input_tensor`.
        dim (`int`, *optional*):
            The dim along which to average `input_tensor`.

    Returns:
        `torch.FloatTensor`: The tensor with values averaged along the specified `dim`.
    r   r$         ?min)r)   where
zeros_likeclampr   r   )re  rf  r%   weighted_tensorsum_weightss        r1   weighted_averagerp    s      ++glL74JEL\L\]iLjkkk#'++#+"67;;=VYZ03###,R]]]9L9L9NR]]]  S ))r3   c            	            e Zd ZdZdef fdZdej                  dej                  deej                  ej                  ej                  f   fdZ	 xZ
S )PatchTSTStdScalerz
    Standardize features by calculating the mean and scaling along the first dimension, and then normalizes it by
    subtracting from the mean and dividing by the standard deviation.
    r;   c                     t         |           t        |d      r|j                  nd| _        t        |d      r|j
                  nd| _        t        |d      r|j                  | _        y d| _        y )Nscaling_dimr   keepdimTminimum_scalegh㈵>)r?   r@   hasattrrt  r%   ru  rv  ru   s     r1   r@   zPatchTSTStdScaler.__init__  s[    )0)G6%%Q)0)Cv~~5<V_5UV11[_r3   dataobserved_indicatorrM   c                    |j                  | j                  | j                        }|j                  d      }||z  j                  | j                  | j                        |z  }||z
  |z  dz  j                  | j                  | j                        |z  }t	        j
                  || j                  z         }||z
  |z  ||fS )C  
        Parameters:
            data (`torch.Tensor` of shape `(batch_size, sequence_length, num_input_channels)`):
                input for Batch norm calculation
            observed_indicator (`torch.BoolTensor` of shape `(batch_size, sequence_length, num_input_channels)`):
                Calculating the scale on the observed indicator.
        Returns:
            tuple of `torch.Tensor` of shapes
                (`(batch_size, sequence_length, num_input_channels)`,`(batch_size, 1, num_input_channels)`,
                `(batch_size, 1, num_input_channels)`)
        ru  rh  r#   )r   r%   ru  	clamp_minr)   sqrtrv  )rH   rx  ry  denominatorrM  variancerN  s          r1   r`   zPatchTSTStdScaler.forward  s     ),,TXXt||,L!++C0((--dhh-MP[[Sj$661<AA$((TXT`T`Aadoo

8d&8&889s
e#S%//r3   ra   rb   rc   rd   r   r@   r)   rh   ri   r`   rj   rk   s   @r1   rr  rr    sS    
`~ `0LL06;ll0	u||U\\5<<7	80r3   rr  c            	            e Zd ZdZdef fdZdej                  dej                  deej                  ej                  ej                  f   fdZ	 xZ
S )PatchTSTMeanScalerz
    Computes a scaling factor as the weighted average absolute value along the first dimension, and scales the data
    accordingly.
    r;   c                 &   t         |           t        |d      r|j                  nd| _        t        |d      r|j
                  nd| _        t        |d      r|j                  nd| _        t        |d      r|j                  | _        y d | _        y )Nrt  r   ru  Trv  绽|=default_scale)r?   r@   rw  rt  r%   ru  rv  r  ru   s     r1   r@   zPatchTSTMeanScaler.__init__  su    )0)G6%%Q)0)Cv~~5<V_5UV11[`5<V_5UV11[_r3   rx  ry  rM   c                    ||z  j                         j                  | j                  d      }|j                  | j                  d      }|t        j                  |d      z  }| j
                  Q|j                  d      }t        j                  |j                  d      d      }t        j                  ||z        }n"| j
                  t        j                  |      z  }t        j                  |dkD  ||      }t        j                  || j                        }||z  }	| j                  s|j                  | j                        }|	t        j                  |      |fS )r{  Tr|  r   ri  r   r$   )absr   r%   r)   rm  r  squeeze	ones_likerk  rv  ru  rl  )
rH   rx  ry  ts_sumnum_observedrN  	batch_sumbatch_observationsr  scaled_datas
             r1   r`   zPatchTSTMeanScaler.forward  s.    ++00266txx6N)--dhh-E\q99 %

q
)I!&\-=-=a-@a!H!MM)6H*HIM ..1GGM L1,e]C Et'9'9:Ul||MMdhhM/EE,,U3U::r3   r  rk   s   @r1   r  r    sS    
`~ `&;LL&;6;ll&;	u||U\\5<<7	8&;r3   r  c            
            e Zd ZdZdef fdZ	 d	dej                  dej                  dz  deej                  ej                  ej                  f   fdZ	 xZ
S )
PatchTSTNOPScalerz|
    Assigns a scaling factor equal to 1 along the first dimension, and therefore applies no scaling to the input data.
    r;   c                     t         |           t        |d      r|j                  nd| _        t        |d      r|j
                  | _        y d| _        y )Nrt  r   ru  T)r?   r@   rw  rt  r%   ru  ru   s     r1   r@   zPatchTSTNOPScaler.__init__  s@    )0)G6%%Q)0)Cv~~r3   Nrx  ry  rM   c                     t        j                  |d      j                  | j                  | j                        }t        j
                  |d      j                  | j                  | j                        }|||fS )a  
        Parameters:
            data (`torch.Tensor` of shape `(batch_size, sequence_length, num_input_channels)`):
                input for Batch norm calculation
        Returns:
            tuple of `torch.Tensor` of shapes
                (`(batch_size, sequence_length, num_input_channels)`,`(batch_size, 1, num_input_channels)`,
                `(batch_size, 1, num_input_channels)`)
        Fr'  )r%   ru  )r)   r  r   r%   ru  rl  )rH   rx  ry  rN  rM  s        r1   r`   zPatchTSTNOPScaler.forward	  si     E:??DHHVZVbVb?ct59>>488UYUaUa>bS%r3   r   r  rk   s   @r1   r  r    s_    N~ N MQ LL 6;llT6I 	u||U\\5<<7	8 r3   r  c            	            e Zd Zdef fdZdej                  dej                  deej                  ej                  ej                  f   fdZ xZ	S )PatchTSTScalerr;   c                     t         |           |j                  dk(  s|j                  du rt        |      | _        y |j                  dk(  rt        |      | _        y t        |      | _        y )Nr   Tr   )r?   r@   r   r  scalerrr  r  ru   s     r1   r@   zPatchTSTScaler.__init__  sU    >>V#v~~'=,V4DK^^u$+F3DK+F3DKr3   rx  ry  rM   c                 8    | j                  ||      \  }}}|||fS )a>  
        Parameters:
            data (`torch.Tensor` of shape `(batch_size, sequence_length, num_input_channels)`):
                Input for scaler calculation
            observed_indicator (`torch.BoolTensor` of shape `(batch_size, sequence_length, num_input_channels)`):
                Calculating the scale on the observed indicator.
        Returns:
            tuple of `torch.Tensor` of shapes
                (`(batch_size, sequence_length, num_input_channels)`,`(batch_size, 1, num_input_channels)`,
                `(batch_size, 1, um_input_channels)`)
        )r  )rH   rx  ry  rM  rN  s        r1   r`   zPatchTSTScaler.forward$  s)      ;;t-?@c5S%r3   )
ra   rb   rc   r   r@   r)   rh   ri   r`   rj   rk   s   @r1   r  r    sL    4~ 4 LL 6;ll 	u||U\\5<<7	8 r3   r  c                        e Zd Zdef fdZ	 	 	 	 	 ddej                  dej                  dz  dej                  dz  dedz  dedz  d	edz  d
ee	z  fdZ
 xZS )PatchTSTModelr;   c                 b   t         |   |       t        |      | _        t	        |      | _        |j                  | _        | j
                  j                  }| j                  rt        |      | _	        nt        j                         | _	        t        ||      | _        | j                          y )N)r   )r?   r@   r  r  r   
patchifierdo_mask_inputr   r   maskingr   r   r  encoderr@  r%  s      r1   r@   zPatchTSTModel.__init__8  s     $V,*62#11oo11*62DL;;=DL&v;G 	r3   Nr   past_observed_maskfuture_valuesrA  rL   return_dictrM   c           	         ||n| j                   j                  }||n| j                   j                  }||n| j                   j                  }|t	        j
                  |      }| j                  ||      \  }}	}
| j                  |      }| j                  r| j                  |      \  }}n| j                  |      d}}| j                  |||      }|s>|j                  |j                  |j                  f}|||	|
|fz   }t        d |D              S t        |j                  |j                  |j                  ||	|
|      S )a  
        Parameters:
            past_values (`torch.Tensor` of shape `(bs, sequence_length, num_input_channels)`, *required*):
                Input sequence to the model
            past_observed_mask (`torch.BoolTensor` of shape `(batch_size, sequence_length, num_input_channels)`, *optional*):
                Boolean mask to indicate which `past_values` were observed and which were missing. Mask values selected
                in `[0, 1]`:

                - 1 for values that are **observed**,
                - 0 for values that are **missing** (i.e. NaNs that were replaced by zeros).
            future_values (`torch.BoolTensor` of shape `(batch_size, prediction_length, num_input_channels)`, *optional*):
                Future target values associated with the `past_values`
            output_hidden_states (`bool`, *optional*):
                Whether or not to return the hidden states of all layers
            output_attentions (`bool`, *optional*):
                Whether or not to return the output attention of all layers
            return_dict (`bool`, *optional*):
                Whether or not to return a `ModelOutput` instead of a plain tuple.

        Returns:
            `PatchTSTModelOutput` or tuple of `torch.Tensor` (if `return_dict`=False or `config.return_dict`=False)

        Examples:

        ```python
        >>> from huggingface_hub import hf_hub_download
        >>> import torch
        >>> from transformers import PatchTSTModel

        >>> file = hf_hub_download(
        ...     repo_id="hf-internal-testing/etth1-hourly-batch", filename="train-batch.pt", repo_type="dataset"
        ... )
        >>> batch = torch.load(file)

        >>> model = PatchTSTModel.from_pretrained("namctin/patchtst_etth1_pretrain")

        >>> # during training, one provides both past and future values
        >>> outputs = model(
        ...     past_values=batch["past_values"],
        ...     future_values=batch["future_values"],
        ... )

        >>> last_hidden_state = outputs.last_hidden_state
        ```N)r   rA  rL   c              3   &   K   | ]	  }||  y wr   r   ).0vs     r1   	<genexpr>z(PatchTSTModel.forward.<locals>.<genexpr>  s     =qq}=s   )rC  rJ   rD  r   rM  rN  r   )r;   use_return_dictrL   rA  r)   r  r  r  r  r  r  rC  rJ   rD  ri   rL  )rH   r   r  r  rA  rL   r  r   scaled_past_valuesrM  rN  patched_valuesmasked_valuesr   encoder_outputr   s                   r1   r`   zPatchTSTModel.forwardJ  sY   n &1%<k$++B]B]1B1N-TXT_T_TqTq$8$D $++JjJj 	 %!&!= *.[BT)U&C );<"&,,~">M4"&,,~">4M%<Pdu & 
 %779U9UWeWpWpqGsE> BBG=G===",>>(66%00&
 	
r3   NNNNN)ra   rb   rc   r   r@   r)   rh   rg   ri   rL  r`   rj   rk   s   @r1   r  r  6  s    ~ * 37-1,0)-#'[
\\[
 "LL4/[
 ||d*	[

 #Tk[
  $;[
 D[[
 
$	$[
r3   r  c                   `     e Zd ZdZdef fdZdej                  dej                  fdZ xZ	S )PatchTSTMaskPretrainHeadz-
    Pretraining head for mask modelling
    r;   c                 0   t         |           |j                  dkD  rt        j                  |j                        nt        j
                         | _        t        j                  |j                  |j                        | _
        |j                  | _        y Nr   )r?   r@   head_dropoutr   r   r   r   rC   rr   r   linearr   ru   s     r1   r@   z!PatchTSTMaskPretrainHead.__init__  sh    :@:M:MPQ:Qrzz&"5"56WYWbWbWdii0C0CD#11r3   	embeddingrM   c                     | j                  | j                  |            }| j                  r|ddddddddf   }|S )a  
        Parameters:
            embedding (`torch.Tensor` of shape `(bs, num_channels, num_patches, d_model)` or
                    `(bs, num_channels, num_patches+1, d_model)` if `cls_token` is set to True, *required*):
                Embedding from the model
        Returns:
            `torch.Tensor` of shape `(bs, num_channels, num_patches, d_model)` or
                            `(bs, num_channels, num_patches+1, d_model)` if `cls_token` is set to True

        Nr   )r  r   r   )rH   r  s     r1   r`   z PatchTSTMaskPretrainHead.forward  s>     KKY 78	!!QA+.Ir3   ry   rk   s   @r1   r  r    s/    2~ 2 %,, r3   r  z*
    The PatchTST for pretrain model.
    c                        e Zd Zdef fdZ	 	 	 	 ddej                  dej                  dz  dedz  dedz  dedz  d	ee	z  fd
Z
 xZS )PatchTSTForPretrainingr;   c                     t         |   |       d|_        t        |      | _        t        |      | _        | j                          y )NT)r;   )r?   r@   r  r  r   r  headr@  ru   s     r1   r@   zPatchTSTForPretraining.__init__  s<     #"&1
,V4	 	r3   Nr   r  rA  rL   r  rM   c                    ||n| j                   j                  }| j                  ||||d      }| j                  |j                        }t        j                  d      }	 |	||j                        }
|
j                  d      |j                  z  j                         |j                  j                         dz   z  }|j                  }|s|f|dd	 z   }||f|z   }|S |}|S t        ||||j                  
      S )a	  
        Parameters:
            past_values (`torch.Tensor` of shape `(bs, sequence_length, num_input_channels)`, *required*):
                Input sequence to the model
            past_observed_mask (`torch.BoolTensor` of shape `(batch_size, sequence_length, num_input_channels)`, *optional*):
                Boolean mask to indicate which `past_values` were observed and which were missing. Mask values selected
                in `[0, 1]`:

                - 1 for values that are **observed**,
                - 0 for values that are **missing** (i.e. NaNs that were replaced by zeros).
            output_hidden_states (`bool`, *optional*):
                Whether or not to return the hidden states of all layers
            output_attentions (`bool`, *optional*):
                Whether or not to return the output attention of all layers
            return_dict (`bool`, *optional*): Whether or not to return a `ModelOutput` instead of a plain tuple.

        Returns:
            `PatchTSTForPretrainingOutput` or tuple of `torch.Tensor` (if `return_dict`=False or
            `config.return_dict`=False)

        Examples:

        ```python
        >>> from huggingface_hub import hf_hub_download
        >>> import torch
        >>> from transformers import PatchTSTConfig, PatchTSTForPretraining

        >>> file = hf_hub_download(
        ...     repo_id="hf-internal-testing/etth1-hourly-batch", filename="train-batch.pt", repo_type="dataset"
        ... )
        >>> batch = torch.load(file)

        >>> # Config for random mask pretraining
        >>> config = PatchTSTConfig(
        ...     num_input_channels=7,
        ...     context_length=512,
        ...     patch_length=12,
        ...     stride=12,
        ...     mask_type='random',
        ...     random_mask_ratio=0.4,
        ...     use_cls_token=True,
        ... )
        >>> # Config for forecast mask pretraining
        >>> config = PatchTSTConfig(
        ...     num_input_channels=7,
        ...     context_length=512,
        ...     patch_length=12,
        ...     stride=12,
        ...     mask_type='forecast',
        ...     num_forecast_mask_patches=5,
        ...     use_cls_token=True,
        ... )
        >>> model = PatchTSTForPretraining(config)

        >>> # during training, one provides both past and future values
        >>> outputs = model(past_values=batch["past_values"])

        >>> loss = outputs.loss
        >>> loss.backward()
        ```Tr   r  rA  rL   r  none	reductionr!   r$   r  r   )rR  rS  rJ   rD  )r;   r  r   r  rC  r   MSELossr   r   r   r   rJ   rQ  rD  )rH   r   r  rA  rL   r  r   model_outputx_hatrR  loss_valmasked_lossrE  r   s                 r1   r`   zPatchTSTForPretraining.forward  s   L &1%<k$++B]B] zz#1!5/ " 
 		,889 zzF+|778}}},|/@/@@EEG<K\K\K`K`KbejKjk%33ha!33G2=2I{nw.GN PWGN+^`l`w`w
 	
r3   )NNNN)ra   rb   rc   r   r@   r)   rh   rg   ri   rQ  r`   rj   rk   s   @r1   r  r    s    ~  37,0)-#'b
\\b
 "LL4/b
 #Tk	b

  $;b
 D[b
 
-	-b
r3   r  c                   D     e Zd Zdef fdZdej                  fdZ xZS )PatchTSTClassificationHeadr;   c                    t         |           |j                  | _        |j                  | _        t	        j
                  d      | _        |j                  dkD  rt	        j                  |j                        nt	        j                         | _
        t	        j                  |j                  |j                  z  |j                        | _        y Nr   	start_dimr   )r?   r@   r   pooling_typer   Flattenflattenr  r   r   r   rC   r   rr   num_targetsr  ru   s     r1   r@   z#PatchTSTClassificationHead.__init__:  s    #11"//zzA.:@:M:MPQ:Qrzz&"5"56WYWbWbWdii 9 9FNN JFL^L^_r3   r  c                 n   | j                   r|dddddddf   }ng| j                  dk(  r|j                  d      }nE| j                  dk(  r|j                  d      j                  }nt        d| j                   d      | j                  |      }| j                  | j                  |            }|S )	a[  
        Parameters:
            embedding (`torch.Tensor` of shape `(bs, num_channels, num_patches, d_model)` or
                     `(bs, num_channels, num_patches+1, d_model)` if `cls_token` is set to True, *required*):
                Embedding from the model
        Returns:
            `torch.Tensor` of shape `(bs, num_targets)`

        Nr   r   r#   r$   r   pooling operator  is not implemented yet)	r   r  r   r   valuesrB   r  r  r   rH   r  pooled_embeddingrx   s       r1   r`   z"PatchTSTClassificationHead.forwardB  s     (Aq!4&((~~!~4%'(}}}3::01B1B0CCZ[\\<<(89T\\*:;<r3   r   rk   s   @r1   r  r  9  s!    `~ ` r3   r  z0
    The PatchTST for classification model.
    c                        e Zd Zdef fdZe	 	 	 	 	 ddej                  dej                  dz  dedz  dedz  dedz  d	edz  d
e	e
z  fd       Z xZS )PatchTSTForClassificationr;   c                     t         |   |       |j                  rt        j	                  d       d|_        t        |      | _        t        |      | _        | j                          y )N+Setting `do_mask_input` parameter to False.F)
r?   r@   r  loggerwarningr  r   r  r  r@  ru   s     r1   r@   z"PatchTSTForClassification.__init__d  sT      NNHI#(F "6*
.v6	 	r3   Nr   target_valuesr  rA  rL   r  rM   c                 R   ||n| j                   j                  }| j                  ||||d      }| j                  |j                        }	d}
|t        j                         } ||	|      }
|s|	f|dd z   }|
|
f|z   }|S |}|S t        |
|	|j                  |j                        S )ac  
        past_values (`torch.Tensor` of shape `(bs, sequence_length, num_input_channels)`, *required*):
            Input sequence to the model
        target_values (`torch.Tensor`, *optional*):
            Labels associates with the `past_values`
        past_observed_mask (`torch.BoolTensor` of shape `(batch_size, sequence_length, num_input_channels)`, *optional*):
            Boolean mask to indicate which `past_values` were observed and which were missing. Mask values selected
            in `[0, 1]`:

            - 1 for values that are **observed**,
            - 0 for values that are **missing** (i.e. NaNs that were replaced by zeros).

        Examples:

        ```python
        >>> from transformers import PatchTSTConfig, PatchTSTForClassification

        >>> # classification task with two input channel2 and 3 classes
        >>> config = PatchTSTConfig(
        ...     num_input_channels=2,
        ...     num_targets=3,
        ...     context_length=512,
        ...     patch_length=12,
        ...     stride=12,
        ...     use_cls_token=True,
        ... )
        >>> model = PatchTSTForClassification(config=config)

        >>> # during inference, one only provides past values
        >>> past_values = torch.randn(20, 512, 2)
        >>> outputs = model(past_values=past_values)
        >>> labels = outputs.prediction_logits
        ```NTr  r   r   )rR  r\  rJ   rD  )
r;   r  r   r  rC  r   CrossEntropyLossr[  rJ   rD  )rH   r   r  r  rA  rL   r  r   r  y_hatr  rR  r   s                r1   r`   z!PatchTSTForClassification.forwardr  s    Z &1%<k$++B]B]zz#1!5/ " 
 		,889$&&(DE=1Hha!33G/7/CxkG+GN JQGN.#&44#..	
 	
r3   r  )ra   rb   rc   r   r@   r   r)   rh   rg   ri   r[  r`   rj   rk   s   @r1   r  r  ^  s    ~   .2*.,0)-#'E
\\E
 ||d*E
 !4K	E

 #TkE
  $;E
 D[E
 
0	0E
 E
r3   r  z,
    The PatchTST for regression Model.
    c                   J     e Zd Zddedef fdZdej                  fdZ xZ	S )PatchTSTPredictionHeadr;   r   c                    t         |           |j                  | _        |j                  | _        |j                  | _        |j
                  | _        | j
                  s| j                  r|j                  }n|j                  |z  }| j                  sVt        j                         | _	        t        j                         | _
        t        j                         | _        t        | j                        D ]  }| j                  j                  t        j                  d             |:| j                  j                  t        j                  ||j                                n*| j                  j                  |j#                  |             | j                  j                  |j$                  dkD  rt        j&                  |j$                        nt        j(                                 yt        j                  d      | _        |&t        j                  ||j                         | _        n|j#                  |      | _        |j$                  dkD  rt        j&                  |j$                        nt        j(                         | _        y)a  
        num_patches (`int`):
            The number of patches in the input sequence.
        distribution_output (`DistributionOutput`, *optional*):
            The distribution output layer for probabilistic forecasting. If None, a linear output layer is used.
        r#   r  Nr   )r?   r@   share_projectionr   r   r  rr   r   r  projectionsdropoutsflattensr  r   r  rC   prediction_lengthget_parameter_projectionr  r   r   r  
projectionr   )rH   r;   r   distribution_outputrA   r  rI   s         r1   r@   zPatchTSTPredictionHead.__init__  s    	 & 7 7"(";";#11"// 2 2~~H~~3H$$!}}DMMODMMMODM4223 t$$RZZ!%<=&.$$++BIIh@X@X,YZ $$++,?,X,XYa,bc$$H[H[^_H_RZZ0C0C%Degepeperst ::2DL"*"$))Hf6N6N"O #6"N"Nx"X>D>Q>QTU>U2::f&9&9:[][f[f[hDLr3   r  c                    | j                   r|dddddddf   }nP| j                  dk(  r|j                  d      }n.| j                  dk(  r|j                  d      j                  }n|}| j
                  sg }t        | j                        D ]\  } | j                  |   |dd|ddf         } | j                  |   |      } | j                  |   |      }|j                  |       ^ t        j                  |d      }n3| j                  |      }| j                  |      }| j!                  |      }t#        |t$              rt%        d |D              }|S |j'                  dd      }|S )	aj  
        Parameters:
            embedding (`torch.Tensor` of shape `(bs, num_channels, num_patches, d_model)` or
                     `(bs, num_channels, num_patches+1, d_model)` if `cls_token` is set to True, *required*):
                Embedding from the model
        Returns:
            `torch.Tensor` of shape `(bs, forecast_len, num_channels)`

        Nr   r   r#   r$   r   r   c              3   @   K   | ]  }|j                  d d        yw)r#   r   N)r+   )r  zs     r1   r  z1PatchTSTPredictionHead.forward.<locals>.<genexpr>  s     =1;;q!,=s   )r   r  r   r   r  r  r  r   r  r  r  r   r)   r  r  r   r  r   ri   r+   )rH   r  r  rx   r  s        r1   r`   zPatchTSTPredictionHead.forward  st    (Aq!4  F*#,>>a>#8 ""e+#,==Q=#7#>#>  $- $$F4223 0#34==#34DQ1W4M#N #34==#34D#E  $74#3#3A#67G#H ./0 [[Q/F  $||,<=#||,<= __%56Ffe$=f==F  %%a+Fr3   r   )
ra   rb   rc   r   re   r@   r)   rh   r`   rj   rk   s   @r1   r  r    s*    )i~ )iC )iV1 1r3   r  z,
    The PatchTST for prediction model.
    c                   "    e Zd Zdef fdZ	 	 	 	 	 ddej                  dej                  dz  dej                  dz  dedz  dedz  d	edz  d
ee	z  fdZ
 ej                         	 ddej                  dej                  dz  d
efd       Z xZS )PatchTSTForPredictionr;   c                    t         |   |       |j                  rt        j	                  d       d|_        t        |      | _        |j                  dk(  rd | _        n|j                  dk(  rt        |j                        | _        nn|j                  dk(  rt        |j                        | _        nC|j                  dk(  rt        |j                        | _        nt        d|j                         t        || j                  j                  j                   | j                  	      | _        | j%                          y )
Nr  Fmse	student_tr$   normalnegative_binomialUnknown distribution output )r  )r?   r@   r  r  r  r  r   rR  r  r   r  r   r   rB   r  r  r   r  r@  ru   s     r1   r@   zPatchTSTForPrediction.__init__&  s     NNHI#(F "6*
;;%'+D$))[8+9f>V>V+W(++x7+7F<T<T+U(++/BB+AfF^F^+_( #?@Z@Z?[!\]]*DJJ))554KcKc
	
 	r3   Nr   r  r  rA  rL   r  rM   c                    ||n| j                   j                  }| j                  ||||d      }| j                  |j                        }	d}
| j
                  r|	}n|	|j                  z  |j                  z   }|u| j
                  rJ| j
                  j                  |	|j                  |j                        }t        ||      }
t        |
      }
nt        j                  d      } |||      }
|j                  }|j                  }|s|f|dd z   }|
|
f|z   }|S |}|S t        |
||j                  |j                  ||	      S )
aV	  
        Parameters:
            past_values (`torch.Tensor` of shape `(bs, sequence_length, num_input_channels)`, *required*):
                Input sequence to the model
            past_observed_mask (`torch.BoolTensor` of shape `(batch_size, sequence_length, num_input_channels)`, *optional*):
                Boolean mask to indicate which `past_values` were observed and which were missing. Mask values selected
                in `[0, 1]`:

                - 1 for values that are **observed**,
                - 0 for values that are **missing** (i.e. NaNs that were replaced by zeros).
            future_values (`torch.Tensor` of shape `(bs, forecast_len, num_input_channels)`, *optional*):
                Future target values associated with the `past_values`
            output_hidden_states (`bool`, *optional*):
                Whether or not to return the hidden states of all layers
            output_attentions (`bool`, *optional*):
                Whether or not to return the output attention of all layers
            return_dict (`bool`, *optional*):
                Whether or not to return a `ModelOutput` instead of a plain tuple.

        Returns:
            `PatchTSTForPredictionOutput` or tuple of `torch.Tensor` (if `return_dict`=False or
            `config.return_dict`=False)

        Examples:

        ```python
        >>> from huggingface_hub import hf_hub_download
        >>> import torch
        >>> from transformers import PatchTSTConfig, PatchTSTForPrediction

        >>> file = hf_hub_download(
        ...     repo_id="hf-internal-testing/etth1-hourly-batch", filename="train-batch.pt", repo_type="dataset"
        ... )
        >>> batch = torch.load(file)

        >>> # Prediction task with 7 input channels and prediction length is 96
        >>> model = PatchTSTForPrediction.from_pretrained("namctin/patchtst_etth1_forecast")

        >>> # during training, one provides both past and future values
        >>> outputs = model(
        ...     past_values=batch["past_values"],
        ...     future_values=batch["future_values"],
        ... )

        >>> loss = outputs.loss
        >>> loss.backward()

        >>> # during inference, one only provides past values, the model outputs future values
        >>> outputs = model(past_values=batch["past_values"])
        >>> prediction_outputs = outputs.prediction_outputs
        ```NTr  rM  rN  r   r  r   r!   )rR  rY  rJ   rD  rM  rN  )r;   r  r   r  rC  r  rN  rM  distributionrd  rp  r   r  rX  rJ   rD  )rH   r   r  r  rA  rL   r  r   r  r  r  	y_hat_outr  rR  rM  rN  r   s                    r1   r`   zPatchTSTForPrediction.forwardC  sn   | &1%<k$++B]B] zz#1!5/ " 
 		,889##I 2 22\5E5EEI$''#77DD|//|7I7I  E   |];+H5zzF3	=9"" l\!B%77G/7/CxkG+GN JQGN*(&44#..
 	
r3   c                    | j                   j                  } | |d|d      }| j                  rz| j                  j                  |j                  |j
                  |j                        }t        |      D cg c]  }|j                          }}t        j                  |d      }n|j                  j                  d      }t        |      S c c}w )a   
        Generate sequences of sample predictions from a model with a probability distribution head.

        Parameters:
            past_values (`torch.FloatTensor` of shape `(batch_size, sequence_length, num_input_channels)`):
                Past values of the time series that serves as context in order to predict the future.
            past_observed_mask (`torch.BoolTensor` of shape `(batch_size, sequence_length, num_input_channels)`, *optional*):
                Boolean mask to indicate which `past_values` were observed and which were missing. Mask values selected
                in `[0, 1]`:

                - 1 for values that are **observed**,
                - 0 for values that are **missing** (i.e. NaNs that were replaced by zeros).

        Return:
            [`SamplePatchTSTOutput`] where the outputs `sequences` tensor will have shape `(batch_size, number of
            samples, prediction_length, 1)` or `(batch_size, number of samples, prediction_length, num_input_channels)`
            for multivariate predictions.
        NF)r   r  r  rA  r  r   r$   r_  )r;   num_parallel_samplesr  r  rY  rM  rN  r  sampler)   r  r   r^  rH   r   r  r  r   r  r   sampless           r1   generatezPatchTSTForPrediction.generate  s    2  ${{?? #1!&	
 ##33@@**7== A L 7<<P6QR|**,RGRkk'q1G00::1=G#g66 Ss   8Cr  r   )ra   rb   rc   r   r@   r)   rh   rg   ri   rX  r`   r  r^  r  rj   rk   s   @r1   r  r     s    ~ @ 37-1,0)-#'l
\\l
 "LL4/l
 ||d*	l

 #Tkl
  $;l
 D[l
 
,	,l
\ U]]_ 37-7\\-7 "LL4/-7 
	-7 -7r3   r  c                   J     e Zd ZdZddef fdZdej                  fdZ xZ	S )PatchTSTRegressionHeadz
    Regression head
    r;   c                    t         |           |j                  | _        |j                  | _        |j
                  | _        || _        |j                  |j                  z  }t        j                  d      | _        |j                  dkD  rt        j                  |j                        nt        j                         | _        |&t        j                   ||j"                        | _        y |j'                  |      | _        y r  )r?   r@   output_rangey_ranger   r  r  r   rr   r   r  r  r  r   r   r   rC   r  r  r  )rH   r;   r  rA   rI   s       r1   r@   zPatchTSTRegressionHead.__init__  s    **#11"//#6 ,,v~~=zzA.:@:M:MPQ:Qrzz&"5"56WYWbWbWd& ii&2D2DEDO1JJ8TDOr3   r  c                 2   | j                   r|dddddddf   }ng| j                  dk(  r|j                  d      }nE| j                  dk(  r|j                  d      j                  }nt        d| j                   d      | j                  | j                  |            }| j                  |      }| j                  du | j                  duz  rEt        j                  |      | j                  d	   | j                  d   z
  z  | j                  d   z   }|S )
aY  
        Parameters:
            embedding (`torch.Tensor` of shape `(bs, num_channels, num_patches, d_model)` or
                    `(bs, num_channels, num_patches+1, d_model)` if `cls_token` is set to True, *required*):
                Embedding from the model
        Returns:
            `torch.Tensor` of shape `(bs, output_dim)`

        Nr   r   r#   r$   r   r  r  r   )r   r  r   r   r  rB   r   r  r  r  r  r)   sigmoidr  s       r1   r`   zPatchTSTRegressionHead.forward  s    (Aq!4&((~~!~4%'(}}}3::01B1B0CCZ[\\  <<5E(FG !12$$,T1IJ]]6*dll1oQ.OPSWS_S_`aSbbFr3   r   ry   rk   s   @r1   r  r    s&    U~ U" r3   r  z,
    The PatchTST for regression model.
    c                   ,    e Zd Zdef fdZe	 	 	 	 	 ddej                  dej                  dz  dej                  dz  dedz  dedz  d	edz  d
e	e
z  fd       Z ej                         	 ddej                  dej                  dz  d
efd       Z xZS )PatchTSTForRegressionr;   c                 J   t         |   |       |j                  rt        j	                  d       d|_        t        |      | _        |j                  dk(  rd | _        n|j                  dk(  rt        |j                        | _        nn|j                  dk(  rt        |j                        | _        nC|j                  dk(  rt        |j                        | _        nt        d|j                         t        || j                        | _        | j!                          y )	Nr  Fr  r  r$   r  r  r  )r?   r@   r  r  r  r  r   rR  r  r   r  r   r   rB   r  r  r@  ru   s     r1   r@   zPatchTSTForRegression.__init__  s      NNHI#(F "6*
;;%'+D$))[8+9f>P>P+Q(++x7+7F<N<N+O(++/BB+AfFXFX+Y( #?@Z@Z?[!\]]*643K3KL	 	r3   Nr   r  r  rA  rL   r  rM   c                     ||n j                   j                  } j                  ||||d      } j                  |j                        }	d}
|r j
                  rG j
                  j                  |	      }t         fd|	D              }	t        ||      }
t        |
      }
nt        j                  d      }
 |
|	|      }
|s|	f|dd z   }|
|
f|z   }|S |}|S t        |
|	|j                  |j                  	      S )
a#  
        past_values (`torch.Tensor` of shape `(bs, sequence_length, num_input_channels)`, *required*):
            Input sequence to the model
        target_values (`torch.Tensor` of shape `(bs, num_input_channels)`):
            Target values associates with the `past_values`
        past_observed_mask (`torch.BoolTensor` of shape `(batch_size, sequence_length, num_input_channels)`, *optional*):
            Boolean mask to indicate which `past_values` were observed and which were missing. Mask values selected
            in `[0, 1]`:

            - 1 for values that are **observed**,
            - 0 for values that are **missing** (i.e. NaNs that were replaced by zeros).
            Whether or not to return a `ModelOutput` instead of a plain tuple.

        Examples:

        ```python
        >>> from transformers import PatchTSTConfig, PatchTSTForRegression

        >>> # Regression task with 6 input channels and regress 2 targets
        >>> model = PatchTSTForRegression.from_pretrained("namctin/patchtst_etth1_regression")

        >>> # during inference, one only provides past values, the model outputs future values
        >>> past_values = torch.randn(20, 512, 6)
        >>> outputs = model(past_values=past_values)
        >>> regression_outputs = outputs.regression_outputs
        ```NTr  c              3   j   K   | ]*  }|j                  d j                  j                         , yw)r!   N)rQ   r;   r  )r  itemrH   s     r1   r  z0PatchTSTForRegression.forward.<locals>.<genexpr>p  s&     WdiiDKK,C,CDWs   03r   r  r   r   )rR  rV  rJ   rD  )r;   r  r   r  rC  r  r  ri   rd  rp  r   r  rU  rJ   rD  )rH   r   r  r  rA  rL   r  r   r  r  rR  r  r   s   `            r1   r`   zPatchTSTForRegression.forward9  s   L &1%<k$++B]B]zz#1!5/ " 
 		,889$''#77DDUKWQVWW<7'-zzF3E=1ha!33G+/+;tg'GN BIGN*$&44#..	
 	
r3   c                 v   | j                   j                  } | |d|d      }| j                  j                  |j                        }t        |      D cg c]  }|j                          }}t        j                  |d      j                  d|| j                   j                        }t        |      S c c}w )a  
        Generate sequences of sample predictions from a model with a probability distribution head.

        Parameters:
            past_values (`torch.FloatTensor` of shape `(batch_size, sequence_length, num_input_channels)`):
                Past values of the time series that serves as context in order to predict the future.
            past_observed_mask (`torch.BoolTensor` of shape `(batch_size, sequence_length, num_input_channels)`, *optional*):
                Boolean mask to indicate which `past_values` were observed and which were missing. Mask values selected
                in `[0, 1]`:

                - 1 for values that are **observed**,
                - 0 for values that are **missing** (i.e. NaNs that were replaced by zeros).

        Return:
            [`SamplePatchTSTOutput`] where the outputs `sequences` tensor will have shape `(batch_size, number of
            samples, num_targets)`.
        NF)r   r  r  rA  r   r$   r!   r  )r;   r  r  r  rV  r  r  r)   r  rQ   r  r^  r  s           r1   r  zPatchTSTForRegression.generate  s    0  ${{?? #1!&	
 //<<W=W=WX278L2MNQ<&&(NN++g1-2227KT[[MdMde#g66 Os   B6r  r   )ra   rb   rc   r   r@   r   r)   rh   rg   ri   rU  r`   r  r^  r  rj   rk   s   @r1   r  r    s    ~ 4  .226,0)-#'H
\\H
 ||d*H
 "LL4/	H

 #TkH
  $;H
 D[H
 
,	,H
 H
T U]]_ 37'7\\'7 "LL4/'7 
	'7 '7r3   r  )r  r   r  r  r  r  )NrO   )NFr   r  rI  )Prd   r/  collections.abcr   dataclassesr   r)   r    r   r   activationsr   integrations.deepspeedr	   modeling_flash_attention_utilsr
   modeling_outputsr   modeling_utilsr   r   processing_utilsr   time_series_utilsr   r   r   utilsr   r   r   r   configuration_patchtstr   
get_loggerra   r  r  rh   rf   r2   r5   rm   listrg   re   r   r   r   r   r   r   r  r   r  rL  rQ  rU  rX  r[  r^  distributionsDistributionrd  rp  rr  r  r  r  r  r  r  r  r  r  r  r  r  __all__r   r3   r1   <module>r     s     $ !   & " @ B / F & U U M M 2 
		H	% !%II%<<% 
% <<	%
 LL4'% T\% % '(%:S/		 S/l&		 &2 -1',7%LL7%7% #Tk7% !%	7%
 7%z -1	A%LLA%#czA% #TkA% 	A%H-ryy -`9"bii 9"xH299 HV 02o 02 02f!		 !H5 5p<x- <x~ 
1+ 1 16 
7; 7 7 
7+ 7 7 
++ + +4 
7k 7 7 /; / /#u""// # #%,, #*5<< *%,,:M *chcoco *2 0		  0H3; 3;n 		  6 RYY  8 n
+ n
 n
bryy 8 
m
4 m

m
`" "J 
U
 7 U

U
p 
]RYY ]
]@ 
z73 z7
z7z4RYY 4n 
N73 N7
N7br3   