# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Utilities for preprocessing sequence data."""
# pylint: disable=invalid-name
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from keras_preprocessing import sequence

from tensorflow.python.keras import utils
from tensorflow.python.util.tf_export import keras_export

# Re-export the `keras_preprocessing.sequence` helpers under this module so
# they can be registered with `keras_export` at the bottom of the file.
pad_sequences = sequence.pad_sequences
make_sampling_table = sequence.make_sampling_table
skipgrams = sequence.skipgrams
# TODO(fchollet): consider making `_remove_long_seq` public.
_remove_long_seq = sequence._remove_long_seq  # pylint: disable=protected-access


@keras_export('keras.preprocessing.sequence.TimeseriesGenerator')
class TimeseriesGenerator(sequence.TimeseriesGenerator, utils.Sequence):
  """Utility class for generating batches of temporal data.

  This class takes in a sequence of data-points gathered at
  equal intervals, along with time series parameters such as
  stride, length of history, etc., to produce batches for
  training/validation.

  The implementation is inherited unchanged from
  `keras_preprocessing.sequence.TimeseriesGenerator`; this subclass only mixes
  in `utils.Sequence` and registers the class with `keras_export`.

  Arguments:
      data: Indexable generator (such as list or Numpy array)
          containing consecutive data points (timesteps).
          The data should be 2D, and axis 0 is expected
          to be the time dimension.
      targets: Targets corresponding to timesteps in `data`.
          It should have same length as `data`.
      length: Length of the output sequences (in number of timesteps).
      sampling_rate: Period between successive individual timesteps
          within sequences. For rate `r`, timesteps
          `data[i]`, `data[i-r]`, ... `data[i - length]`
          are used to create a sample sequence.
      stride: Period between successive output sequences.
          For stride `s`, consecutive output samples would
          be centered around `data[i]`, `data[i+s]`, `data[i+2*s]`, etc.
      start_index: Data points earlier than `start_index` will not be used
          in the output sequences. This is useful to reserve part of the
          data for test or validation.
      end_index: Data points later than `end_index` will not be used
          in the output sequences. This is useful to reserve part of the
          data for test or validation.
      shuffle: Whether to shuffle output samples,
          or instead draw them in chronological order.
      reverse: Boolean: if `true`, timesteps in each output sample will be
          in reverse chronological order.
      batch_size: Number of timeseries samples in each batch
          (except maybe the last one).

  Returns:
      A [Sequence](/utils/#sequence) instance.

  Examples:

  ```python
  from keras.preprocessing.sequence import TimeseriesGenerator
  import numpy as np
  data = np.array([[i] for i in range(50)])
  targets = np.array([[i] for i in range(50)])
  data_gen = TimeseriesGenerator(data, targets,
                                 length=10, sampling_rate=2,
                                 batch_size=2)
  assert len(data_gen) == 20
  batch_0 = data_gen[0]
  x, y = batch_0
  assert np.array_equal(x,
                        np.array([[[0], [2], [4], [6], [8]],
                                  [[1], [3], [5], [7], [9]]]))
  assert np.array_equal(y,
                        np.array([[10], [11]]))
  ```
  """


# The aliased functions are defined in `keras_preprocessing`, so they cannot
# be decorated at their definition site; apply `keras_export` by call instead.
keras_export('keras.preprocessing.sequence.pad_sequences')(pad_sequences)
keras_export(
    'keras.preprocessing.sequence.make_sampling_table')(make_sampling_table)
keras_export('keras.preprocessing.sequence.skipgrams')(skipgrams)