1# Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2# 3# Licensed under the Apache License, Version 2.0 (the "License"); 4# you may not use this file except in compliance with the License. 5# You may obtain a copy of the License at 6# 7# http://www.apache.org/licenses/LICENSE-2.0 8# 9# Unless required by applicable law or agreed to in writing, software 10# distributed under the License is distributed on an "AS IS" BASIS, 11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12# See the License for the specific language governing permissions and 13# limitations under the License. 14# ============================================================================== 15"""Estimators for time series models.""" 16 17from __future__ import absolute_import 18from __future__ import division 19from __future__ import print_function 20 21import functools 22 23from tensorflow.contrib.timeseries.python.timeseries import ar_model 24from tensorflow.contrib.timeseries.python.timeseries import feature_keys 25from tensorflow.contrib.timeseries.python.timeseries import head as ts_head_lib 26from tensorflow.contrib.timeseries.python.timeseries import math_utils 27from tensorflow.contrib.timeseries.python.timeseries import state_management 28from tensorflow.contrib.timeseries.python.timeseries.state_space_models import state_space_model 29from tensorflow.contrib.timeseries.python.timeseries.state_space_models import structural_ensemble 30from tensorflow.contrib.timeseries.python.timeseries.state_space_models.filtering_postprocessor import StateInterpolatingAnomalyDetector 31 32from tensorflow.python.estimator import estimator_lib 33from tensorflow.python.estimator.canned import optimizers 34from tensorflow.python.estimator.export import export_lib 35from tensorflow.python.feature_column import feature_column_lib as feature_column 36from tensorflow.python.framework import dtypes 37from tensorflow.python.framework import ops 38from tensorflow.python.framework import tensor_shape 39from tensorflow.python.framework import tensor_util 40from tensorflow.python.ops import array_ops 41from tensorflow.python.ops import math_ops 42from tensorflow.python.ops import parsing_ops 43from tensorflow.python.training import training as train 44from tensorflow.python.util import nest 45 46 47class TimeSeriesRegressor(estimator_lib.Estimator): 48 """An Estimator to fit and evaluate a time series model.""" 49 50 def __init__(self, model, state_manager=None, optimizer=None, model_dir=None, 51 config=None, head_type=ts_head_lib.TimeSeriesRegressionHead): 52 """Initialize the Estimator. 53 54 Args: 55 model: The time series model to wrap (inheriting from TimeSeriesModel). 56 state_manager: The state manager to use, or (by default) 57 PassthroughStateManager if none is needed. 58 optimizer: The optimization algorithm to use when training, inheriting 59 from tf.train.Optimizer. Defaults to Adam with step size 0.02. 60 model_dir: See `Estimator`. 61 config: See `Estimator`. 62 head_type: The kind of head to use for the model (inheriting from 63 `TimeSeriesRegressionHead`). 64 """ 65 input_statistics_generator = math_utils.InputStatisticsFromMiniBatch( 66 dtype=model.dtype, num_features=model.num_features) 67 if state_manager is None: 68 if isinstance(model, ar_model.ARModel): 69 state_manager = state_management.FilteringOnlyStateManager() 70 else: 71 state_manager = state_management.PassthroughStateManager() 72 if optimizer is None: 73 optimizer = train.AdamOptimizer(0.02) 74 self._model = model 75 ts_regression_head = head_type( 76 model=model, state_manager=state_manager, optimizer=optimizer, 77 input_statistics_generator=input_statistics_generator) 78 model_fn = ts_regression_head.create_estimator_spec 79 super(TimeSeriesRegressor, self).__init__( 80 model_fn=model_fn, 81 model_dir=model_dir, 82 config=config) 83 84 def _model_start_state_placeholders( 85 self, batch_size_tensor, static_batch_size=None): 86 """Creates placeholders with zeroed start state for the current model.""" 87 gathered_state = {} 88 # Models may not know the shape of their state without creating some 89 # variables/ops. Avoid polluting the default graph by making a new one. We 90 # use only static metadata from the returned Tensors. 91 with ops.Graph().as_default(): 92 self._model.initialize_graph() 93 # Evaluate the initial state as same-dtype "zero" values. These zero 94 # constants aren't used, but are necessary for feeding to 95 # placeholder_with_default for the "cold start" case where state is not 96 # fed to the model. 97 def _zeros_like_constant(tensor): 98 return tensor_util.constant_value(array_ops.zeros_like(tensor)) 99 start_state = nest.map_structure( 100 _zeros_like_constant, self._model.get_start_state()) 101 for prefixed_state_name, state in ts_head_lib.state_to_dictionary( 102 start_state).items(): 103 state_shape_with_batch = tensor_shape.TensorShape( 104 (static_batch_size,)).concatenate(state.shape) 105 default_state_broadcast = array_ops.tile( 106 state[None, ...], 107 multiples=array_ops.concat( 108 [batch_size_tensor[None], 109 array_ops.ones(len(state.shape), dtype=dtypes.int32)], 110 axis=0)) 111 gathered_state[prefixed_state_name] = array_ops.placeholder_with_default( 112 input=default_state_broadcast, 113 name=prefixed_state_name, 114 shape=state_shape_with_batch) 115 return gathered_state 116 117 def build_one_shot_parsing_serving_input_receiver_fn( 118 self, filtering_length, prediction_length, default_batch_size=None, 119 values_input_dtype=None, truncate_values=False): 120 """Build an input_receiver_fn for export_savedmodel accepting tf.Examples. 121 122 Only compatible with `OneShotPredictionHead` (see `head`). 123 124 Args: 125 filtering_length: The number of time steps used as input to the model, for 126 which values are provided. If more than `filtering_length` values are 127 provided (via `truncate_values`), only the first `filtering_length` 128 values are used. 129 prediction_length: The number of time steps requested as predictions from 130 the model. Times and all exogenous features must be provided for these 131 steps. 132 default_batch_size: If specified, must be a scalar integer. Sets the batch 133 size in the static shape information of all feature Tensors, which means 134 only this batch size will be accepted by the exported model. If None 135 (default), static shape information for batch sizes is omitted. 136 values_input_dtype: An optional dtype specification for values in the 137 tf.Example protos (either float32 or int64, since these are the numeric 138 types supported by tf.Example). After parsing, values are cast to the 139 model's dtype (float32 or float64). 140 truncate_values: If True, expects `filtering_length + prediction_length` 141 values to be provided, but only uses the first `filtering_length`. If 142 False (default), exactly `filtering_length` values must be provided. 143 144 Returns: 145 An input_receiver_fn which may be passed to the Estimator's 146 export_savedmodel. 147 148 Expects features contained in a vector of serialized tf.Examples with 149 shape [batch size] (dtype `tf.string`), each tf.Example containing 150 features with the following shapes: 151 times: [filtering_length + prediction_length] integer 152 values: [filtering_length, num features] floating point. If 153 `truncate_values` is True, expects `filtering_length + 154 prediction_length` values but only uses the first `filtering_length`. 155 all exogenous features: [filtering_length + prediction_length, ...] 156 (various dtypes) 157 """ 158 if values_input_dtype is None: 159 values_input_dtype = dtypes.float32 160 if truncate_values: 161 values_proto_length = filtering_length + prediction_length 162 else: 163 values_proto_length = filtering_length 164 165 def _serving_input_receiver_fn(): 166 """A receiver function to be passed to export_savedmodel.""" 167 times_column = feature_column.numeric_column( 168 key=feature_keys.TrainEvalFeatures.TIMES, dtype=dtypes.int64) 169 values_column = feature_column.numeric_column( 170 key=feature_keys.TrainEvalFeatures.VALUES, dtype=values_input_dtype, 171 shape=(self._model.num_features,)) 172 parsed_features_no_sequence = ( 173 feature_column.make_parse_example_spec( 174 list(self._model.exogenous_feature_columns) 175 + [times_column, values_column])) 176 parsed_features = {} 177 for key, feature_spec in parsed_features_no_sequence.items(): 178 if isinstance(feature_spec, parsing_ops.FixedLenFeature): 179 if key == feature_keys.TrainEvalFeatures.VALUES: 180 parsed_features[key] = feature_spec._replace( 181 shape=((values_proto_length,) 182 + feature_spec.shape)) 183 else: 184 parsed_features[key] = feature_spec._replace( 185 shape=((filtering_length + prediction_length,) 186 + feature_spec.shape)) 187 elif feature_spec.dtype == dtypes.string: 188 parsed_features[key] = parsing_ops.FixedLenFeature( 189 shape=(filtering_length + prediction_length,), 190 dtype=dtypes.string) 191 else: # VarLenFeature 192 raise ValueError("VarLenFeatures not supported, got %s for key %s" 193 % (feature_spec, key)) 194 tfexamples = array_ops.placeholder( 195 shape=[default_batch_size], dtype=dtypes.string, name="input") 196 features = parsing_ops.parse_example( 197 serialized=tfexamples, 198 features=parsed_features) 199 features[feature_keys.TrainEvalFeatures.TIMES] = array_ops.squeeze( 200 features[feature_keys.TrainEvalFeatures.TIMES], axis=-1) 201 features[feature_keys.TrainEvalFeatures.VALUES] = math_ops.cast( 202 features[feature_keys.TrainEvalFeatures.VALUES], 203 dtype=self._model.dtype)[:, :filtering_length] 204 features.update( 205 self._model_start_state_placeholders( 206 batch_size_tensor=array_ops.shape( 207 features[feature_keys.TrainEvalFeatures.TIMES])[0], 208 static_batch_size=default_batch_size)) 209 return export_lib.ServingInputReceiver( 210 features, {"examples": tfexamples}) 211 return _serving_input_receiver_fn 212 213 def build_raw_serving_input_receiver_fn( 214 self, default_batch_size=None, default_series_length=None): 215 """Build an input_receiver_fn for export_savedmodel which accepts arrays. 216 217 Automatically creates placeholders for exogenous `FeatureColumn`s passed to 218 the model. 219 220 Args: 221 default_batch_size: If specified, must be a scalar integer. Sets the batch 222 size in the static shape information of all feature Tensors, which means 223 only this batch size will be accepted by the exported model. If None 224 (default), static shape information for batch sizes is omitted. 225 default_series_length: If specified, must be a scalar integer. Sets the 226 series length in the static shape information of all feature Tensors, 227 which means only this series length will be accepted by the exported 228 model. If None (default), static shape information for series length is 229 omitted. 230 Returns: 231 An input_receiver_fn which may be passed to the Estimator's 232 export_savedmodel. 233 """ 234 def _serving_input_receiver_fn(): 235 """A receiver function to be passed to export_savedmodel.""" 236 placeholders = {} 237 time_placeholder = array_ops.placeholder( 238 name=feature_keys.TrainEvalFeatures.TIMES, 239 dtype=dtypes.int64, 240 shape=[default_batch_size, default_series_length]) 241 placeholders[feature_keys.TrainEvalFeatures.TIMES] = time_placeholder 242 # Values are only necessary when filtering. For prediction the default 243 # value will be ignored. 244 placeholders[feature_keys.TrainEvalFeatures.VALUES] = ( 245 array_ops.placeholder_with_default( 246 name=feature_keys.TrainEvalFeatures.VALUES, 247 input=array_ops.zeros( 248 shape=[ 249 default_batch_size 250 if default_batch_size else 0, default_series_length 251 if default_series_length else 0, self._model.num_features 252 ], 253 dtype=self._model.dtype), 254 shape=(default_batch_size, default_series_length, 255 self._model.num_features))) 256 if self._model.exogenous_feature_columns: 257 with ops.Graph().as_default(): 258 # Default placeholders have only an unknown batch dimension. Make them 259 # in a separate graph, then splice in the series length to the shapes 260 # and re-create them in the outer graph. 261 parsed_features = ( 262 feature_column.make_parse_example_spec( 263 self._model.exogenous_feature_columns)) 264 placeholder_features = parsing_ops.parse_example( 265 serialized=array_ops.placeholder( 266 shape=[None], dtype=dtypes.string), 267 features=parsed_features) 268 exogenous_feature_shapes = { 269 key: (value.get_shape(), value.dtype) for key, value 270 in placeholder_features.items()} 271 for feature_key, (batch_only_feature_shape, value_dtype) in ( 272 exogenous_feature_shapes.items()): 273 batch_only_feature_shape = ( 274 batch_only_feature_shape.with_rank_at_least(1).as_list()) 275 feature_shape = ([default_batch_size, default_series_length] 276 + batch_only_feature_shape[1:]) 277 placeholders[feature_key] = array_ops.placeholder( 278 dtype=value_dtype, name=feature_key, shape=feature_shape) 279 batch_size_tensor = array_ops.shape(time_placeholder)[0] 280 placeholders.update( 281 self._model_start_state_placeholders( 282 batch_size_tensor, static_batch_size=default_batch_size)) 283 return export_lib.ServingInputReceiver(placeholders, placeholders) 284 285 return _serving_input_receiver_fn 286 287 288class ARRegressor(TimeSeriesRegressor): 289 """An Estimator for an (optionally non-linear) autoregressive model. 290 291 ARRegressor is a window-based model, inputting fixed windows of length 292 `input_window_size` and outputting fixed windows of length 293 `output_window_size`. These two parameters must add up to the window_size 294 passed to the `Chunker` used to create an `input_fn` for training or 295 evaluation. `RandomWindowInputFn` is suggested for both training and 296 evaluation, although it may be seeded for deterministic evaluation. 297 """ 298 299 def __init__( 300 self, periodicities, input_window_size, output_window_size, 301 num_features, exogenous_feature_columns=None, num_time_buckets=10, 302 loss=ar_model.ARModel.NORMAL_LIKELIHOOD_LOSS, hidden_layer_sizes=None, 303 anomaly_prior_probability=None, anomaly_distribution=None, 304 optimizer=None, model_dir=None, config=None): 305 """Initialize the Estimator. 306 307 Args: 308 periodicities: periodicities of the input data, in the same units as the 309 time feature. Note this can be a single value or a list of values for 310 multiple periodicities. 311 input_window_size: Number of past time steps of data to look at when doing 312 the regression. 313 output_window_size: Number of future time steps to predict. Note that 314 setting it to > 1 empirically seems to give a better fit. 315 num_features: The dimensionality of the time series (one for univariate, 316 more than one for multivariate). 317 exogenous_feature_columns: A list of `tf.feature_column`s (for example 318 `tf.feature_column.embedding_column`) corresponding to exogenous 319 features which provide extra information to the model but are not part 320 of the series to be predicted. Passed to 321 `tf.feature_column.input_layer`. 322 num_time_buckets: Number of buckets into which to divide (time % 323 periodicity) for generating time based features. 324 loss: Loss function to use for training. Currently supported values are 325 SQUARED_LOSS and NORMAL_LIKELIHOOD_LOSS. Note that for 326 NORMAL_LIKELIHOOD_LOSS, we train the covariance term as well. For 327 SQUARED_LOSS, the evaluation loss is reported based on un-scaled 328 observations and predictions, while the training loss is computed on 329 normalized data. 330 hidden_layer_sizes: list of sizes of hidden layers. 331 anomaly_prior_probability: If specified, constructs a mixture model under 332 which anomalies (modeled with `anomaly_distribution`) have this prior 333 probability. See `AnomalyMixtureARModel`. 334 anomaly_distribution: May not be specified unless 335 anomaly_prior_probability is specified and is not None. Controls the 336 distribution of anomalies under the mixture model. Currently either 337 `ar_model.AnomalyMixtureARModel.GAUSSIAN_ANOMALY` or 338 `ar_model.AnomalyMixtureARModel.CAUCHY_ANOMALY`. See 339 `AnomalyMixtureARModel`. Defaults to `GAUSSIAN_ANOMALY`. 340 optimizer: The optimization algorithm to use when training, inheriting 341 from tf.train.Optimizer. Defaults to Adagrad with step size 0.1. 342 model_dir: See `Estimator`. 343 config: See `Estimator`. 344 Raises: 345 ValueError: For invalid combinations of arguments. 346 """ 347 if optimizer is None: 348 optimizer = train.AdagradOptimizer(0.1) 349 if anomaly_prior_probability is None and anomaly_distribution is not None: 350 raise ValueError("anomaly_prior_probability is required if " 351 "anomaly_distribution is specified.") 352 if anomaly_prior_probability is None: 353 if anomaly_distribution is None: 354 anomaly_distribution = ar_model.AnomalyMixtureARModel.GAUSSIAN_ANOMALY 355 model = ar_model.ARModel( 356 periodicities=periodicities, num_features=num_features, 357 prediction_model_factory=functools.partial( 358 ar_model.FlatPredictionModel, 359 hidden_layer_sizes=hidden_layer_sizes), 360 exogenous_feature_columns=exogenous_feature_columns, 361 num_time_buckets=num_time_buckets, 362 input_window_size=input_window_size, 363 output_window_size=output_window_size, loss=loss) 364 else: 365 if loss != ar_model.ARModel.NORMAL_LIKELIHOOD_LOSS: 366 raise ValueError( 367 "AnomalyMixtureARModel only supports " 368 "ar_model.ARModel.NORMAL_LIKELIHOOD_LOSS for its loss argument.") 369 model = ar_model.AnomalyMixtureARModel( 370 periodicities=periodicities, 371 input_window_size=input_window_size, 372 output_window_size=output_window_size, 373 num_features=num_features, 374 prediction_model_factory=functools.partial( 375 ar_model.FlatPredictionModel, 376 hidden_layer_sizes=hidden_layer_sizes), 377 exogenous_feature_columns=exogenous_feature_columns, 378 num_time_buckets=num_time_buckets, 379 anomaly_prior_probability=anomaly_prior_probability, 380 anomaly_distribution=anomaly_distribution) 381 state_manager = state_management.FilteringOnlyStateManager() 382 super(ARRegressor, self).__init__( 383 model=model, 384 state_manager=state_manager, 385 optimizer=optimizer, 386 model_dir=model_dir, 387 config=config) 388 389 390# TODO(b/113684821): Add detailed documentation on what the input_fn should do. 391# Add an example of making and returning a Dataset object. Determine if 392# endogenous features can be passed in as FeatureColumns. Move ARModel's loss 393# functions into a more general location. 394class LSTMAutoRegressor(TimeSeriesRegressor): 395 """An Estimator for an LSTM autoregressive model. 396 397 LSTMAutoRegressor is a window-based model, inputting fixed windows of length 398 `input_window_size` and outputting fixed windows of length 399 `output_window_size`. These two parameters must add up to the window_size 400 of data returned by the `input_fn`. 401 402 Each periodicity in the `periodicities` arg is divided by the `num_timesteps` 403 into timesteps that are represented as time features added to the model. 404 405 A good heuristic for picking an appropriate periodicity for a given data set 406 would be the length of cycles in the data. For example, energy usage in a 407 home is typically cyclic each day. If the time feature in a home energy 408 usage dataset is in the unit of hours, then 24 would be an appropriate 409 periodicity. Similarly, a good heuristic for `num_timesteps` is how often the 410 data is expected to change within the cycle. For the aforementioned home 411 energy usage dataset and periodicity of 24, then 48 would be a reasonable 412 value if usage is expected to change every half hour. 413 414 Each feature's value for a given example with time t is the difference 415 between t and the start of the timestep it falls under. If it doesn't fall 416 under a feature's associated timestep, then that feature's value is zero. 417 418 For example: if `periodicities` = (9, 12) and `num_timesteps` = 3, then 6 419 features would be added to the model, 3 for periodicity 9 and 3 for 420 periodicity 12. 421 422 For an example data point where t = 17: 423 - It's in the 3rd timestep for periodicity 9 (2nd period is 9-18 and 3rd 424 timestep is 15-18) 425 - It's in the 2nd timestep for periodicity 12 (2nd period is 12-24 and 426 2nd timestep is between 16-20). 427 428 Therefore the 6 added features for this row with t = 17 would be: 429 430 # Feature name (periodicity#_timestep#), feature value 431 P9_T1, 0 # not in first timestep 432 P9_T2, 0 # not in second timestep 433 P9_T3, 2 # 17 - 15 since 15 is the start of the 3rd timestep 434 P12_T1, 0 # not in first timestep 435 P12_T2, 1 # 17 - 16 since 16 is the start of the 2nd timestep 436 P12_T3, 0 # not in third timestep 437 438 Example Code: 439 440 ```python 441 extra_feature_columns = ( 442 feature_column.numeric_column("exogenous_variable"), 443 ) 444 445 estimator = LSTMAutoRegressor( 446 periodicities=10, 447 input_window_size=10, 448 output_window_size=5, 449 model_dir="/path/to/model/dir", 450 num_features=1, 451 extra_feature_columns=extra_feature_columns, 452 num_timesteps=50, 453 num_units=10, 454 optimizer=tf.train.ProximalAdagradOptimizer(...)) 455 456 # Input builders 457 def input_fn_train(): 458 return { 459 "times": tf.range(15)[None, :], 460 "values": tf.random_normal(shape=[1, 15, 1]) 461 } 462 estimator.train(input_fn=input_fn_train, steps=100) 463 464 def input_fn_eval(): 465 pass 466 metrics = estimator.evaluate(input_fn=input_fn_eval, steps=10) 467 468 def input_fn_predict(): 469 pass 470 predictions = estimator.predict(input_fn=input_fn_predict) 471 ``` 472 """ 473 474 def __init__(self, 475 periodicities, 476 input_window_size, 477 output_window_size, 478 model_dir=None, 479 num_features=1, 480 extra_feature_columns=None, 481 num_timesteps=10, 482 loss=ar_model.ARModel.NORMAL_LIKELIHOOD_LOSS, 483 num_units=128, 484 optimizer="Adam", 485 config=None): 486 """Initialize the Estimator. 487 488 Args: 489 periodicities: periodicities of the input data, in the same units as the 490 time feature (for example 24 if feeding hourly data with a daily 491 periodicity, or 60 * 24 if feeding minute-level data with daily 492 periodicity). Note this can be a single value or a list of values for 493 multiple periodicities. 494 input_window_size: Number of past time steps of data to look at when doing 495 the regression. 496 output_window_size: Number of future time steps to predict. Note that 497 setting this value to > 1 empirically seems to give a better fit. 498 model_dir: Directory to save model parameters, graph and etc. This can 499 also be used to load checkpoints from the directory into a estimator 500 to continue training a previously saved model. 501 num_features: The dimensionality of the time series (default value is 502 one for univariate, more than one for multivariate). 503 extra_feature_columns: A list of `tf.feature_column`s (for example 504 `tf.feature_column.embedding_column`) corresponding to features which 505 provide extra information to the model but are not part of the series to 506 be predicted. 507 num_timesteps: Number of buckets into which to divide (time % 508 periodicity). This value multiplied by the number of periodicities is 509 the number of time features added to the model. 510 loss: Loss function to use for training. Currently supported values are 511 SQUARED_LOSS and NORMAL_LIKELIHOOD_LOSS. Note that for 512 NORMAL_LIKELIHOOD_LOSS, we train the covariance term as well. For 513 SQUARED_LOSS, the evaluation loss is reported based on un-scaled 514 observations and predictions, while the training loss is computed on 515 normalized data. 516 num_units: The size of the hidden state in the encoder and decoder LSTM 517 cells. 518 optimizer: string, `tf.train.Optimizer` object, or callable that defines 519 the optimizer algorithm to use for training. Defaults to the Adam 520 optimizer with a learning rate of 0.01. 521 config: Optional `estimator.RunConfig` object to configure the runtime 522 settings. 523 """ 524 optimizer = optimizers.get_optimizer_instance( 525 optimizer, learning_rate=0.01) 526 model = ar_model.ARModel( 527 periodicities=periodicities, 528 input_window_size=input_window_size, 529 output_window_size=output_window_size, 530 num_features=num_features, 531 exogenous_feature_columns=extra_feature_columns, 532 num_time_buckets=num_timesteps, 533 loss=loss, 534 prediction_model_factory=functools.partial( 535 ar_model.LSTMPredictionModel, num_units=num_units)) 536 state_manager = state_management.FilteringOnlyStateManager() 537 super(LSTMAutoRegressor, self).__init__( 538 model=model, 539 state_manager=state_manager, 540 optimizer=optimizer, 541 model_dir=model_dir, 542 config=config, 543 head_type=ts_head_lib.OneShotPredictionHead) 544 545 546class StateSpaceRegressor(TimeSeriesRegressor): 547 """An Estimator for general state space models.""" 548 549 def __init__(self, model, state_manager=None, optimizer=None, model_dir=None, 550 config=None, head_type=ts_head_lib.TimeSeriesRegressionHead): 551 """See TimeSeriesRegressor. Uses the ChainingStateManager by default.""" 552 if not isinstance(model, state_space_model.StateSpaceModel): 553 raise ValueError( 554 "StateSpaceRegressor only supports state space models (children of " 555 "StateSpaceModel) in its `model` argument, got {}.".format(model)) 556 if state_manager is None: 557 state_manager = state_management.ChainingStateManager() 558 super(StateSpaceRegressor, self).__init__( 559 model=model, 560 state_manager=state_manager, 561 optimizer=optimizer, 562 model_dir=model_dir, 563 config=config, 564 head_type=head_type) 565 566 567class StructuralEnsembleRegressor(StateSpaceRegressor): 568 """An Estimator for structural time series models. 569 570 "Structural" refers to the fact that this model explicitly accounts for 571 structure in the data, such as periodicity and trends. 572 573 `StructuralEnsembleRegressor` is a state space model. It contains components 574 for modeling level, local linear trends, periodicity, and mean-reverting 575 transients via a moving average component. Multivariate series are fit with 576 full covariance matrices for observation and latent state transition noise, 577 each feature of the multivariate series having its own latent components. 578 579 Note that unlike `ARRegressor`, `StructuralEnsembleRegressor` is sequential, 580 and so accepts variable window sizes with the same model. 581 582 For training, `RandomWindowInputFn` is recommended as an `input_fn`. Model 583 state is managed through `ChainingStateManager`: since state space models are 584 inherently sequential, we save state from previous iterations to get 585 approximate/eventual consistency while achieving good performance through 586 batched computation. 587 588 For evaluation, either pass a significant chunk of the series in a single 589 window (e.g. set `window_size` to the whole series with 590 `WholeDatasetInputFn`), or use enough random evaluation iterations to cover 591 several passes through the whole dataset. Either method will ensure that stale 592 saved state has been flushed. 593 """ 594 595 def __init__(self, 596 periodicities, 597 num_features, 598 cycle_num_latent_values=11, 599 moving_average_order=4, 600 autoregressive_order=0, 601 exogenous_feature_columns=None, 602 exogenous_update_condition=None, 603 dtype=dtypes.float64, 604 anomaly_prior_probability=None, 605 optimizer=None, 606 model_dir=None, 607 config=None, 608 head_type=ts_head_lib.TimeSeriesRegressionHead): 609 """Initialize the Estimator. 610 611 Args: 612 periodicities: The expected periodicity of the data (for example 24 if 613 feeding hourly data with a daily periodicity, or 60 * 24 if feeding 614 minute-level data with daily periodicity). Either a scalar or a 615 list. This parameter can be any real value, and does not control the 616 size of the model. However, increasing this without increasing 617 `num_values_per_cycle` will lead to smoother periodic behavior, as the 618 same number of distinct values will be cycled through over a longer 619 period of time. 620 num_features: The dimensionality of the time series (one for univariate, 621 more than one for multivariate). 622 cycle_num_latent_values: Along with `moving_average_order` and 623 `num_features`, controls the latent state size of the model. Square 624 matrices of size `num_features * (moving_average_order + 625 cycle_num_latent_values + 3)` are created and multiplied, so larger 626 values may be slow. The trade-off is with resolution: cycling between 627 a smaller number of latent values means that only smoother functions 628 can be modeled. 629 moving_average_order: Controls model size (along with 630 `cycle_num_latent_values` and `autoregressive_order`) and the number 631 of steps before transient deviations revert to the mean defined by the 632 period and level/trend components. 633 autoregressive_order: Each contribution from this component is a linear 634 combination of this many previous contributions. Also helps to 635 determine the model size. Learning autoregressive coefficients 636 typically requires more steps and a smaller step size than other 637 components. 638 exogenous_feature_columns: A list of `tf.feature_column`s (for example 639 `tf.feature_column.embedding_column`) corresponding to exogenous 640 features which provide extra information to the model but are not part 641 of the series to be predicted. Passed to 642 `tf.feature_column.input_layer`. 643 exogenous_update_condition: A function taking two Tensor arguments, 644 `times` (shape [batch size]) and `features` (a dictionary mapping 645 exogenous feature keys to Tensors with shapes [batch size, ...]), and 646 returning a boolean Tensor with shape [batch size] indicating whether 647 state should be updated using exogenous features for each part of the 648 batch. Where it is False, no exogenous update is performed. If None 649 (default), exogenous updates are always performed. Useful for avoiding 650 "leaky" frequent exogenous updates when sparse updates are 651 desired. Called only during graph construction. See the "known 652 anomaly" example for example usage. 653 dtype: The floating point data type to compute with. float32 may be 654 faster, but can be problematic for larger models and longer time series. 655 anomaly_prior_probability: If not None, the model attempts to 656 automatically detect and ignore anomalies during training. This 657 parameter then controls the prior probability of an anomaly. Values 658 closer to 0 mean that points will be discarded less frequently. The 659 default value (None) means that anomalies are not discarded, which may 660 be slightly faster. 661 optimizer: The optimization algorithm to use when training, inheriting 662 from tf.train.Optimizer. Defaults to Adam with step size 0.02. 663 model_dir: See `Estimator`. 664 config: See `Estimator`. 665 head_type: The kind of head to use for the model (inheriting from 666 `TimeSeriesRegressionHead`). 667 """ 668 if anomaly_prior_probability is not None: 669 filtering_postprocessor = StateInterpolatingAnomalyDetector( 670 anomaly_prior_probability=anomaly_prior_probability) 671 else: 672 filtering_postprocessor = None 673 state_space_model_configuration = ( 674 state_space_model.StateSpaceModelConfiguration( 675 num_features=num_features, 676 dtype=dtype, 677 filtering_postprocessor=filtering_postprocessor, 678 exogenous_feature_columns=exogenous_feature_columns, 679 exogenous_update_condition=exogenous_update_condition)) 680 model = structural_ensemble.MultiResolutionStructuralEnsemble( 681 cycle_num_latent_values=cycle_num_latent_values, 682 moving_average_order=moving_average_order, 683 autoregressive_order=autoregressive_order, 684 periodicities=periodicities, 685 configuration=state_space_model_configuration) 686 super(StructuralEnsembleRegressor, self).__init__( 687 model=model, 688 optimizer=optimizer, 689 model_dir=model_dir, 690 config=config, 691 head_type=head_type) 692