# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Linear Estimators (deprecated).

This module and all its submodules are deprecated. See
[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md)
for migration instructions.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import math

import six

from tensorflow.contrib import layers
from tensorflow.contrib.framework import deprecated
from tensorflow.contrib.framework import deprecated_arg_values
from tensorflow.contrib.layers.python.layers import feature_column
from tensorflow.contrib.learn.python.learn.estimators import estimator
from tensorflow.contrib.learn.python.learn.estimators import head as head_lib
from tensorflow.contrib.learn.python.learn.estimators import prediction_key
from tensorflow.contrib.learn.python.learn.utils import export
from tensorflow.contrib.linear_optimizer.python import sdca_optimizer
from tensorflow.python.feature_column import feature_column_lib as fc_core
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.framework import sparse_tensor
from tensorflow.python.framework import tensor_util
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import clip_ops
from tensorflow.python.ops import gradients
from tensorflow.python.ops import partitioned_variables
from tensorflow.python.ops import variable_scope
from tensorflow.python.platform import tf_logging as logging
from tensorflow.python.training import session_run_hook
from tensorflow.python.training import training as train
from tensorflow.python.training import training_util


# The default learning rate of 0.2 is a historical artifact of the initial
# implementation, but seems a reasonable choice.
_LEARNING_RATE = 0.2


def _get_optimizer(spec):
  if isinstance(spec, six.string_types):
    return layers.OPTIMIZER_CLS_NAMES[spec](
        learning_rate=_LEARNING_RATE)
  elif callable(spec):
    return spec()
  return spec
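

# A minimal sketch (not in the original file) of the three spec forms
# `_get_optimizer` accepts; "SGD" is assumed to be a key in
# `layers.OPTIMIZER_CLS_NAMES`:
#
#   _get_optimizer("SGD")                             # name -> class, built
#                                                     #   with _LEARNING_RATE
#   _get_optimizer(lambda: train.FtrlOptimizer(0.1))  # callable -> called
#   _get_optimizer(train.FtrlOptimizer(0.1))          # instance -> returned
#                                                     #   unchanged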


# TODO(ispir): Remove this function by fixing '_infer_model' with single
# outputs and the as_iterable case.
def _as_iterable(preds, output):
  for pred in preds:
    yield pred[output]


def _add_bias_column(feature_columns, columns_to_tensors, bias_variable,
                     columns_to_variables):
  """Adds a fake bias feature column filled with all 1s."""
  # TODO(b/31008490): Move definition to a common constants place.
  bias_column_name = "tf_virtual_bias_column"
  if any(col.name == bias_column_name for col in feature_columns):
    raise ValueError("%s is a reserved column name." % bias_column_name)
  if not feature_columns:
    raise ValueError("feature_columns can't be empty.")

  # Loop through input tensors until we can figure out batch_size.
  batch_size = None
  for column in columns_to_tensors.values():
    if isinstance(column, tuple):
      column = column[0]
    if isinstance(column, sparse_tensor.SparseTensor):
      shape = tensor_util.constant_value(column.dense_shape)
      if shape is not None:
        batch_size = shape[0]
        break
    else:
      batch_size = array_ops.shape(column)[0]
      break
  if batch_size is None:
    raise ValueError("Could not infer batch size from input features.")

  bias_column = layers.real_valued_column(bias_column_name)
  columns_to_tensors[bias_column] = array_ops.ones([batch_size, 1],
                                                   dtype=dtypes.float32)
  columns_to_variables[bias_column] = [bias_variable]


def _linear_model_fn(features, labels, mode, params, config=None):
  """A model_fn for linear models that use a gradient-based optimizer.

  Args:
    features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`).
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of
      dtype `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    params: A dict of hyperparameters.
      The following hyperparameters are expected:
      * head: A `Head` instance.
      * feature_columns: An iterable containing all the feature columns used
        by the model.
      * optimizer: string, `Optimizer` object, or callable that defines the
        optimizer to use for training. If `None`, will use an FTRL optimizer.
      * gradient_clip_norm: A float > 0. If provided, gradients are
        clipped to their global norm with this clipping ratio.
      * joint_weights: If True, the weights for all columns will be stored in
        a single (possibly partitioned) variable. It's more efficient, but
        it's incompatible with SDCAOptimizer, and requires that all feature
        columns be sparse and use the 'sum' combiner.
    config: `RunConfig` object to configure the runtime settings.

  Returns:
    A `ModelFnOps` instance.

  Raises:
    ValueError: If mode is not any of the `ModeKeys`.
  """
  head = params["head"]
  feature_columns = params["feature_columns"]
  optimizer = params.get("optimizer") or _get_default_optimizer(feature_columns)
  gradient_clip_norm = params.get("gradient_clip_norm", None)
  num_ps_replicas = config.num_ps_replicas if config else 0
  joint_weights = params.get("joint_weights", False)

  if not isinstance(features, dict):
    features = {"": features}

  parent_scope = "linear"
  partitioner = partitioned_variables.min_max_variable_partitioner(
      max_partitions=num_ps_replicas,
      min_slice_size=64 << 20)

  with variable_scope.variable_scope(
      parent_scope,
      values=tuple(six.itervalues(features)),
      partitioner=partitioner) as scope:
    if all(isinstance(fc, feature_column._FeatureColumn)  # pylint: disable=protected-access
           for fc in feature_columns):
      if joint_weights:
        layer_fn = layers.joint_weighted_sum_from_feature_columns
      else:
        layer_fn = layers.weighted_sum_from_feature_columns
      logits, _, _ = layer_fn(
          columns_to_tensors=features,
          feature_columns=feature_columns,
          num_outputs=head.logits_dimension,
          weight_collections=[parent_scope],
          scope=scope)
    else:
      logits = fc_core.linear_model(
          features=features,
          feature_columns=feature_columns,
          units=head.logits_dimension,
          weight_collections=[parent_scope])

    def _train_op_fn(loss):
      global_step = training_util.get_global_step()
      my_vars = ops.get_collection(parent_scope)
      grads = gradients.gradients(loss, my_vars)
      if gradient_clip_norm:
        grads, _ = clip_ops.clip_by_global_norm(grads, gradient_clip_norm)
      return (_get_optimizer(optimizer).apply_gradients(
          zip(grads, my_vars), global_step=global_step))

    return head.create_model_fn_ops(
        features=features,
        mode=mode,
        labels=labels,
        train_op_fn=_train_op_fn,
        logits=logits)
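

# A hedged usage sketch (not in the original file): the estimator classes
# below wire `_linear_model_fn` into an `Estimator` through `params`, roughly
# like this. The column name "x" is a made-up example.
#
#   params = {
#       "head": head_lib.regression_head(),
#       "feature_columns": [layers.real_valued_column("x")],
#       "optimizer": None,              # falls back to the default FTRL
#       "gradient_clip_norm": None,
#       "joint_weights": False,
#   }
#   est = estimator.Estimator(model_fn=_linear_model_fn, params=params)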


def sdca_model_fn(features, labels, mode, params):
  """A model_fn for linear models that use the SDCA optimizer.

  Args:
    features: A dict of `Tensor` keyed by column name.
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of
      dtype `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    params: A dict of hyperparameters.
      The following hyperparameters are expected:
      * head: A `Head` instance. Type must be one of `_BinarySvmHead`,
        `_RegressionHead` or `_BinaryLogisticHead`.
      * feature_columns: An iterable containing all the feature columns used
        by the model.
      * optimizer: An `SDCAOptimizer` instance.
      * weight_column_name: A string defining the weight feature column, or
        None if there are no weights.
      * update_weights_hook: A `SessionRunHook` object or None. Used to update
        model weights.

  Returns:
    A `ModelFnOps` instance.

  Raises:
    ValueError: If `optimizer` is not an `SDCAOptimizer` instance.
    ValueError: If the type of head is neither `_BinarySvmHead`,
      `_BinaryLogisticHead`, nor `_RegressionHead`.
    ValueError: If mode is not any of the `ModeKeys`.
  """
  head = params["head"]
  feature_columns = params["feature_columns"]
  optimizer = params["optimizer"]
  weight_column_name = params["weight_column_name"]
  update_weights_hook = params.get("update_weights_hook", None)

  if not isinstance(optimizer, sdca_optimizer.SDCAOptimizer):
    raise ValueError("Optimizer must be of type SDCAOptimizer")

  if isinstance(head, head_lib._BinarySvmHead):  # pylint: disable=protected-access
    loss_type = "hinge_loss"
  elif isinstance(head, head_lib._BinaryLogisticHead):  # pylint: disable=protected-access
    loss_type = "logistic_loss"
  elif isinstance(head, head_lib._RegressionHead):  # pylint: disable=protected-access
    assert head.logits_dimension == 1, ("SDCA only applies for "
                                        "logits_dimension=1.")
    loss_type = "squared_loss"
  else:
    raise ValueError("Unsupported head type: {}".format(head))

  parent_scope = "linear"

  with variable_scope.variable_scope(
      values=features.values(),
      name_or_scope=parent_scope,
      partitioner=optimizer.partitioner) as scope:
    features = features.copy()
    features.update(layers.transform_features(features, feature_columns))
    logits, columns_to_variables, bias = (
        layers.weighted_sum_from_feature_columns(
            columns_to_tensors=features,
            feature_columns=feature_columns,
            num_outputs=1,
            scope=scope))

    _add_bias_column(feature_columns, features, bias, columns_to_variables)

  def _train_op_fn(unused_loss):
    global_step = training_util.get_global_step()
    sdca_model, train_op = optimizer.get_train_step(columns_to_variables,
                                                    weight_column_name,
                                                    loss_type, features,
                                                    labels, global_step)
    if update_weights_hook is not None:
      update_weights_hook.set_parameters(sdca_model, train_op)
    return train_op

  model_fn_ops = head.create_model_fn_ops(
      features=features,
      labels=labels,
      mode=mode,
      train_op_fn=_train_op_fn,
      logits=logits)
  if update_weights_hook is not None:
    return model_fn_ops._replace(
        training_chief_hooks=(model_fn_ops.training_chief_hooks +
                              [update_weights_hook]))
  return model_fn_ops
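

# The head-to-loss mapping used by `sdca_model_fn` above, summarized:
#   _BinarySvmHead      -> "hinge_loss"
#   _BinaryLogisticHead -> "logistic_loss"
#   _RegressionHead     -> "squared_loss"  (requires logits_dimension == 1)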


# Ensures consistency with LinearComposableModel.
def _get_default_optimizer(feature_columns):
  learning_rate = min(_LEARNING_RATE, 1.0 / math.sqrt(len(feature_columns)))
  return train.FtrlOptimizer(learning_rate=learning_rate)
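
# Worked examples of the default rate (illustrative, not in the original):
# with 100 feature columns, min(0.2, 1/sqrt(100)) = 0.1; with 9 columns,
# min(0.2, 1/3) = 0.2, i.e. the _LEARNING_RATE cap applies.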


class _SdcaUpdateWeightsHook(session_run_hook.SessionRunHook):
  """SessionRunHook to update and shrink SDCA model weights."""

  def __init__(self):
    pass

  def set_parameters(self, sdca_model, train_op):
    self._sdca_model = sdca_model
    self._train_op = train_op

  def begin(self):
    """Construct the update_weights op.

    The op is implicitly added to the default graph.
    """
    self._update_op = self._sdca_model.update_weights(self._train_op)

  def before_run(self, run_context):
    """Return the update_weights op so that it is executed during this run."""
    return session_run_hook.SessionRunArgs(self._update_op)
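

# Lifecycle note (summarizing the code above): `sdca_model_fn` calls
# `set_parameters` while the train op is built, `begin` constructs the update
# op before the session starts running, and `before_run` requests that op on
# every step; the hook is attached as a chief-only training hook.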


class LinearClassifier(estimator.Estimator):
  """Linear classifier model.

  THIS CLASS IS DEPRECATED. See
  [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md)
  for general migration instructions.

  Train a linear model to classify instances into one of multiple possible
  classes. When the number of possible classes is 2, this is binary
  classification.

  Example:

  ```python
  sparse_column_a = sparse_column_with_hash_bucket(...)
  sparse_column_b = sparse_column_with_hash_bucket(...)

  sparse_feature_a_x_sparse_feature_b = crossed_column(...)

  # Estimator using the default optimizer.
  estimator = LinearClassifier(
      feature_columns=[sparse_column_a, sparse_feature_a_x_sparse_feature_b])

  # Or estimator using the FTRL optimizer with regularization.
  estimator = LinearClassifier(
      feature_columns=[sparse_column_a, sparse_feature_a_x_sparse_feature_b],
      optimizer=tf.train.FtrlOptimizer(
          learning_rate=0.1,
          l1_regularization_strength=0.001
      ))

  # Or estimator using the SDCAOptimizer.
  estimator = LinearClassifier(
      feature_columns=[sparse_column_a, sparse_feature_a_x_sparse_feature_b],
      optimizer=tf.contrib.linear_optimizer.SDCAOptimizer(
          example_id_column='example_id',
          num_loss_partitions=...,
          symmetric_l2_regularization=2.0
      ))

  # Input builders
  def input_fn_train():  # returns x, y (where y represents label's class
    ...                  # index).
  def input_fn_eval():  # returns x, y (where y represents label's class
    ...                 # index).
  def input_fn_predict():  # returns x, None.
    ...
  estimator.fit(input_fn=input_fn_train)
  estimator.evaluate(input_fn=input_fn_eval)
  # predict_classes returns class indices.
  estimator.predict_classes(input_fn=input_fn_predict)
  ```

  If the user specifies `label_keys` in the constructor, labels must be
  strings from the `label_keys` vocabulary. Example:

  ```python
  label_keys = ['label0', 'label1', 'label2']
  estimator = LinearClassifier(
      n_classes=n_classes,
      feature_columns=[sparse_column_a, sparse_feature_a_x_sparse_feature_b],
      label_keys=label_keys)

  def input_fn_train():  # returns x, y (where y is one of label_keys).
    pass
  estimator.fit(input_fn=input_fn_train)

  def input_fn_eval():  # returns x, y (where y is one of label_keys).
    pass
  estimator.evaluate(input_fn=input_fn_eval)
  def input_fn_predict():  # returns x, None
    pass
  # predict_classes returns one of label_keys.
  estimator.predict_classes(input_fn=input_fn_predict)
  ```

  Input of `fit` and `evaluate` should have the following features,
  otherwise there will be a `KeyError`:

  * if `weight_column_name` is not `None`, a feature with
    `key=weight_column_name` whose value is a `Tensor`.
  * for each `column` in `feature_columns`:
    - if `column` is a `SparseColumn`, a feature with `key=column.name`
      whose `value` is a `SparseTensor`.
    - if `column` is a `WeightedSparseColumn`, two features: the first with
      `key` the id column name, the second with `key` the weight column name.
      Both features' `value` must be a `SparseTensor`.
    - if `column` is a `RealValuedColumn`, a feature with `key=column.name`
      whose `value` is a `Tensor`.
  """

  def __init__(self,  # _joint_weight pylint: disable=invalid-name
               feature_columns,
               model_dir=None,
               n_classes=2,
               weight_column_name=None,
               optimizer=None,
               gradient_clip_norm=None,
               enable_centered_bias=False,
               _joint_weight=False,
               config=None,
               feature_engineering_fn=None,
               label_keys=None):
    """Construct a `LinearClassifier` estimator object.

    Args:
      feature_columns: An iterable containing all the feature columns used by
        the model. All items in the set should be instances of classes derived
        from `FeatureColumn`.
      model_dir: Directory to save model parameters, graph, etc. This can
        also be used to load checkpoints from the directory into an estimator
        to continue training a previously saved model.
      n_classes: number of label classes. Default is binary classification.
        Note that class labels are integers representing the class index (i.e.
        values from 0 to n_classes-1). For arbitrary label values (e.g. string
        labels), convert to class indices first.
      weight_column_name: A string defining the feature column name
        representing weights. It is used to down-weight or boost examples
        during training. It will be multiplied by the loss of the example.
      optimizer: The optimizer used to train the model. If specified, it
        should be either an instance of `tf.Optimizer` or the SDCAOptimizer.
        If `None`, the Ftrl optimizer will be used.
      gradient_clip_norm: A `float` > 0. If provided, gradients are clipped
        to their global norm with this clipping ratio. See
        `tf.clip_by_global_norm` for more details.
      enable_centered_bias: A bool. If True, the estimator will learn a
        centered bias variable for each class. The rest of the model structure
        learns the residual after the centered bias.
      _joint_weight: If True, the weights for all columns will be stored in a
        single (possibly partitioned) variable. It's more efficient, but it's
        incompatible with SDCAOptimizer, and requires that all feature columns
        be sparse and use the 'sum' combiner.
      config: `RunConfig` object to configure the runtime settings.
      feature_engineering_fn: Feature engineering function. Takes features and
        labels which are the output of `input_fn` and returns features and
        labels which will be fed into the model.
      label_keys: Optional list of strings with size `[n_classes]` defining
        the label vocabulary. Only supported for `n_classes` > 2.

    Returns:
      A `LinearClassifier` estimator.

    Raises:
      ValueError: if n_classes < 2.
      ValueError: if enable_centered_bias=True and optimizer is SDCAOptimizer.
    """
    if (isinstance(optimizer, sdca_optimizer.SDCAOptimizer) and
        enable_centered_bias):
      raise ValueError("enable_centered_bias is not supported with SDCA")

    self._feature_columns = tuple(feature_columns or [])
    assert self._feature_columns

    chief_hook = None
    head = head_lib.multi_class_head(
        n_classes,
        weight_column_name=weight_column_name,
        enable_centered_bias=enable_centered_bias,
        label_keys=label_keys)
    params = {
        "head": head,
        "feature_columns": feature_columns,
        "optimizer": optimizer,
    }

    if isinstance(optimizer, sdca_optimizer.SDCAOptimizer):
      assert not _joint_weight, ("_joint_weight is incompatible with the"
                                 " SDCAOptimizer")
      assert n_classes == 2, "SDCA only applies to binary classification."

      model_fn = sdca_model_fn
      # The model_fn passes the model parameters to the chief_hook. We then
      # use the hook to update weights and shrink step only on the chief.
      chief_hook = _SdcaUpdateWeightsHook()
      params.update({
          "weight_column_name": weight_column_name,
          "update_weights_hook": chief_hook,
      })
    else:
      model_fn = _linear_model_fn
      params.update({
          "gradient_clip_norm": gradient_clip_norm,
          "joint_weights": _joint_weight,
      })

    super(LinearClassifier, self).__init__(
        model_fn=model_fn,
        model_dir=model_dir,
        config=config,
        params=params,
        feature_engineering_fn=feature_engineering_fn)

  @deprecated_arg_values(
      estimator.AS_ITERABLE_DATE, estimator.AS_ITERABLE_INSTRUCTIONS,
      as_iterable=False)
  @deprecated_arg_values(
      "2017-03-01",
      "Please switch to predict_classes, or set `outputs` argument.",
      outputs=None)
  def predict(self, x=None, input_fn=None, batch_size=None, outputs=None,
              as_iterable=True):
    """Returns predictions for given features.

    By default, returns predicted classes. But this default will be dropped
    soon. Users should either pass `outputs`, or call the `predict_classes`
    method.

    Args:
      x: features.
      input_fn: Input function. If set, x must be None.
      batch_size: Override default batch size.
      outputs: list of `str`, names of the outputs to predict.
        If `None`, returns classes.
      as_iterable: If True, return an iterable which keeps yielding
        predictions for each example until inputs are exhausted. Note: The
        inputs must terminate if you want the iterable to terminate (e.g. be
        sure to pass num_epochs=1 if you are using something like
        read_batch_features).

    Returns:
      Numpy array of predicted classes with shape [batch_size] (or an iterable
      of predicted classes if as_iterable is True). Each predicted class is
      represented by its class index (i.e. integer from 0 to n_classes-1).
      If `outputs` is set, returns a dict of predictions.
    """
    if not outputs:
      return self.predict_classes(
          x=x,
          input_fn=input_fn,
          batch_size=batch_size,
          as_iterable=as_iterable)
    return super(LinearClassifier, self).predict(
        x=x,
        input_fn=input_fn,
        batch_size=batch_size,
        outputs=outputs,
        as_iterable=as_iterable)

  @deprecated_arg_values(
      estimator.AS_ITERABLE_DATE, estimator.AS_ITERABLE_INSTRUCTIONS,
      as_iterable=False)
  def predict_classes(self, x=None, input_fn=None, batch_size=None,
                      as_iterable=True):
    """Returns predicted classes for given features.

    Args:
      x: features.
      input_fn: Input function. If set, x must be None.
      batch_size: Override default batch size.
      as_iterable: If True, return an iterable which keeps yielding
        predictions for each example until inputs are exhausted. Note: The
        inputs must terminate if you want the iterable to terminate (e.g. be
        sure to pass num_epochs=1 if you are using something like
        read_batch_features).

    Returns:
      Numpy array of predicted classes with shape [batch_size] (or an iterable
      of predicted classes if as_iterable is True). Each predicted class is
      represented by its class index (i.e. integer from 0 to n_classes-1).
    """
    key = prediction_key.PredictionKey.CLASSES
    preds = super(LinearClassifier, self).predict(
        x=x,
        input_fn=input_fn,
        batch_size=batch_size,
        outputs=[key],
        as_iterable=as_iterable)
    if as_iterable:
      return _as_iterable(preds, output=key)
    return preds[key]

  @deprecated_arg_values(
      estimator.AS_ITERABLE_DATE, estimator.AS_ITERABLE_INSTRUCTIONS,
      as_iterable=False)
  def predict_proba(self, x=None, input_fn=None, batch_size=None,
                    as_iterable=True):
    """Returns predicted probabilities for given features.

    Args:
      x: features.
      input_fn: Input function. If set, x must be None.
      batch_size: Override default batch size.
      as_iterable: If True, return an iterable which keeps yielding
        predictions for each example until inputs are exhausted. Note: The
        inputs must terminate if you want the iterable to terminate (e.g. be
        sure to pass num_epochs=1 if you are using something like
        read_batch_features).

    Returns:
      Numpy array of predicted probabilities with shape
      [batch_size, n_classes] (or an iterable of predicted probabilities if
      as_iterable is True).
    """
    key = prediction_key.PredictionKey.PROBABILITIES
    preds = super(LinearClassifier, self).predict(
        x=x,
        input_fn=input_fn,
        batch_size=batch_size,
        outputs=[key],
        as_iterable=as_iterable)
    if as_iterable:
      return _as_iterable(preds, output=key)
    return preds[key]

  @deprecated("2017-03-25", "Please use Estimator.export_savedmodel() instead.")
  def export(self,
             export_dir,
             input_fn=None,
             input_feature_key=None,
             use_deprecated_input_fn=True,
             signature_fn=None,
             default_batch_size=1,
             exports_to_keep=None):
    """See BaseEstimator.export."""
    def default_input_fn(unused_estimator, examples):
      return layers.parse_feature_columns_from_examples(
          examples, self._feature_columns)

    return super(LinearClassifier, self).export(
        export_dir=export_dir,
        input_fn=input_fn or default_input_fn,
        input_feature_key=input_feature_key,
        use_deprecated_input_fn=use_deprecated_input_fn,
        signature_fn=(signature_fn or
                      export.classification_signature_fn_with_prob),
        prediction_key=prediction_key.PredictionKey.PROBABILITIES,
        default_batch_size=default_batch_size,
        exports_to_keep=exports_to_keep)
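

# Hedged sketch (not in the original): consuming the as_iterable output of
# `LinearClassifier.predict_proba`; `classifier` and `input_fn_predict` are
# assumed to be set up as in the class docstring above.
#
#   for probs in classifier.predict_proba(input_fn=input_fn_predict):
#       print(probs)  # one [n_classes] probability vector per example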


class LinearRegressor(estimator.Estimator):
  """Linear regressor model.

  THIS CLASS IS DEPRECATED. See
  [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md)
  for general migration instructions.

  Train a linear regression model to predict label value given observation of
  feature values.

  Example:

  ```python
  sparse_column_a = sparse_column_with_hash_bucket(...)
  sparse_column_b = sparse_column_with_hash_bucket(...)

  sparse_feature_a_x_sparse_feature_b = crossed_column(...)

  estimator = LinearRegressor(
      feature_columns=[sparse_column_a, sparse_feature_a_x_sparse_feature_b])

  # Input builders
  def input_fn_train():  # returns x, y
    ...
  def input_fn_eval():  # returns x, y
    ...
  estimator.fit(input_fn=input_fn_train)
  estimator.evaluate(input_fn=input_fn_eval)
  estimator.predict(x=x)
  ```

  Input of `fit` and `evaluate` should have the following features,
  otherwise there will be a KeyError:

  * if `weight_column_name` is not `None`:
    key=weight_column_name, value=a `Tensor`
  * for column in `feature_columns`:
    - if isinstance(column, `SparseColumn`):
      key=column.name, value=a `SparseTensor`
    - if isinstance(column, `WeightedSparseColumn`):
      {key=id column name, value=a `SparseTensor`,
       key=weight column name, value=a `SparseTensor`}
    - if isinstance(column, `RealValuedColumn`):
      key=column.name, value=a `Tensor`
  """

  def __init__(self,  # _joint_weights: pylint: disable=invalid-name
               feature_columns,
               model_dir=None,
               weight_column_name=None,
               optimizer=None,
               gradient_clip_norm=None,
               enable_centered_bias=False,
               label_dimension=1,
               _joint_weights=False,
               config=None,
               feature_engineering_fn=None):
    """Construct a `LinearRegressor` estimator object.

    Args:
      feature_columns: An iterable containing all the feature columns used by
        the model. All items in the set should be instances of classes derived
        from `FeatureColumn`.
      model_dir: Directory to save model parameters, graph, etc. This can
        also be used to load checkpoints from the directory into an estimator
        to continue training a previously saved model.
      weight_column_name: A string defining the feature column name
        representing weights. It is used to down-weight or boost examples
        during training. It will be multiplied by the loss of the example.
      optimizer: An instance of `tf.Optimizer` used to train the model. If
        `None`, will use an Ftrl optimizer.
      gradient_clip_norm: A `float` > 0. If provided, gradients are clipped
        to their global norm with this clipping ratio. See
        `tf.clip_by_global_norm` for more details.
      enable_centered_bias: A bool. If True, the estimator will learn a
        centered bias variable for each class. The rest of the model structure
        learns the residual after the centered bias.
      label_dimension: Number of regression targets per example. This is the
        size of the last dimension of the labels and logits `Tensor` objects
        (typically, these have shape `[batch_size, label_dimension]`).
      _joint_weights: If True, use a single (possibly partitioned) variable to
        store the weights. It's faster, but requires that all feature columns
        be sparse and use the 'sum' combiner. Incompatible with SDCAOptimizer.
      config: `RunConfig` object to configure the runtime settings.
      feature_engineering_fn: Feature engineering function. Takes features and
        labels which are the output of `input_fn` and returns features and
        labels which will be fed into the model.

    Returns:
      A `LinearRegressor` estimator.
    """
    self._feature_columns = tuple(feature_columns or [])
    assert self._feature_columns

    chief_hook = None
    if (isinstance(optimizer, sdca_optimizer.SDCAOptimizer) and
        enable_centered_bias):
      enable_centered_bias = False
      logging.warning("centered_bias is not supported with SDCA, "
                      "please disable it explicitly.")
    head = head_lib.regression_head(
        weight_column_name=weight_column_name,
        label_dimension=label_dimension,
        enable_centered_bias=enable_centered_bias)
    params = {
        "head": head,
        "feature_columns": feature_columns,
        "optimizer": optimizer,
    }

    if isinstance(optimizer, sdca_optimizer.SDCAOptimizer):
      assert label_dimension == 1, "SDCA only applies for label_dimension=1."
      assert not _joint_weights, ("_joint_weights is incompatible with"
                                  " SDCAOptimizer.")

      model_fn = sdca_model_fn
      # The model_fn passes the model parameters to the chief_hook. We then
      # use the hook to update weights and shrink step only on the chief.
      chief_hook = _SdcaUpdateWeightsHook()
      params.update({
          "weight_column_name": weight_column_name,
          "update_weights_hook": chief_hook,
      })
    else:
      model_fn = _linear_model_fn
      params.update({
          "gradient_clip_norm": gradient_clip_norm,
          "joint_weights": _joint_weights,
      })

    super(LinearRegressor, self).__init__(
        model_fn=model_fn,
        model_dir=model_dir,
        config=config,
        params=params,
        feature_engineering_fn=feature_engineering_fn)

  @deprecated_arg_values(
      estimator.AS_ITERABLE_DATE, estimator.AS_ITERABLE_INSTRUCTIONS,
      as_iterable=False)
  @deprecated_arg_values(
      "2017-03-01",
      "Please switch to predict_scores, or set `outputs` argument.",
      outputs=None)
  def predict(self, x=None, input_fn=None, batch_size=None, outputs=None,
              as_iterable=True):
    """Returns predictions for given features.

    By default, returns predicted scores. But this default will be dropped
    soon. Users should either pass `outputs`, or call the `predict_scores`
    method.

    Args:
      x: features.
      input_fn: Input function. If set, x must be None.
      batch_size: Override default batch size.
      outputs: list of `str`, names of the outputs to predict.
        If `None`, returns scores.
      as_iterable: If True, return an iterable which keeps yielding
        predictions for each example until inputs are exhausted. Note: The
        inputs must terminate if you want the iterable to terminate (e.g. be
        sure to pass num_epochs=1 if you are using something like
        read_batch_features).

    Returns:
      Numpy array of predicted scores (or an iterable of predicted scores if
      as_iterable is True). If `label_dimension == 1`, the shape of the output
      is `[batch_size]`, otherwise the shape is
      `[batch_size, label_dimension]`. If `outputs` is set, returns a dict of
      predictions.
    """
    if not outputs:
      return self.predict_scores(
          x=x,
          input_fn=input_fn,
          batch_size=batch_size,
          as_iterable=as_iterable)
    return super(LinearRegressor, self).predict(
        x=x,
        input_fn=input_fn,
        batch_size=batch_size,
        outputs=outputs,
        as_iterable=as_iterable)

  @deprecated_arg_values(
      estimator.AS_ITERABLE_DATE, estimator.AS_ITERABLE_INSTRUCTIONS,
      as_iterable=False)
  def predict_scores(self, x=None, input_fn=None, batch_size=None,
                     as_iterable=True):
    """Returns predicted scores for given features.

    Args:
      x: features.
      input_fn: Input function. If set, x must be None.
      batch_size: Override default batch size.
      as_iterable: If True, return an iterable which keeps yielding
        predictions for each example until inputs are exhausted. Note: The
        inputs must terminate if you want the iterable to terminate (e.g. be
        sure to pass num_epochs=1 if you are using something like
        read_batch_features).

    Returns:
      Numpy array of predicted scores (or an iterable of predicted scores if
      as_iterable is True). If `label_dimension == 1`, the shape of the output
      is `[batch_size]`, otherwise the shape is
      `[batch_size, label_dimension]`.
    """
    key = prediction_key.PredictionKey.SCORES
    preds = super(LinearRegressor, self).predict(
        x=x,
        input_fn=input_fn,
        batch_size=batch_size,
        outputs=[key],
        as_iterable=as_iterable)
    if as_iterable:
      return _as_iterable(preds, output=key)
    return preds[key]
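
  # Hedged sketch (not in the original): with label_dimension=2 and
  # as_iterable=False, predict_scores returns an array shaped
  # [batch_size, 2]; with as_iterable=True it yields one [2] vector per
  # example. `regressor` and `input_fn_predict` are assumed names:
  #
  #   for score in regressor.predict_scores(input_fn=input_fn_predict):
  #       print(score)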

  @deprecated("2017-03-25", "Please use Estimator.export_savedmodel() instead.")
  def export(self,
             export_dir,
             input_fn=None,
             input_feature_key=None,
             use_deprecated_input_fn=True,
             signature_fn=None,
             default_batch_size=1,
             exports_to_keep=None):
    """See BaseEstimator.export."""
    def default_input_fn(unused_estimator, examples):
      return layers.parse_feature_columns_from_examples(
          examples, self._feature_columns)

    return super(LinearRegressor, self).export(
        export_dir=export_dir,
        input_fn=input_fn or default_input_fn,
        input_feature_key=input_feature_key,
        use_deprecated_input_fn=use_deprecated_input_fn,
        signature_fn=(signature_fn or export.regression_signature_fn),
        prediction_key=prediction_key.PredictionKey.SCORES,
        default_batch_size=default_batch_size,
        exports_to_keep=exports_to_keep)


class LinearEstimator(estimator.Estimator):
  """Linear model with user-specified head.

  THIS CLASS IS DEPRECATED. See
  [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md)
  for general migration instructions.

  Train a generalized linear model to predict label value given observation
  of feature values.

  Example:
  To do Poisson regression,

  ```python
  sparse_column_a = sparse_column_with_hash_bucket(...)
  sparse_column_b = sparse_column_with_hash_bucket(...)

  sparse_feature_a_x_sparse_feature_b = crossed_column(...)

  estimator = LinearEstimator(
      feature_columns=[sparse_column_a, sparse_feature_a_x_sparse_feature_b],
      head=head_lib.poisson_regression_head())

  # Input builders
  def input_fn_train():  # returns x, y
    ...
  def input_fn_eval():  # returns x, y
    ...
  estimator.fit(input_fn=input_fn_train)
  estimator.evaluate(input_fn=input_fn_eval)
  estimator.predict(x=x)
  ```

  Input of `fit` and `evaluate` should have the following features,
  otherwise there will be a KeyError:

  * if `weight_column_name` is not `None`:
    key=weight_column_name, value=a `Tensor`
  * for column in `feature_columns`:
    - if isinstance(column, `SparseColumn`):
      key=column.name, value=a `SparseTensor`
    - if isinstance(column, `WeightedSparseColumn`):
      {key=id column name, value=a `SparseTensor`,
       key=weight column name, value=a `SparseTensor`}
    - if isinstance(column, `RealValuedColumn`):
      key=column.name, value=a `Tensor`
  """

  def __init__(self,  # _joint_weights: pylint: disable=invalid-name
               feature_columns,
               head,
               model_dir=None,
               weight_column_name=None,
               optimizer=None,
               gradient_clip_norm=None,
               _joint_weights=False,
               config=None,
               feature_engineering_fn=None):
    """Construct a `LinearEstimator` object.

    Args:
      feature_columns: An iterable containing all the feature columns used by
        the model. All items in the set should be instances of classes derived
        from `FeatureColumn`.
      head: An instance of _Head class.
      model_dir: Directory to save model parameters, graph, etc. This can
        also be used to load checkpoints from the directory into an estimator
        to continue training a previously saved model.
      weight_column_name: A string defining the feature column name
        representing weights. It is used to down-weight or boost examples
        during training. It will be multiplied by the loss of the example.
      optimizer: An instance of `tf.Optimizer` used to train the model. If
        `None`, will use an Ftrl optimizer.
      gradient_clip_norm: A `float` > 0. If provided, gradients are clipped
        to their global norm with this clipping ratio. See
        `tf.clip_by_global_norm` for more details.
      _joint_weights: If True, use a single (possibly partitioned) variable to
        store the weights. It's faster, but requires that all feature columns
        be sparse and use the 'sum' combiner. Incompatible with SDCAOptimizer.
      config: `RunConfig` object to configure the runtime settings.
      feature_engineering_fn: Feature engineering function. Takes features and
        labels which are the output of `input_fn` and returns features and
        labels which will be fed into the model.

    Returns:
      A `LinearEstimator` estimator.

    Raises:
      ValueError: if the optimizer is not supported, e.g., SDCAOptimizer.
    """
    assert feature_columns
    if isinstance(optimizer, sdca_optimizer.SDCAOptimizer):
      raise ValueError("LinearEstimator does not support SDCA optimizer.")

    params = {
        "head": head,
        "feature_columns": feature_columns,
        "optimizer": optimizer,
        "gradient_clip_norm": gradient_clip_norm,
        "joint_weights": _joint_weights,
    }
    super(LinearEstimator, self).__init__(
        model_fn=_linear_model_fn,
        model_dir=model_dir,
        config=config,
        params=params,
        feature_engineering_fn=feature_engineering_fn)
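

# Hedged sketch (not in the original): `LinearEstimator` with the plain
# regression head is roughly a `LinearRegressor` without the SDCA path.
# `input_fn_train` is an assumed input builder like those in the docstrings
# above.
#
#   est = LinearEstimator(
#       feature_columns=[layers.real_valued_column("x")],  # assumed column
#       head=head_lib.regression_head())
#   est.fit(input_fn=input_fn_train)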