• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14# ==============================================================================
15
16"""TensorFlow estimators for Linear and DNN joined training models (deprecated).
17
18This module and all its submodules are deprecated. See
19[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md)
20for migration instructions.
21"""
22
23from __future__ import absolute_import
24from __future__ import division
25from __future__ import print_function
26
27import math
28import six
29
30from tensorflow.contrib import layers
31from tensorflow.contrib.framework import deprecated
32from tensorflow.contrib.framework import deprecated_arg_values
33from tensorflow.contrib.layers.python.layers import feature_column as feature_column_lib
34from tensorflow.contrib.layers.python.layers import optimizers
35from tensorflow.contrib.learn.python.learn import metric_spec
36from tensorflow.contrib.learn.python.learn.estimators import estimator
37from tensorflow.contrib.learn.python.learn.estimators import head as head_lib
38from tensorflow.contrib.learn.python.learn.estimators import model_fn
39from tensorflow.contrib.learn.python.learn.estimators import prediction_key
40from tensorflow.contrib.learn.python.learn.utils import export
41from tensorflow.python.feature_column import feature_column_lib as fc_core
42from tensorflow.python.framework import ops
43from tensorflow.python.ops import control_flow_ops
44from tensorflow.python.ops import nn
45from tensorflow.python.ops import partitioned_variables
46from tensorflow.python.ops import state_ops
47from tensorflow.python.ops import variable_scope
48from tensorflow.python.summary import summary
49from tensorflow.python.training import sync_replicas_optimizer
50from tensorflow.python.training import training_util
51
52
53# The default learning rates are a historical artifact of the initial
54# implementation, but seem a reasonable choice.
55_DNN_LEARNING_RATE = 0.05
56_LINEAR_LEARNING_RATE = 0.2
57
58
59_FIX_GLOBAL_STEP_INCREMENT_DATE = "2017-04-15"
60_FIX_GLOBAL_STEP_INCREMENT_INSTRUCTIONS = (
61    "Please set fix_global_step_increment_bug=True and update training steps "
62    "in your pipeline. See pydoc for details.")
63
64
65def _as_iterable(preds, output):
66  for pred in preds:
67    yield pred[output]
68
69
70def _get_feature_dict(features):
71  if isinstance(features, dict):
72    return features
73  return {"": features}
74
75
76def _get_optimizer(optimizer):
77  if callable(optimizer):
78    return optimizer()
79  else:
80    return optimizer
81
82
83def _check_no_sync_replicas_optimizer(optimizer):
84  if isinstance(optimizer, sync_replicas_optimizer.SyncReplicasOptimizer):
85    raise ValueError(
86        "SyncReplicasOptimizer is not supported in DNNLinearCombined model. "
87        "If you want to use this optimizer, please use either DNN or Linear "
88        "model.")
89
90
91def _linear_learning_rate(num_linear_feature_columns):
92  """Returns the default learning rate of the linear model.
93
94  The calculation is a historical artifact of this initial implementation, but
95  has proven a reasonable choice.
96
97  Args:
98    num_linear_feature_columns: The number of feature columns of the linear
99      model.
100
101  Returns:
102    A float.
103  """
104  default_learning_rate = 1. / math.sqrt(num_linear_feature_columns)
105  return min(_LINEAR_LEARNING_RATE, default_learning_rate)
106
107
108def _add_hidden_layer_summary(value, tag):
109  summary.scalar("%s/fraction_of_zero_values" % tag, nn.zero_fraction(value))
110  summary.histogram("%s/activation" % tag, value)
111
112
113def _add_layer_summary(value, tag):
114  summary.scalar("%s/fraction_of_zero_values" % tag, nn.zero_fraction(value))
115  summary.histogram("%s/activation" % tag, value)
116
117
118def _get_embedding_variable(column, collection_key, input_layer_scope):
119  return ops.get_collection(collection_key,
120                            input_layer_scope + "/" + column.name)
121
122
123def _extract_embedding_lr_multipliers(embedding_lr_multipliers, collection_key,
124                                      input_layer_scope):
125  """Converts embedding lr multipliers to variable based gradient multiplier."""
126  if not embedding_lr_multipliers:
127    return None
128  gradient_multipliers = {}
129  for column, lr_mult in embedding_lr_multipliers.items():
130    if not isinstance(column, feature_column_lib._EmbeddingColumn):  # pylint: disable=protected-access
131      raise ValueError(
132          "learning rate multipler can only be defined for embedding columns. "
133          "It is defined for {}".format(column))
134    embedding = _get_embedding_variable(
135        column, collection_key, input_layer_scope)
136    if not embedding:
137      raise ValueError("Couldn't find a variable for column {}".format(column))
138    for v in embedding:
139      gradient_multipliers[v] = lr_mult
140  return gradient_multipliers
141
142
143def _dnn_linear_combined_model_fn(features, labels, mode, params, config=None):
144  """Deep Neural Net and Linear combined model_fn.
145
146  Args:
147    features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`).
148    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype
149      `int32` or `int64` in the range `[0, n_classes)`.
150    mode: Defines whether this is training, evaluation or prediction.
151      See `ModeKeys`.
152    params: A dict of hyperparameters.
153      The following hyperparameters are expected:
154      * head: A `Head` instance.
155      * linear_feature_columns: An iterable containing all the feature columns
156          used by the Linear model.
157      * linear_optimizer: string, `Optimizer` object, or callable that defines
158          the optimizer to use for training the Linear model. Defaults to the
159          Ftrl optimizer.
160      * joint_linear_weights: If True a single (possibly partitioned) variable
161          will be used to store the linear model weights. It's faster, but
162          requires all columns are sparse and have the 'sum' combiner.
163      * dnn_feature_columns: An iterable containing all the feature columns used
164          by the DNN model.
165      * dnn_optimizer: string, `Optimizer` object, or callable that defines the
166          optimizer to use for training the DNN model. Defaults to the Adagrad
167          optimizer.
168      * dnn_hidden_units: List of hidden units per DNN layer.
169      * dnn_activation_fn: Activation function applied to each DNN layer. If
170          `None`, will use `tf.nn.relu`.
171      * dnn_dropout: When not `None`, the probability we will drop out a given
172          DNN coordinate.
173      * gradient_clip_norm: A float > 0. If provided, gradients are
174          clipped to their global norm with this clipping ratio.
175      * embedding_lr_multipliers: Optional. A dictionary from
176          `EmbeddingColumn` to a `float` multiplier. Multiplier will be used to
177          multiply with learning rate for the embedding variables.
178      * input_layer_partitioner: Optional. Partitioner for input layer.
179    config: `RunConfig` object to configure the runtime settings.
180
181  Returns:
182    `ModelFnOps`
183
184  Raises:
185    ValueError: If both `linear_feature_columns` and `dnn_features_columns`
186      are empty at the same time, or `input_layer_partitioner` is missing.
187  """
188  head = params["head"]
189  linear_feature_columns = params.get("linear_feature_columns")
190  linear_optimizer = params.get("linear_optimizer") or "Ftrl"
191  joint_linear_weights = params.get("joint_linear_weights")
192  dnn_feature_columns = params.get("dnn_feature_columns")
193  dnn_optimizer = params.get("dnn_optimizer") or "Adagrad"
194  dnn_hidden_units = params.get("dnn_hidden_units")
195  dnn_activation_fn = params.get("dnn_activation_fn") or nn.relu
196  dnn_dropout = params.get("dnn_dropout")
197  gradient_clip_norm = params.get("gradient_clip_norm")
198  num_ps_replicas = config.num_ps_replicas if config else 0
199  input_layer_partitioner = params.get("input_layer_partitioner") or (
200      partitioned_variables.min_max_variable_partitioner(
201          max_partitions=num_ps_replicas,
202          min_slice_size=64 << 20))
203  embedding_lr_multipliers = params.get("embedding_lr_multipliers", {})
204  fix_global_step_increment_bug = params.get(
205      "fix_global_step_increment_bug", True)
206
207  if not linear_feature_columns and not dnn_feature_columns:
208    raise ValueError(
209        "Either linear_feature_columns or dnn_feature_columns must be defined.")
210
211  features = _get_feature_dict(features)
212
213  linear_optimizer = _get_optimizer(linear_optimizer)
214  _check_no_sync_replicas_optimizer(linear_optimizer)
215  dnn_optimizer = _get_optimizer(dnn_optimizer)
216  _check_no_sync_replicas_optimizer(dnn_optimizer)
217
218  # Build DNN Logits.
219  dnn_parent_scope = "dnn"
220
221  if not dnn_feature_columns:
222    dnn_logits = None
223  else:
224    if not dnn_hidden_units:
225      raise ValueError(
226          "dnn_hidden_units must be defined when dnn_feature_columns is "
227          "specified.")
228    dnn_partitioner = (
229        partitioned_variables.min_max_variable_partitioner(
230            max_partitions=num_ps_replicas))
231    with variable_scope.variable_scope(
232        dnn_parent_scope,
233        values=tuple(six.itervalues(features)),
234        partitioner=dnn_partitioner):
235      with variable_scope.variable_scope(
236          "input_from_feature_columns",
237          values=tuple(six.itervalues(features)),
238          partitioner=input_layer_partitioner) as dnn_input_scope:
239        if all(
240            isinstance(fc, feature_column_lib._FeatureColumn)  # pylint: disable=protected-access
241            for fc in dnn_feature_columns
242        ):
243          net = layers.input_from_feature_columns(
244              columns_to_tensors=features,
245              feature_columns=dnn_feature_columns,
246              weight_collections=[dnn_parent_scope],
247              scope=dnn_input_scope)
248        else:
249          net = fc_core.input_layer(
250              features=features,
251              feature_columns=dnn_feature_columns,
252              weight_collections=[dnn_parent_scope])
253
254      for layer_id, num_hidden_units in enumerate(dnn_hidden_units):
255        with variable_scope.variable_scope(
256            "hiddenlayer_%d" % layer_id,
257            values=(net,)) as dnn_hidden_layer_scope:
258          net = layers.fully_connected(
259              net,
260              num_hidden_units,
261              activation_fn=dnn_activation_fn,
262              variables_collections=[dnn_parent_scope],
263              scope=dnn_hidden_layer_scope)
264          if dnn_dropout is not None and mode == model_fn.ModeKeys.TRAIN:
265            net = layers.dropout(
266                net,
267                keep_prob=(1.0 - dnn_dropout))
268        # TODO(b/31209633): Consider adding summary before dropout.
269        _add_layer_summary(net, dnn_hidden_layer_scope.name)
270
271      with variable_scope.variable_scope(
272          "logits",
273          values=(net,)) as dnn_logits_scope:
274        dnn_logits = layers.fully_connected(
275            net,
276            head.logits_dimension,
277            activation_fn=None,
278            variables_collections=[dnn_parent_scope],
279            scope=dnn_logits_scope)
280      _add_layer_summary(dnn_logits, dnn_logits_scope.name)
281
282  # Build Linear logits.
283  linear_parent_scope = "linear"
284
285  if not linear_feature_columns:
286    linear_logits = None
287  else:
288    linear_partitioner = partitioned_variables.min_max_variable_partitioner(
289        max_partitions=num_ps_replicas,
290        min_slice_size=64 << 20)
291    with variable_scope.variable_scope(
292        linear_parent_scope,
293        values=tuple(six.itervalues(features)),
294        partitioner=linear_partitioner) as scope:
295      if all(isinstance(fc, feature_column_lib._FeatureColumn)  # pylint: disable=protected-access
296             for fc in linear_feature_columns):
297        if joint_linear_weights:
298          linear_logits, _, _ = layers.joint_weighted_sum_from_feature_columns(
299              columns_to_tensors=features,
300              feature_columns=linear_feature_columns,
301              num_outputs=head.logits_dimension,
302              weight_collections=[linear_parent_scope],
303              scope=scope)
304        else:
305          linear_logits, _, _ = layers.weighted_sum_from_feature_columns(
306              columns_to_tensors=features,
307              feature_columns=linear_feature_columns,
308              num_outputs=head.logits_dimension,
309              weight_collections=[linear_parent_scope],
310              scope=scope)
311      else:
312        linear_logits = fc_core.linear_model(
313            features=features,
314            feature_columns=linear_feature_columns,
315            units=head.logits_dimension,
316            weight_collections=[linear_parent_scope])
317
318      _add_layer_summary(linear_logits, scope.name)
319
320  # Combine logits and build full model.
321  if dnn_logits is not None and linear_logits is not None:
322    logits = dnn_logits + linear_logits
323  elif dnn_logits is not None:
324    logits = dnn_logits
325  else:
326    logits = linear_logits
327
328  def _make_training_op(training_loss):
329    """Training op for the DNN linear combined model."""
330    train_ops = []
331    global_step = training_util.get_global_step()
332    if dnn_logits is not None:
333      train_ops.append(
334          optimizers.optimize_loss(
335              loss=training_loss,
336              global_step=global_step,
337              learning_rate=_DNN_LEARNING_RATE,
338              optimizer=dnn_optimizer,
339              gradient_multipliers=_extract_embedding_lr_multipliers(  # pylint: disable=protected-access
340                  embedding_lr_multipliers, dnn_parent_scope,
341                  dnn_input_scope.name),
342              clip_gradients=gradient_clip_norm,
343              variables=ops.get_collection(dnn_parent_scope),
344              name=dnn_parent_scope,
345              # Empty summaries, because head already logs "loss" summary.
346              summaries=[],
347              increment_global_step=not fix_global_step_increment_bug))
348    if linear_logits is not None:
349      train_ops.append(
350          optimizers.optimize_loss(
351              loss=training_loss,
352              global_step=global_step,
353              learning_rate=_linear_learning_rate(len(linear_feature_columns)),
354              optimizer=linear_optimizer,
355              clip_gradients=gradient_clip_norm,
356              variables=ops.get_collection(linear_parent_scope),
357              name=linear_parent_scope,
358              # Empty summaries, because head already logs "loss" summary.
359              summaries=[],
360              increment_global_step=not fix_global_step_increment_bug))
361
362    train_op = control_flow_ops.group(*train_ops)
363    if fix_global_step_increment_bug:
364      with ops.control_dependencies([train_op]):
365        with ops.colocate_with(global_step):
366          return state_ops.assign_add(global_step, 1).op
367    return train_op
368
369  return head.create_model_fn_ops(
370      features=features,
371      mode=mode,
372      labels=labels,
373      train_op_fn=_make_training_op,
374      logits=logits)
375
376
377class DNNLinearCombinedEstimator(estimator.Estimator):
378  """An estimator for TensorFlow Linear and DNN joined training models.
379
380  THIS CLASS IS DEPRECATED. See
381  [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md)
382  for general migration instructions.
383
384  Note: New users must set `fix_global_step_increment_bug=True` when creating an
385  estimator.
386
387  Input of `fit`, `train`, and `evaluate` should have following features,
388    otherwise there will be a `KeyError`:
389      if `weight_column_name` is not `None`, a feature with
390        `key=weight_column_name` whose value is a `Tensor`.
391      for each `column` in `dnn_feature_columns` + `linear_feature_columns`:
392      - if `column` is a `SparseColumn`, a feature with `key=column.name`
393        whose `value` is a `SparseTensor`.
394      - if `column` is a `WeightedSparseColumn`, two features: the first with
395        `key` the id column name, the second with `key` the weight column
396        name. Both features' `value` must be a `SparseTensor`.
397      - if `column` is a `RealValuedColumn, a feature with `key=column.name`
398        whose `value` is a `Tensor`.
399  """
400
401  @deprecated_arg_values(
402      _FIX_GLOBAL_STEP_INCREMENT_DATE,
403      _FIX_GLOBAL_STEP_INCREMENT_INSTRUCTIONS,
404      fix_global_step_increment_bug=False)
405  def __init__(self,  # _joint_linear_weights pylint: disable=invalid-name
406               head,
407               model_dir=None,
408               linear_feature_columns=None,
409               linear_optimizer=None,
410               _joint_linear_weights=False,
411               dnn_feature_columns=None,
412               dnn_optimizer=None,
413               dnn_hidden_units=None,
414               dnn_activation_fn=None,
415               dnn_dropout=None,
416               gradient_clip_norm=None,
417               config=None,
418               feature_engineering_fn=None,
419               embedding_lr_multipliers=None,
420               fix_global_step_increment_bug=False,
421               input_layer_partitioner=None):
422    """Initializes a DNNLinearCombinedEstimator instance.
423
424    Note: New users must set `fix_global_step_increment_bug=True` when creating
425    an estimator.
426
427    Args:
428      head: A _Head object.
429      model_dir: Directory to save model parameters, graph and etc. This can
430        also be used to load checkpoints from the directory into a estimator
431        to continue training a previously saved model.
432      linear_feature_columns: An iterable containing all the feature columns
433        used by linear part of the model. All items in the set should be
434        instances of classes derived from `FeatureColumn`.
435      linear_optimizer: An instance of `tf.Optimizer` used to apply gradients to
436        the linear part of the model. If `None`, will use a FTRL optimizer.
437      _joint_linear_weights: If True will use a single (possibly partitioned)
438        variable to store all weights for the linear model. More efficient if
439        there are many columns, however requires all columns are sparse and
440        have the 'sum' combiner.
441      dnn_feature_columns: An iterable containing all the feature columns used
442        by deep part of the model. All items in the set should be instances of
443        classes derived from `FeatureColumn`.
444      dnn_optimizer: An instance of `tf.Optimizer` used to apply gradients to
445        the deep part of the model. If `None`, will use an Adagrad optimizer.
446      dnn_hidden_units: List of hidden units per layer. All layers are fully
447        connected.
448      dnn_activation_fn: Activation function applied to each layer. If `None`,
449        will use `tf.nn.relu`.
450      dnn_dropout: When not None, the probability we will drop out
451        a given coordinate.
452      gradient_clip_norm: A float > 0. If provided, gradients are clipped
453        to their global norm with this clipping ratio. See
454        tf.clip_by_global_norm for more details.
455      config: RunConfig object to configure the runtime settings.
456      feature_engineering_fn: Feature engineering function. Takes features and
457        labels which are the output of `input_fn` and returns features and
458        labels which will be fed into the model.
459      embedding_lr_multipliers: Optional. A dictionary from `EmbeddingColumn` to
460        a `float` multiplier. Multiplier will be used to multiply with
461        learning rate for the embedding variables.
462      fix_global_step_increment_bug: If `False`, the estimator needs two fit
463        steps to optimize both linear and dnn parts. If `True`, this bug is
464        fixed. New users must set this to `True`, but the default value is
465        `False` for backwards compatibility.
466      input_layer_partitioner: Optional. Partitioner for input layer.
467
468    Raises:
469      ValueError: If both linear_feature_columns and dnn_features_columns are
470        empty at the same time.
471    """
472    linear_feature_columns = tuple(linear_feature_columns or [])
473    dnn_feature_columns = tuple(dnn_feature_columns or [])
474    if not linear_feature_columns + dnn_feature_columns:
475      raise ValueError("Either linear_feature_columns or dnn_feature_columns "
476                       "must be defined.")
477    super(DNNLinearCombinedEstimator, self).__init__(
478        model_fn=_dnn_linear_combined_model_fn,
479        model_dir=model_dir,
480        config=config,
481        params={
482            "head": head,
483            "linear_feature_columns": linear_feature_columns,
484            "linear_optimizer": linear_optimizer,
485            "joint_linear_weights": _joint_linear_weights,
486            "dnn_feature_columns": dnn_feature_columns,
487            "dnn_optimizer": dnn_optimizer,
488            "dnn_hidden_units": dnn_hidden_units,
489            "dnn_activation_fn": dnn_activation_fn,
490            "dnn_dropout": dnn_dropout,
491            "gradient_clip_norm": gradient_clip_norm,
492            "embedding_lr_multipliers": embedding_lr_multipliers,
493            "fix_global_step_increment_bug": fix_global_step_increment_bug,
494            "input_layer_partitioner": input_layer_partitioner
495        },
496        feature_engineering_fn=feature_engineering_fn)
497
498
499class DNNLinearCombinedClassifier(estimator.Estimator):
500  """A classifier for TensorFlow Linear and DNN joined training models.
501
502  THIS CLASS IS DEPRECATED. See
503  [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md)
504  for general migration instructions.
505
506  Note: New users must set `fix_global_step_increment_bug=True` when creating an
507  estimator.
508
509  Example:
510
511  ```python
512  sparse_feature_a = sparse_column_with_hash_bucket(...)
513  sparse_feature_b = sparse_column_with_hash_bucket(...)
514
515  sparse_feature_a_x_sparse_feature_b = crossed_column(...)
516
517  sparse_feature_a_emb = embedding_column(sparse_id_column=sparse_feature_a,
518                                          ...)
519  sparse_feature_b_emb = embedding_column(sparse_id_column=sparse_feature_b,
520                                          ...)
521
522  estimator = DNNLinearCombinedClassifier(
523      # common settings
524      n_classes=n_classes,
525      weight_column_name=weight_column_name,
526      # wide settings
527      linear_feature_columns=[sparse_feature_a_x_sparse_feature_b],
528      linear_optimizer=tf.train.FtrlOptimizer(...),
529      # deep settings
530      dnn_feature_columns=[sparse_feature_a_emb, sparse_feature_b_emb],
531      dnn_hidden_units=[1000, 500, 100],
532      dnn_optimizer=tf.train.AdagradOptimizer(...))
533
534  # Input builders
535  def input_fn_train: # returns x, y (where y represents label's class index).
536    ...
537  def input_fn_eval: # returns x, y (where y represents label's class index).
538    ...
539  def input_fn_predict: # returns x, None.
540    ...
541  estimator.fit(input_fn=input_fn_train)
542  estimator.evaluate(input_fn=input_fn_eval)
543  # predict_classes returns class indices.
544  estimator.predict_classes(input_fn=input_fn_predict)
545  ```
546
547  If the user specifies `label_keys` in constructor, labels must be strings from
548  the `label_keys` vocabulary. Example:
549
550  ```python
551  label_keys = ['label0', 'label1', 'label2']
552  estimator = DNNLinearCombinedClassifier(
553      n_classes=n_classes,
554      linear_feature_columns=[sparse_feature_a_x_sparse_feature_b],
555      dnn_feature_columns=[sparse_feature_a_emb, sparse_feature_b_emb],
556      dnn_hidden_units=[1000, 500, 100],
557      label_keys=label_keys)
558
559  def input_fn_train: # returns x, y (where y is one of label_keys).
560    pass
561  estimator.fit(input_fn=input_fn_train)
562
563  def input_fn_eval: # returns x, y (where y is one of label_keys).
564    pass
565  estimator.evaluate(input_fn=input_fn_eval)
566  def input_fn_predict: # returns x, None
567  # predict_classes returns one of label_keys.
568  estimator.predict_classes(input_fn=input_fn_predict)
569  ```
570
571  Input of `fit` and `evaluate` should have following features,
572    otherwise there will be a `KeyError`:
573
574  * if `weight_column_name` is not `None`, a feature with
575        `key=weight_column_name` whose value is a `Tensor`.
576  * for each `column` in `dnn_feature_columns` + `linear_feature_columns`:
577      - if `column` is a `SparseColumn`, a feature with `key=column.name`
578        whose `value` is a `SparseTensor`.
579      - if `column` is a `WeightedSparseColumn`, two features: the first with
580        `key` the id column name, the second with `key` the weight column name.
581        Both features' `value` must be a `SparseTensor`.
582      - if `column` is a `RealValuedColumn, a feature with `key=column.name`
583        whose `value` is a `Tensor`.
584  """
585
586  @deprecated_arg_values(
587      _FIX_GLOBAL_STEP_INCREMENT_DATE,
588      _FIX_GLOBAL_STEP_INCREMENT_INSTRUCTIONS,
589      fix_global_step_increment_bug=False)
590  def __init__(self,  # _joint_linear_weights pylint: disable=invalid-name
591               model_dir=None,
592               n_classes=2,
593               weight_column_name=None,
594               linear_feature_columns=None,
595               linear_optimizer=None,
596               _joint_linear_weights=False,
597               dnn_feature_columns=None,
598               dnn_optimizer=None,
599               dnn_hidden_units=None,
600               dnn_activation_fn=nn.relu,
601               dnn_dropout=None,
602               gradient_clip_norm=None,
603               enable_centered_bias=False,
604               config=None,
605               feature_engineering_fn=None,
606               embedding_lr_multipliers=None,
607               input_layer_min_slice_size=None,
608               label_keys=None,
609               fix_global_step_increment_bug=False):
610    """Constructs a DNNLinearCombinedClassifier instance.
611
612    Note: New users must set `fix_global_step_increment_bug=True` when creating
613    an estimator.
614
615    Args:
616      model_dir: Directory to save model parameters, graph and etc. This can
617        also be used to load checkpoints from the directory into a estimator
618        to continue training a previously saved model.
619      n_classes: number of label classes. Default is binary classification.
620        Note that class labels are integers representing the class index (i.e.
621        values from 0 to n_classes-1). For arbitrary label values (e.g. string
622        labels), convert to class indices first.
623      weight_column_name: A string defining feature column name representing
624        weights. It is used to down weight or boost examples during training.
625        It will be multiplied by the loss of the example.
626      linear_feature_columns: An iterable containing all the feature columns
627        used by linear part of the model. All items in the set must be
628        instances of classes derived from `FeatureColumn`.
629      linear_optimizer: An instance of `tf.Optimizer` used to apply gradients to
630        the linear part of the model. If `None`, will use a FTRL optimizer.
631      _joint_linear_weights: If True a single (possibly partitioned) variable
632        will be used to store the linear model weights. It's faster, but
633        requires all columns are sparse and have the 'sum' combiner.
634      dnn_feature_columns: An iterable containing all the feature columns used
635        by deep part of the model. All items in the set must be instances of
636        classes derived from `FeatureColumn`.
637      dnn_optimizer: An instance of `tf.Optimizer` used to apply gradients to
638        the deep part of the model. If `None`, will use an Adagrad optimizer.
639      dnn_hidden_units: List of hidden units per layer. All layers are fully
640        connected.
641      dnn_activation_fn: Activation function applied to each layer. If `None`,
642        will use `tf.nn.relu`.
643      dnn_dropout: When not None, the probability we will drop out
644        a given coordinate.
645      gradient_clip_norm: A float > 0. If provided, gradients are clipped
646        to their global norm with this clipping ratio. See
647        tf.clip_by_global_norm for more details.
648      enable_centered_bias: A bool. If True, estimator will learn a centered
649        bias variable for each class. Rest of the model structure learns the
650        residual after centered bias.
651      config: RunConfig object to configure the runtime settings.
652      feature_engineering_fn: Feature engineering function. Takes features and
653        labels which are the output of `input_fn` and returns features and
654        labels which will be fed into the model.
655      embedding_lr_multipliers: Optional. A dictionary from `EmbeddingColumn` to
656        a `float` multiplier. Multiplier will be used to multiply with
657        learning rate for the embedding variables.
658      input_layer_min_slice_size: Optional. The min slice size of input layer
659        partitions. If not provided, will use the default of 64M.
660      label_keys: Optional list of strings with size `[n_classes]` defining the
661        label vocabulary. Only supported for `n_classes` > 2.
662      fix_global_step_increment_bug: If `False`, the estimator needs two fit
663        steps to optimize both linear and dnn parts. If `True`, this bug is
664        fixed. New users must set this to `True`, but it the default value is
665        `False` for backwards compatibility.
666
667    Raises:
668      ValueError: If `n_classes` < 2.
669      ValueError: If both `linear_feature_columns` and `dnn_features_columns`
670        are empty at the same time.
671    """
672    head = head_lib.multi_class_head(
673        n_classes=n_classes,
674        weight_column_name=weight_column_name,
675        enable_centered_bias=enable_centered_bias,
676        label_keys=label_keys)
677    linear_feature_columns = tuple(linear_feature_columns or [])
678    dnn_feature_columns = tuple(dnn_feature_columns or [])
679    self._feature_columns = linear_feature_columns + dnn_feature_columns
680    if not self._feature_columns:
681      raise ValueError("Either linear_feature_columns or dnn_feature_columns "
682                       "must be defined.")
683
684    # TODO(b/35922130): Replace with `input_layer_partitioner` arg.
685    input_layer_partitioner = None
686    if input_layer_min_slice_size is not None:
687      input_layer_partitioner = (
688          partitioned_variables.min_max_variable_partitioner(
689              max_partitions=config.num_ps_replicas if config else 0,
690              min_slice_size=input_layer_min_slice_size))
691
692    super(DNNLinearCombinedClassifier, self).__init__(
693        model_fn=_dnn_linear_combined_model_fn,
694        model_dir=model_dir,
695        config=config,
696        params={
697            "head": head,
698            "linear_feature_columns": linear_feature_columns,
699            "linear_optimizer": linear_optimizer,
700            "joint_linear_weights": _joint_linear_weights,
701            "dnn_feature_columns": dnn_feature_columns,
702            "dnn_optimizer": dnn_optimizer,
703            "dnn_hidden_units": dnn_hidden_units,
704            "dnn_activation_fn": dnn_activation_fn,
705            "dnn_dropout": dnn_dropout,
706            "gradient_clip_norm": gradient_clip_norm,
707            "embedding_lr_multipliers": embedding_lr_multipliers,
708            "input_layer_partitioner": input_layer_partitioner,
709            "fix_global_step_increment_bug": fix_global_step_increment_bug,
710        },
711        feature_engineering_fn=feature_engineering_fn)
712
713  @deprecated_arg_values(
714      estimator.AS_ITERABLE_DATE, estimator.AS_ITERABLE_INSTRUCTIONS,
715      as_iterable=False)
716  @deprecated_arg_values(
717      "2017-03-01",
718      "Please switch to predict_classes, or set `outputs` argument.",
719      outputs=None)
720  def predict(self, x=None, input_fn=None, batch_size=None, outputs=None,
721              as_iterable=True):
722    """Returns predictions for given features.
723
724    By default, returns predicted classes. But this default will be dropped
725    soon. Users should either pass `outputs`, or call `predict_classes` method.
726
727    Args:
728      x: features.
729      input_fn: Input function. If set, x must be None.
730      batch_size: Override default batch size.
731      outputs: list of `str`, name of the output to predict.
732        If `None`, returns classes.
733      as_iterable: If True, return an iterable which keeps yielding predictions
734        for each example until inputs are exhausted. Note: The inputs must
735        terminate if you want the iterable to terminate (e.g. be sure to pass
736        num_epochs=1 if you are using something like read_batch_features).
737
738    Returns:
739      Numpy array of predicted classes with shape [batch_size] (or an iterable
740      of predicted classes if as_iterable is True). Each predicted class is
741      represented by its class index (i.e. integer from 0 to n_classes-1).
742      If `outputs` is set, returns a dict of predictions.
743    """
744    if not outputs:
745      return self.predict_classes(
746          x=x,
747          input_fn=input_fn,
748          batch_size=batch_size,
749          as_iterable=as_iterable)
750    return super(DNNLinearCombinedClassifier, self).predict(
751        x=x,
752        input_fn=input_fn,
753        batch_size=batch_size,
754        outputs=outputs,
755        as_iterable=as_iterable)
756
757  @deprecated_arg_values(
758      estimator.AS_ITERABLE_DATE, estimator.AS_ITERABLE_INSTRUCTIONS,
759      as_iterable=False)
760  def predict_classes(self, x=None, input_fn=None, batch_size=None,
761                      as_iterable=True):
762    """Returns predicted classes for given features.
763
764    Args:
765      x: features.
766      input_fn: Input function. If set, x must be None.
767      batch_size: Override default batch size.
768      as_iterable: If True, return an iterable which keeps yielding predictions
769        for each example until inputs are exhausted. Note: The inputs must
770        terminate if you want the iterable to terminate (e.g. be sure to pass
771        num_epochs=1 if you are using something like read_batch_features).
772
773    Returns:
774      Numpy array of predicted classes with shape [batch_size] (or an iterable
775      of predicted classes if as_iterable is True). Each predicted class is
776      represented by its class index (i.e. integer from 0 to n_classes-1).
777    """
778    key = prediction_key.PredictionKey.CLASSES
779    preds = super(DNNLinearCombinedClassifier, self).predict(
780        x=x,
781        input_fn=input_fn,
782        batch_size=batch_size,
783        outputs=[key],
784        as_iterable=as_iterable)
785    if as_iterable:
786      return _as_iterable(preds, output=key)
787    return preds[key].reshape(-1)
788
789  @deprecated_arg_values(
790      estimator.AS_ITERABLE_DATE, estimator.AS_ITERABLE_INSTRUCTIONS,
791      as_iterable=False)
792  def predict_proba(
793      self, x=None, input_fn=None, batch_size=None, as_iterable=True):
794    """Returns prediction probabilities for given features.
795
796    Args:
797      x: features.
798      input_fn: Input function. If set, x and y must be None.
799      batch_size: Override default batch size.
800      as_iterable: If True, return an iterable which keeps yielding predictions
801        for each example until inputs are exhausted. Note: The inputs must
802        terminate if you want the iterable to terminate (e.g. be sure to pass
803        num_epochs=1 if you are using something like read_batch_features).
804
805    Returns:
806      Numpy array of predicted probabilities with shape [batch_size, n_classes]
807      (or an iterable of predicted probabilities if as_iterable is True).
808    """
809    key = prediction_key.PredictionKey.PROBABILITIES
810    preds = super(DNNLinearCombinedClassifier, self).predict(
811        x=x,
812        input_fn=input_fn,
813        batch_size=batch_size,
814        outputs=[key],
815        as_iterable=as_iterable)
816    if as_iterable:
817      return _as_iterable(preds, output=key)
818    return preds[key]
819
820  @deprecated("2017-03-25", "Please use Estimator.export_savedmodel() instead.")
821  def export(self,
822             export_dir,
823             input_fn=None,
824             input_feature_key=None,
825             use_deprecated_input_fn=True,
826             signature_fn=None,
827             default_batch_size=1,
828             exports_to_keep=None):
829    """See BasEstimator.export."""
830    def default_input_fn(unused_estimator, examples):
831      return layers.parse_feature_columns_from_examples(
832          examples, self._feature_columns)
833    return super(DNNLinearCombinedClassifier, self).export(
834        export_dir=export_dir,
835        input_fn=input_fn or default_input_fn,
836        input_feature_key=input_feature_key,
837        use_deprecated_input_fn=use_deprecated_input_fn,
838        signature_fn=(signature_fn or
839                      export.classification_signature_fn_with_prob),
840        prediction_key=prediction_key.PredictionKey.PROBABILITIES,
841        default_batch_size=default_batch_size,
842        exports_to_keep=exports_to_keep)
843
844
845class DNNLinearCombinedRegressor(estimator.Estimator):
846  """A regressor for TensorFlow Linear and DNN joined training models.
847
848  THIS CLASS IS DEPRECATED. See
849  [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md)
850  for general migration instructions.
851
852  Note: New users must set `fix_global_step_increment_bug=True` when creating an
853  estimator.
854
855  Example:
856
857  ```python
858  sparse_feature_a = sparse_column_with_hash_bucket(...)
859  sparse_feature_b = sparse_column_with_hash_bucket(...)
860
861  sparse_feature_a_x_sparse_feature_b = crossed_column(...)
862
863  sparse_feature_a_emb = embedding_column(sparse_id_column=sparse_feature_a,
864                                          ...)
865  sparse_feature_b_emb = embedding_column(sparse_id_column=sparse_feature_b,
866                                          ...)
867
868  estimator = DNNLinearCombinedRegressor(
869      # common settings
870      weight_column_name=weight_column_name,
871      # wide settings
872      linear_feature_columns=[sparse_feature_a_x_sparse_feature_b],
873      linear_optimizer=tf.train.FtrlOptimizer(...),
874      # deep settings
875      dnn_feature_columns=[sparse_feature_a_emb, sparse_feature_b_emb],
876      dnn_hidden_units=[1000, 500, 100],
877      dnn_optimizer=tf.train.ProximalAdagradOptimizer(...))
878
879  # To apply L1 and L2 regularization, you can set optimizers as follows:
880  tf.train.ProximalAdagradOptimizer(
881      learning_rate=0.1,
882      l1_regularization_strength=0.001,
883      l2_regularization_strength=0.001)
884  # It is same for FtrlOptimizer.
885
886  # Input builders
887  def input_fn_train: # returns x, y
888    ...
889  def input_fn_eval: # returns x, y
890    ...
891  def input_fn_predict: # returns x, None
892    ...
893  estimator.train(input_fn_train)
894  estimator.evaluate(input_fn_eval)
895  estimator.predict(input_fn_predict)
896  ```
897
898  Input of `fit`, `train`, and `evaluate` should have following features,
899    otherwise there will be a `KeyError`:
900      if `weight_column_name` is not `None`, a feature with
901        `key=weight_column_name` whose value is a `Tensor`.
902      for each `column` in `dnn_feature_columns` + `linear_feature_columns`:
903      - if `column` is a `SparseColumn`, a feature with `key=column.name`
904        whose `value` is a `SparseTensor`.
905      - if `column` is a `WeightedSparseColumn`, two features: the first with
906        `key` the id column name, the second with `key` the weight column name.
907        Both features' `value` must be a `SparseTensor`.
908      - if `column` is a `RealValuedColumn, a feature with `key=column.name`
909        whose `value` is a `Tensor`.
910  """
911
912  @deprecated_arg_values(
913      _FIX_GLOBAL_STEP_INCREMENT_DATE,
914      _FIX_GLOBAL_STEP_INCREMENT_INSTRUCTIONS,
915      fix_global_step_increment_bug=False)
916  def __init__(self,  # _joint_linear_weights pylint: disable=invalid-name
917               model_dir=None,
918               weight_column_name=None,
919               linear_feature_columns=None,
920               linear_optimizer=None,
921               _joint_linear_weights=False,
922               dnn_feature_columns=None,
923               dnn_optimizer=None,
924               dnn_hidden_units=None,
925               dnn_activation_fn=nn.relu,
926               dnn_dropout=None,
927               gradient_clip_norm=None,
928               enable_centered_bias=False,
929               label_dimension=1,
930               config=None,
931               feature_engineering_fn=None,
932               embedding_lr_multipliers=None,
933               input_layer_min_slice_size=None,
934               fix_global_step_increment_bug=False):
935    """Initializes a DNNLinearCombinedRegressor instance.
936
937    Note: New users must set `fix_global_step_increment_bug=True` when creating
938    an estimator.
939
940    Args:
941      model_dir: Directory to save model parameters, graph and etc. This can
942        also be used to load checkpoints from the directory into a estimator
943        to continue training a previously saved model.
944      weight_column_name: A string defining feature column name representing
945        weights. It is used to down weight or boost examples during training. It
946        will be multiplied by the loss of the example.
947      linear_feature_columns: An iterable containing all the feature columns
948        used by linear part of the model. All items in the set must be
949        instances of classes derived from `FeatureColumn`.
950      linear_optimizer: An instance of `tf.Optimizer` used to apply gradients to
951        the linear part of the model. If `None`, will use a FTRL optimizer.
952      _joint_linear_weights: If True a single (possibly partitioned) variable
953        will be used to store the linear model weights. It's faster, but
954        requires that all columns are sparse and have the 'sum' combiner.
955      dnn_feature_columns: An iterable containing all the feature columns used
956        by deep part of the model. All items in the set must be instances of
957        classes derived from `FeatureColumn`.
958      dnn_optimizer: An instance of `tf.Optimizer` used to apply gradients to
959        the deep part of the model. If `None`, will use an Adagrad optimizer.
960      dnn_hidden_units: List of hidden units per layer. All layers are fully
961        connected.
962      dnn_activation_fn: Activation function applied to each layer. If None,
963        will use `tf.nn.relu`.
964      dnn_dropout: When not None, the probability we will drop out
965        a given coordinate.
966      gradient_clip_norm: A float > 0. If provided, gradients are clipped
967        to their global norm with this clipping ratio. See
968        tf.clip_by_global_norm for more details.
969      enable_centered_bias: A bool. If True, estimator will learn a centered
970        bias variable for each class. Rest of the model structure learns the
971        residual after centered bias.
972      label_dimension: Number of regression targets per example. This is the
973        size of the last dimension of the labels and logits `Tensor` objects
974        (typically, these have shape `[batch_size, label_dimension]`).
975      config: RunConfig object to configure the runtime settings.
976      feature_engineering_fn: Feature engineering function. Takes features and
977        labels which are the output of `input_fn` and returns features and
978        labels which will be fed into the model.
979      embedding_lr_multipliers: Optional. A dictionary from `EmbeddingColumn` to
980        a `float` multiplier. Multiplier will be used to multiply with
981        learning rate for the embedding variables.
982      input_layer_min_slice_size: Optional. The min slice size of input layer
983        partitions. If not provided, will use the default of 64M.
984      fix_global_step_increment_bug: If `False`, the estimator needs two fit
985        steps to optimize both linear and dnn parts. If `True`, this bug is
986        fixed. New users must set this to `True`, but it the default value is
987        `False` for backwards compatibility.
988
989    Raises:
990      ValueError: If both linear_feature_columns and dnn_features_columns are
991        empty at the same time.
992    """
993    linear_feature_columns = linear_feature_columns or []
994    dnn_feature_columns = dnn_feature_columns or []
995    self._feature_columns = linear_feature_columns + dnn_feature_columns
996    if not self._feature_columns:
997      raise ValueError("Either linear_feature_columns or dnn_feature_columns "
998                       "must be defined.")
999
1000    # TODO(b/35922130): Replace with `input_layer_partitioner` arg.
1001    input_layer_partitioner = None
1002    if input_layer_min_slice_size is not None:
1003      input_layer_partitioner = (
1004          partitioned_variables.min_max_variable_partitioner(
1005              max_partitions=config.num_ps_replicas if config else 0,
1006              min_slice_size=input_layer_min_slice_size))
1007
1008    head = head_lib.regression_head(
1009        weight_column_name=weight_column_name,
1010        label_dimension=label_dimension,
1011        enable_centered_bias=enable_centered_bias)
1012    super(DNNLinearCombinedRegressor, self).__init__(
1013        model_fn=_dnn_linear_combined_model_fn,
1014        model_dir=model_dir,
1015        config=config,
1016        params={
1017            "head": head,
1018            "linear_feature_columns": linear_feature_columns,
1019            "linear_optimizer": linear_optimizer,
1020            "joint_linear_weights": _joint_linear_weights,
1021            "dnn_feature_columns": dnn_feature_columns,
1022            "dnn_optimizer": dnn_optimizer,
1023            "dnn_hidden_units": dnn_hidden_units,
1024            "dnn_activation_fn": dnn_activation_fn,
1025            "dnn_dropout": dnn_dropout,
1026            "gradient_clip_norm": gradient_clip_norm,
1027            "embedding_lr_multipliers": embedding_lr_multipliers,
1028            "input_layer_partitioner": input_layer_partitioner,
1029            "fix_global_step_increment_bug": fix_global_step_increment_bug,
1030        },
1031        feature_engineering_fn=feature_engineering_fn)
1032
1033  def evaluate(self,
1034               x=None,
1035               y=None,
1036               input_fn=None,
1037               feed_fn=None,
1038               batch_size=None,
1039               steps=None,
1040               metrics=None,
1041               name=None,
1042               checkpoint_path=None,
1043               hooks=None):
1044    """See evaluable.Evaluable."""
1045    # TODO(zakaria): remove once deprecation is finished (b/31229024)
1046    custom_metrics = {}
1047    if metrics:
1048      for key, metric in six.iteritems(metrics):
1049        if (not isinstance(metric, metric_spec.MetricSpec) and
1050            not isinstance(key, tuple)):
1051          custom_metrics[(key, prediction_key.PredictionKey.SCORES)] = metric
1052        else:
1053          custom_metrics[key] = metric
1054
1055    return super(DNNLinearCombinedRegressor, self).evaluate(
1056        x=x,
1057        y=y,
1058        input_fn=input_fn,
1059        feed_fn=feed_fn,
1060        batch_size=batch_size,
1061        steps=steps,
1062        metrics=custom_metrics,
1063        name=name,
1064        checkpoint_path=checkpoint_path,
1065        hooks=hooks)
1066
1067  @deprecated_arg_values(
1068      estimator.AS_ITERABLE_DATE, estimator.AS_ITERABLE_INSTRUCTIONS,
1069      as_iterable=False)
1070  @deprecated_arg_values(
1071      "2017-03-01",
1072      "Please switch to predict_scores, or set `outputs` argument.",
1073      outputs=None)
1074  def predict(self, x=None, input_fn=None, batch_size=None, outputs=None,
1075              as_iterable=True):
1076    """Returns predictions for given features.
1077
1078    By default, returns predicted scores. But this default will be dropped
1079    soon. Users should either pass `outputs`, or call `predict_scores` method.
1080
1081    Args:
1082      x: features.
1083      input_fn: Input function. If set, x must be None.
1084      batch_size: Override default batch size.
1085      outputs: list of `str`, name of the output to predict.
1086        If `None`, returns scores.
1087      as_iterable: If True, return an iterable which keeps yielding predictions
1088        for each example until inputs are exhausted. Note: The inputs must
1089        terminate if you want the iterable to terminate (e.g. be sure to pass
1090        num_epochs=1 if you are using something like read_batch_features).
1091
1092    Returns:
1093      Numpy array of predicted scores (or an iterable of predicted scores if
1094      as_iterable is True). If `label_dimension == 1`, the shape of the output
1095      is `[batch_size]`, otherwise the shape is `[batch_size, label_dimension]`.
1096      If `outputs` is set, returns a dict of predictions.
1097    """
1098    if not outputs:
1099      return self.predict_scores(
1100          x=x,
1101          input_fn=input_fn,
1102          batch_size=batch_size,
1103          as_iterable=as_iterable)
1104    return super(DNNLinearCombinedRegressor, self).predict(
1105        x=x,
1106        input_fn=input_fn,
1107        batch_size=batch_size,
1108        outputs=outputs,
1109        as_iterable=as_iterable)
1110
1111  @deprecated_arg_values(
1112      estimator.AS_ITERABLE_DATE, estimator.AS_ITERABLE_INSTRUCTIONS,
1113      as_iterable=False)
1114  def predict_scores(self, x=None, input_fn=None, batch_size=None,
1115                     as_iterable=True):
1116    """Returns predicted scores for given features.
1117
1118    Args:
1119      x: features.
1120      input_fn: Input function. If set, x must be None.
1121      batch_size: Override default batch size.
1122      as_iterable: If True, return an iterable which keeps yielding predictions
1123        for each example until inputs are exhausted. Note: The inputs must
1124        terminate if you want the iterable to terminate (e.g. be sure to pass
1125        num_epochs=1 if you are using something like read_batch_features).
1126
1127    Returns:
1128      Numpy array of predicted scores (or an iterable of predicted scores if
1129      as_iterable is True). If `label_dimension == 1`, the shape of the output
1130      is `[batch_size]`, otherwise the shape is `[batch_size, label_dimension]`.
1131    """
1132    key = prediction_key.PredictionKey.SCORES
1133    preds = super(DNNLinearCombinedRegressor, self).predict(
1134        x=x,
1135        input_fn=input_fn,
1136        batch_size=batch_size,
1137        outputs=[key],
1138        as_iterable=as_iterable)
1139    if as_iterable:
1140      return (pred[key] for pred in preds)
1141    return preds[key]
1142
1143  @deprecated("2017-03-25", "Please use Estimator.export_savedmodel() instead.")
1144  def export(self,
1145             export_dir,
1146             input_fn=None,
1147             input_feature_key=None,
1148             use_deprecated_input_fn=True,
1149             signature_fn=None,
1150             default_batch_size=1,
1151             exports_to_keep=None):
1152    """See BaseEstimator.export."""
1153    def default_input_fn(unused_estimator, examples):
1154      return layers.parse_feature_columns_from_examples(
1155          examples, self._feature_columns)
1156    return super(DNNLinearCombinedRegressor, self).export(
1157        export_dir=export_dir,
1158        input_fn=input_fn or default_input_fn,
1159        input_feature_key=input_feature_key,
1160        use_deprecated_input_fn=use_deprecated_input_fn,
1161        signature_fn=signature_fn or export.regression_signature_fn,
1162        prediction_key=prediction_key.PredictionKey.SCORES,
1163        default_batch_size=default_batch_size,
1164        exports_to_keep=exports_to_keep)
1165
1166# Aliases
1167# TODO(zakaria): Remove these aliases, See b/34751732
1168_DNNLinearCombinedEstimator = DNNLinearCombinedEstimator
1169