• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#     http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14# ==============================================================================
15"""Tests for DNNEstimators."""
16
17from __future__ import absolute_import
18from __future__ import division
19from __future__ import print_function
20
21import functools
22import json
23import tempfile
24
25import numpy as np
26
27from tensorflow.contrib.layers.python.layers import feature_column
28from tensorflow.contrib.learn.python.learn import experiment
29from tensorflow.contrib.learn.python.learn.datasets import base
30from tensorflow.contrib.learn.python.learn.estimators import _sklearn
31from tensorflow.contrib.learn.python.learn.estimators import dnn
32from tensorflow.contrib.learn.python.learn.estimators import dnn_linear_combined
33from tensorflow.contrib.learn.python.learn.estimators import estimator
34from tensorflow.contrib.learn.python.learn.estimators import estimator_test_utils
35from tensorflow.contrib.learn.python.learn.estimators import head as head_lib
36from tensorflow.contrib.learn.python.learn.estimators import model_fn
37from tensorflow.contrib.learn.python.learn.estimators import run_config
38from tensorflow.contrib.learn.python.learn.estimators import test_data
39from tensorflow.contrib.learn.python.learn.metric_spec import MetricSpec
40from tensorflow.contrib.metrics.python.ops import metric_ops
41from tensorflow.python.feature_column import feature_column_lib as fc_core
42from tensorflow.python.framework import constant_op
43from tensorflow.python.framework import dtypes
44from tensorflow.python.framework import sparse_tensor
45from tensorflow.python.ops import array_ops
46from tensorflow.python.ops import init_ops
47from tensorflow.python.ops import math_ops
48from tensorflow.python.platform import test
49from tensorflow.python.training import input as input_lib
50from tensorflow.python.training import monitored_session
51from tensorflow.python.training import server_lib
52
53
class EmbeddingMultiplierTest(test.TestCase):
  """Checks how `dnn._dnn_model_fn` treats `embedding_lr_multipliers`."""

  def testRaisesNonEmbeddingColumn(self):
    """A multiplier keyed by a one-hot column must raise a ValueError."""
    hashed_language = feature_column.sparse_column_with_hash_bucket(
        'language', 10)
    one_hot_language = feature_column.one_hot_column(hashed_language)

    head = head_lib.multi_class_head(2)
    # The multiplier below is keyed by a one-hot (non-embedding) column.
    params = {
        'feature_columns': [one_hot_language],
        'head': head,
        'hidden_units': [1],
        'embedding_lr_multipliers': {one_hot_language: 0.0},
    }

    language = sparse_tensor.SparseTensor(
        values=['en', 'fr', 'zh'],
        indices=[[0, 0], [1, 0], [2, 0]],
        dense_shape=[3, 1])
    features = {'language': language}
    labels = constant_op.constant([[0], [0], [0]], dtype=dtypes.int32)

    expected_error = 'can only be defined for embedding columns'
    with self.assertRaisesRegexp(ValueError, expected_error):
      dnn._dnn_model_fn(features, labels, model_fn.ModeKeys.TRAIN, params)

  def testMultipliesGradient(self):
    """A 0.0 multiplier freezes one embedding while the other still trains."""

    def _constant_embedding(column_name):
      # One-dimensional embedding whose weights all start at 0.1.
      return feature_column.embedding_column(
          feature_column.sparse_column_with_hash_bucket(column_name, 10),
          dimension=1,
          initializer=init_ops.constant_initializer(0.1))

    embedding_language = _constant_embedding('language')
    embedding_wire = _constant_embedding('wire')

    head = head_lib.multi_class_head(2)
    # Only the language embedding gets a multiplier; 0.0 keeps it constant.
    params = {
        'feature_columns': [embedding_language, embedding_wire],
        'head': head,
        'hidden_units': [1],
        'embedding_lr_multipliers': {embedding_language: 0.0},
    }

    row_indices = [[0, 0], [1, 0], [2, 0]]
    language = sparse_tensor.SparseTensor(
        values=['en', 'fr', 'zh'], indices=row_indices, dense_shape=[3, 1])
    wire = sparse_tensor.SparseTensor(
        values=['omar', 'stringer', 'marlo'],
        indices=row_indices,
        dense_shape=[3, 1])
    features = {'language': language, 'wire': wire}
    labels = constant_op.constant([[0], [0], [0]], dtype=dtypes.int32)

    model_ops = dnn._dnn_model_fn(features, labels, model_fn.ModeKeys.TRAIN,
                                  params)
    with monitored_session.MonitoredSession() as sess:
      input_scope = 'dnn/input_from_feature_columns'
      language_var = dnn_linear_combined._get_embedding_variable(
          embedding_language, 'dnn', input_scope)
      wire_var = dnn_linear_combined._get_embedding_variable(
          embedding_wire, 'dnn', input_scope)

      # Run two training steps and keep the values read on the last one.
      fetches = [model_ops.train_op, language_var, wire_var]
      for _ in range(2):
        _, language_value, wire_value = sess.run(fetches)

      initial_value = np.full_like(language_value, 0.1)
      # The language embedding still equals its initializer; wire does not.
      self.assertTrue(np.allclose(language_value, initial_value))
      self.assertFalse(np.allclose(wire_value, initial_value))
127
128
class ActivationFunctionTest(test.TestCase):
  """Checks the `activation_fn` param accepted by `dnn._dnn_model_fn`."""

  def _getModelForActivation(self, activation_fn):
    """Calls `dnn._dnn_model_fn` in TRAIN mode with `activation_fn`.

    Args:
      activation_fn: Value stored under the 'activation_fn' params key.

    Returns:
      Whatever `dnn._dnn_model_fn` returns for the fixed toy inputs.
    """
    hashed_language = feature_column.sparse_column_with_hash_bucket(
        'language', 10)
    embedding_language = feature_column.embedding_column(
        hashed_language,
        dimension=1,
        initializer=init_ops.constant_initializer(0.1))

    params = {
        'feature_columns': [embedding_language],
        'head': head_lib.multi_class_head(2),
        'hidden_units': [1],
        'activation_fn': activation_fn,
    }

    language = sparse_tensor.SparseTensor(
        values=['en', 'fr', 'zh'],
        indices=[[0, 0], [1, 0], [2, 0]],
        dense_shape=[3, 1])
    features = {'language': language}
    labels = constant_op.constant([[0], [0], [0]], dtype=dtypes.int32)
    return dnn._dnn_model_fn(features, labels, model_fn.ModeKeys.TRAIN, params)

  def testValidActivation(self):
    """Building the model with 'relu' must not raise."""
    self._getModelForActivation('relu')

  def testRaisesOnBadActivationName(self):
    """Building the model with 'max_pool' must raise a ValueError."""
    expected_error = 'Activation name should be one of'
    with self.assertRaisesRegexp(ValueError, expected_error):
      self._getModelForActivation('max_pool')
159
160
class DNNEstimatorTest(test.TestCase):
  """Smoke tests built around `dnn.DNNEstimator`."""

  def _assertInRange(self, expected_min, expected_max, actual):
    """Asserts that expected_min <= actual <= expected_max."""
    self.assertLessEqual(expected_min, actual)
    self.assertGreaterEqual(expected_max, actual)

  def testExperimentIntegration(self):
    """Runs `Experiment.test()` with a 3-class `dnn.DNNClassifier`."""
    columns = [feature_column.real_valued_column('feature', dimension=4)]
    classifier = dnn.DNNClassifier(
        n_classes=3, feature_columns=columns, hidden_units=[3, 3])
    exp = experiment.Experiment(
        estimator=classifier,
        train_input_fn=test_data.iris_input_multiclass_fn,
        eval_input_fn=test_data.iris_input_multiclass_fn)
    exp.test()

  def testEstimatorContract(self):
    """Runs the shared estimator contract assertion on `dnn.DNNEstimator`."""
    estimator_cls = dnn.DNNEstimator
    estimator_test_utils.assert_estimator_contract(self, estimator_cls)

  def testTrainWithWeights(self):
    """Trains with a weight column and checks accuracy is within [0, 1]."""

    def _weighted_train_input_fn():
      # Four rows with x == 1: the first has label 1 and weight 100, the
      # other three have label 0 and much smaller weights.
      labels = constant_op.constant([[1], [0], [0], [0]])
      x = array_ops.ones(shape=[4, 1], dtype=dtypes.float32)
      w = constant_op.constant([[100.], [3.], [2.], [2.]])
      return {'x': x, 'w': w}, labels

    def _uniform_eval_input_fn():
      # Four rows with x == 1, label 1 and unit weight.
      labels = constant_op.constant([[1], [1], [1], [1]])
      x = array_ops.ones(shape=[4, 1], dtype=dtypes.float32)
      w = constant_op.constant([[1.], [1.], [1.], [1.]])
      return {'x': x, 'w': w}, labels

    weighted_head = head_lib.multi_class_head(2, weight_column_name='w')
    dnn_estimator = dnn.DNNEstimator(
        head=weighted_head,
        feature_columns=[feature_column.real_valued_column('x')],
        hidden_units=[3, 3],
        config=run_config.RunConfig(tf_random_seed=1))

    dnn_estimator.fit(input_fn=_weighted_train_input_fn, steps=5)
    scores = dnn_estimator.evaluate(input_fn=_uniform_eval_input_fn, steps=1)
    self._assertInRange(0.0, 1.0, scores['accuracy'])
215
216
217class DNNClassifierTest(test.TestCase):
218
219  def testExperimentIntegration(self):
220    exp = experiment.Experiment(
221        estimator=dnn.DNNClassifier(
222            n_classes=3,
223            feature_columns=[
224                feature_column.real_valued_column(
225                    'feature', dimension=4)
226            ],
227            hidden_units=[3, 3]),
228        train_input_fn=test_data.iris_input_multiclass_fn,
229        eval_input_fn=test_data.iris_input_multiclass_fn)
230    exp.test()
231
232  def _assertInRange(self, expected_min, expected_max, actual):
233    self.assertLessEqual(expected_min, actual)
234    self.assertGreaterEqual(expected_max, actual)
235
236  def testEstimatorContract(self):
237    estimator_test_utils.assert_estimator_contract(self, dnn.DNNClassifier)
238
239  def testEmbeddingMultiplier(self):
240    embedding_language = feature_column.embedding_column(
241        feature_column.sparse_column_with_hash_bucket('language', 10),
242        dimension=1,
243        initializer=init_ops.constant_initializer(0.1))
244    classifier = dnn.DNNClassifier(
245        feature_columns=[embedding_language],
246        hidden_units=[3, 3],
247        embedding_lr_multipliers={embedding_language: 0.8})
248    self.assertEqual({
249        embedding_language: 0.8
250    }, classifier.params['embedding_lr_multipliers'])
251
252  def testInputPartitionSize(self):
253    def _input_fn_float_label(num_epochs=None):
254      features = {
255          'language':
256              sparse_tensor.SparseTensor(
257                  values=input_lib.limit_epochs(
258                      ['en', 'fr', 'zh'], num_epochs=num_epochs),
259                  indices=[[0, 0], [0, 1], [2, 0]],
260                  dense_shape=[3, 2])
261      }
262      labels = constant_op.constant([[0.8], [0.], [0.2]], dtype=dtypes.float32)
263      return features, labels
264
265    language_column = feature_column.sparse_column_with_hash_bucket(
266        'language', hash_bucket_size=20)
267    feature_columns = [
268        feature_column.embedding_column(language_column, dimension=1),
269    ]
270
271    # Set num_ps_replica to be 10 and the min slice size to be extremely small,
272    # so as to ensure that there'll be 10 partititions produced.
273    config = run_config.RunConfig(tf_random_seed=1)
274    config._num_ps_replicas = 10
275    classifier = dnn.DNNClassifier(
276        n_classes=2,
277        feature_columns=feature_columns,
278        hidden_units=[3, 3],
279        optimizer='Adagrad',
280        config=config,
281        input_layer_min_slice_size=1)
282
283    # Ensure the param is passed in.
284    self.assertEqual(1, classifier.params['input_layer_min_slice_size'])
285
286    # Ensure the partition count is 10.
287    classifier.fit(input_fn=_input_fn_float_label, steps=50)
288    partition_count = 0
289    for name in classifier.get_variable_names():
290      if 'language_embedding' in name and 'Adagrad' in name:
291        partition_count += 1
292    self.assertEqual(10, partition_count)
293
294  def testLogisticRegression_MatrixData(self):
295    """Tests binary classification using matrix data as input."""
296    cont_features = [feature_column.real_valued_column('feature', dimension=4)]
297
298    classifier = dnn.DNNClassifier(
299        feature_columns=cont_features,
300        hidden_units=[3, 3],
301        config=run_config.RunConfig(tf_random_seed=1))
302
303    input_fn = test_data.iris_input_logistic_fn
304    classifier.fit(input_fn=input_fn, steps=5)
305    scores = classifier.evaluate(input_fn=input_fn, steps=1)
306    self._assertInRange(0.0, 1.0, scores['accuracy'])
307    self.assertIn('loss', scores)
308
309  def testLogisticRegression_MatrixData_Labels1D(self):
310    """Same as the last test, but label shape is [100] instead of [100, 1]."""
311
312    def _input_fn():
313      iris = test_data.prepare_iris_data_for_logistic_regression()
314      return {
315          'feature': constant_op.constant(
316              iris.data, dtype=dtypes.float32)
317      }, constant_op.constant(
318          iris.target, shape=[100], dtype=dtypes.int32)
319
320    cont_features = [feature_column.real_valued_column('feature', dimension=4)]
321
322    classifier = dnn.DNNClassifier(
323        feature_columns=cont_features,
324        hidden_units=[3, 3],
325        config=run_config.RunConfig(tf_random_seed=1))
326
327    classifier.fit(input_fn=_input_fn, steps=5)
328    scores = classifier.evaluate(input_fn=_input_fn, steps=1)
329    self.assertIn('loss', scores)
330
331  def testLogisticRegression_NpMatrixData(self):
332    """Tests binary classification using numpy matrix data as input."""
333    iris = test_data.prepare_iris_data_for_logistic_regression()
334    train_x = iris.data
335    train_y = iris.target
336    feature_columns = [feature_column.real_valued_column('', dimension=4)]
337    classifier = dnn.DNNClassifier(
338        feature_columns=feature_columns,
339        hidden_units=[3, 3],
340        config=run_config.RunConfig(tf_random_seed=1))
341
342    classifier.fit(x=train_x, y=train_y, steps=5)
343    scores = classifier.evaluate(x=train_x, y=train_y, steps=1)
344    self._assertInRange(0.0, 1.0, scores['accuracy'])
345
346  def _assertBinaryPredictions(self, expected_len, predictions):
347    self.assertEqual(expected_len, len(predictions))
348    for prediction in predictions:
349      self.assertIn(prediction, (0, 1))
350
351  def _assertClassificationPredictions(
352      self, expected_len, n_classes, predictions):
353    self.assertEqual(expected_len, len(predictions))
354    for prediction in predictions:
355      self.assertIn(prediction, range(n_classes))
356
357  def _assertProbabilities(self, expected_batch_size, expected_n_classes,
358                           probabilities):
359    self.assertEqual(expected_batch_size, len(probabilities))
360    for b in range(expected_batch_size):
361      self.assertEqual(expected_n_classes, len(probabilities[b]))
362      for i in range(expected_n_classes):
363        self._assertInRange(0.0, 1.0, probabilities[b][i])
364
365  def testEstimatorWithCoreFeatureColumns(self):
366
367    def _input_fn(num_epochs=None):
368      features = {
369          'age':
370              input_lib.limit_epochs(
371                  constant_op.constant([[.8], [0.2], [.1]]),
372                  num_epochs=num_epochs),
373          'language':
374              sparse_tensor.SparseTensor(
375                  values=input_lib.limit_epochs(
376                      ['en', 'fr', 'zh'], num_epochs=num_epochs),
377                  indices=[[0, 0], [0, 1], [2, 0]],
378                  dense_shape=[3, 2])
379      }
380      return features, constant_op.constant([[1], [0], [0]], dtype=dtypes.int32)
381
382    language_column = fc_core.categorical_column_with_hash_bucket(
383        'language', hash_bucket_size=20)
384    feature_columns = [
385        fc_core.embedding_column(language_column, dimension=1),
386        fc_core.numeric_column('age')
387    ]
388
389    classifier = dnn.DNNClassifier(
390        n_classes=2,
391        feature_columns=feature_columns,
392        hidden_units=[10, 10],
393        config=run_config.RunConfig(tf_random_seed=1))
394
395    classifier.fit(input_fn=_input_fn, steps=50)
396
397    scores = classifier.evaluate(input_fn=_input_fn, steps=1)
398    self._assertInRange(0.0, 1.0, scores['accuracy'])
399    self.assertIn('loss', scores)
400    predict_input_fn = functools.partial(_input_fn, num_epochs=1)
401    predicted_classes = list(
402        classifier.predict_classes(input_fn=predict_input_fn, as_iterable=True))
403    self._assertBinaryPredictions(3, predicted_classes)
404    predictions = list(
405        classifier.predict(input_fn=predict_input_fn, as_iterable=True))
406    self.assertAllEqual(predicted_classes, predictions)
407
408  def testLogisticRegression_TensorData(self):
409    """Tests binary classification using tensor data as input."""
410
411    def _input_fn(num_epochs=None):
412      features = {
413          'age':
414              input_lib.limit_epochs(
415                  constant_op.constant([[.8], [0.2], [.1]]),
416                  num_epochs=num_epochs),
417          'language':
418              sparse_tensor.SparseTensor(
419                  values=input_lib.limit_epochs(
420                      ['en', 'fr', 'zh'], num_epochs=num_epochs),
421                  indices=[[0, 0], [0, 1], [2, 0]],
422                  dense_shape=[3, 2])
423      }
424      return features, constant_op.constant([[1], [0], [0]], dtype=dtypes.int32)
425
426    language_column = feature_column.sparse_column_with_hash_bucket(
427        'language', hash_bucket_size=20)
428    feature_columns = [
429        feature_column.embedding_column(
430            language_column, dimension=1),
431        feature_column.real_valued_column('age')
432    ]
433
434    classifier = dnn.DNNClassifier(
435        n_classes=2,
436        feature_columns=feature_columns,
437        hidden_units=[10, 10],
438        config=run_config.RunConfig(tf_random_seed=1))
439
440    classifier.fit(input_fn=_input_fn, steps=50)
441
442    scores = classifier.evaluate(input_fn=_input_fn, steps=1)
443    self._assertInRange(0.0, 1.0, scores['accuracy'])
444    self.assertIn('loss', scores)
445    predict_input_fn = functools.partial(_input_fn, num_epochs=1)
446    predicted_classes = list(
447        classifier.predict_classes(
448            input_fn=predict_input_fn, as_iterable=True))
449    self._assertBinaryPredictions(3, predicted_classes)
450    predictions = list(
451        classifier.predict(input_fn=predict_input_fn, as_iterable=True))
452    self.assertAllEqual(predicted_classes, predictions)
453
454  def testLogisticRegression_FloatLabel(self):
455    """Tests binary classification with float labels."""
456
457    def _input_fn_float_label(num_epochs=None):
458      features = {
459          'age':
460              input_lib.limit_epochs(
461                  constant_op.constant([[50], [20], [10]]),
462                  num_epochs=num_epochs),
463          'language':
464              sparse_tensor.SparseTensor(
465                  values=input_lib.limit_epochs(
466                      ['en', 'fr', 'zh'], num_epochs=num_epochs),
467                  indices=[[0, 0], [0, 1], [2, 0]],
468                  dense_shape=[3, 2])
469      }
470      labels = constant_op.constant([[0.8], [0.], [0.2]], dtype=dtypes.float32)
471      return features, labels
472
473    language_column = feature_column.sparse_column_with_hash_bucket(
474        'language', hash_bucket_size=20)
475    feature_columns = [
476        feature_column.embedding_column(
477            language_column, dimension=1),
478        feature_column.real_valued_column('age')
479    ]
480
481    classifier = dnn.DNNClassifier(
482        n_classes=2,
483        feature_columns=feature_columns,
484        hidden_units=[3, 3],
485        config=run_config.RunConfig(tf_random_seed=1))
486
487    classifier.fit(input_fn=_input_fn_float_label, steps=50)
488
489    predict_input_fn = functools.partial(_input_fn_float_label, num_epochs=1)
490    predicted_classes = list(
491        classifier.predict_classes(
492            input_fn=predict_input_fn, as_iterable=True))
493    self._assertBinaryPredictions(3, predicted_classes)
494    predictions = list(
495        classifier.predict(
496            input_fn=predict_input_fn, as_iterable=True))
497    self.assertAllEqual(predicted_classes, predictions)
498    predictions_proba = list(
499        classifier.predict_proba(
500            input_fn=predict_input_fn, as_iterable=True))
501    self._assertProbabilities(3, 2, predictions_proba)
502
503  def testMultiClass_MatrixData(self):
504    """Tests multi-class classification using matrix data as input."""
505    cont_features = [feature_column.real_valued_column('feature', dimension=4)]
506
507    classifier = dnn.DNNClassifier(
508        n_classes=3,
509        feature_columns=cont_features,
510        hidden_units=[3, 3],
511        config=run_config.RunConfig(tf_random_seed=1))
512
513    input_fn = test_data.iris_input_multiclass_fn
514    classifier.fit(input_fn=input_fn, steps=200)
515    scores = classifier.evaluate(input_fn=input_fn, steps=1)
516    self._assertInRange(0.0, 1.0, scores['accuracy'])
517    self.assertIn('loss', scores)
518
519  def testMultiClass_MatrixData_Labels1D(self):
520    """Same as the last test, but label shape is [150] instead of [150, 1]."""
521
522    def _input_fn():
523      iris = base.load_iris()
524      return {
525          'feature': constant_op.constant(
526              iris.data, dtype=dtypes.float32)
527      }, constant_op.constant(
528          iris.target, shape=[150], dtype=dtypes.int32)
529
530    cont_features = [feature_column.real_valued_column('feature', dimension=4)]
531
532    classifier = dnn.DNNClassifier(
533        n_classes=3,
534        feature_columns=cont_features,
535        hidden_units=[3, 3],
536        config=run_config.RunConfig(tf_random_seed=1))
537
538    classifier.fit(input_fn=_input_fn, steps=200)
539    scores = classifier.evaluate(input_fn=_input_fn, steps=1)
540    self._assertInRange(0.0, 1.0, scores['accuracy'])
541
542  def testMultiClass_NpMatrixData(self):
543    """Tests multi-class classification using numpy matrix data as input."""
544    iris = base.load_iris()
545    train_x = iris.data
546    train_y = iris.target
547    feature_columns = [feature_column.real_valued_column('', dimension=4)]
548    classifier = dnn.DNNClassifier(
549        n_classes=3,
550        feature_columns=feature_columns,
551        hidden_units=[3, 3],
552        config=run_config.RunConfig(tf_random_seed=1))
553
554    classifier.fit(x=train_x, y=train_y, steps=200)
555    scores = classifier.evaluate(x=train_x, y=train_y, steps=1)
556    self._assertInRange(0.0, 1.0, scores['accuracy'])
557
558  def testMultiClassLabelKeys(self):
559    """Tests n_classes > 2 with label_keys vocabulary for labels."""
560    # Byte literals needed for python3 test to pass.
561    label_keys = [b'label0', b'label1', b'label2']
562
563    def _input_fn(num_epochs=None):
564      features = {
565          'age':
566              input_lib.limit_epochs(
567                  constant_op.constant([[.8], [0.2], [.1]]),
568                  num_epochs=num_epochs),
569          'language':
570              sparse_tensor.SparseTensor(
571                  values=input_lib.limit_epochs(
572                      ['en', 'fr', 'zh'], num_epochs=num_epochs),
573                  indices=[[0, 0], [0, 1], [2, 0]],
574                  dense_shape=[3, 2])
575      }
576      labels = constant_op.constant(
577          [[label_keys[1]], [label_keys[0]], [label_keys[0]]],
578          dtype=dtypes.string)
579      return features, labels
580
581    language_column = feature_column.sparse_column_with_hash_bucket(
582        'language', hash_bucket_size=20)
583    feature_columns = [
584        feature_column.embedding_column(
585            language_column, dimension=1),
586        feature_column.real_valued_column('age')
587    ]
588
589    classifier = dnn.DNNClassifier(
590        n_classes=3,
591        feature_columns=feature_columns,
592        hidden_units=[10, 10],
593        label_keys=label_keys,
594        config=run_config.RunConfig(tf_random_seed=1))
595
596    classifier.fit(input_fn=_input_fn, steps=50)
597
598    scores = classifier.evaluate(input_fn=_input_fn, steps=1)
599    self._assertInRange(0.0, 1.0, scores['accuracy'])
600    self.assertIn('loss', scores)
601    predict_input_fn = functools.partial(_input_fn, num_epochs=1)
602    predicted_classes = list(
603        classifier.predict_classes(
604            input_fn=predict_input_fn, as_iterable=True))
605    self.assertEqual(3, len(predicted_classes))
606    for pred in predicted_classes:
607      self.assertIn(pred, label_keys)
608    predictions = list(
609        classifier.predict(input_fn=predict_input_fn, as_iterable=True))
610    self.assertAllEqual(predicted_classes, predictions)
611
612  def testLoss(self):
613    """Tests loss calculation."""
614
615    def _input_fn_train():
616      # Create 4 rows, one of them (y = x), three of them (y=Not(x))
617      # The logistic prediction should be (y = 0.25).
618      labels = constant_op.constant([[1], [0], [0], [0]])
619      features = {'x': array_ops.ones(shape=[4, 1], dtype=dtypes.float32),}
620      return features, labels
621
622    classifier = dnn.DNNClassifier(
623        n_classes=2,
624        feature_columns=[feature_column.real_valued_column('x')],
625        hidden_units=[3, 3],
626        config=run_config.RunConfig(tf_random_seed=1))
627
628    classifier.fit(input_fn=_input_fn_train, steps=5)
629    scores = classifier.evaluate(input_fn=_input_fn_train, steps=1)
630    self.assertIn('loss', scores)
631
632  def testLossWithWeights(self):
633    """Tests loss calculation with weights."""
634
635    def _input_fn_train():
636      # 4 rows with equal weight, one of them (y = x), three of them (y=Not(x))
637      # The logistic prediction should be (y = 0.25).
638      labels = constant_op.constant([[1.], [0.], [0.], [0.]])
639      features = {
640          'x': array_ops.ones(
641              shape=[4, 1], dtype=dtypes.float32),
642          'w': constant_op.constant([[1.], [1.], [1.], [1.]])
643      }
644      return features, labels
645
646    def _input_fn_eval():
647      # 4 rows, with different weights.
648      labels = constant_op.constant([[1.], [0.], [0.], [0.]])
649      features = {
650          'x': array_ops.ones(
651              shape=[4, 1], dtype=dtypes.float32),
652          'w': constant_op.constant([[7.], [1.], [1.], [1.]])
653      }
654      return features, labels
655
656    classifier = dnn.DNNClassifier(
657        weight_column_name='w',
658        n_classes=2,
659        feature_columns=[feature_column.real_valued_column('x')],
660        hidden_units=[3, 3],
661        config=run_config.RunConfig(tf_random_seed=1))
662
663    classifier.fit(input_fn=_input_fn_train, steps=5)
664    scores = classifier.evaluate(input_fn=_input_fn_eval, steps=1)
665    self.assertIn('loss', scores)
666
667  def testTrainWithWeights(self):
668    """Tests training with given weight column."""
669
670    def _input_fn_train():
671      # Create 4 rows, one of them (y = x), three of them (y=Not(x))
672      # First row has more weight than others. Model should fit (y=x) better
673      # than (y=Not(x)) due to the relative higher weight of the first row.
674      labels = constant_op.constant([[1], [0], [0], [0]])
675      features = {
676          'x': array_ops.ones(
677              shape=[4, 1], dtype=dtypes.float32),
678          'w': constant_op.constant([[100.], [3.], [2.], [2.]])
679      }
680      return features, labels
681
682    def _input_fn_eval():
683      # Create 4 rows (y = x)
684      labels = constant_op.constant([[1], [1], [1], [1]])
685      features = {
686          'x': array_ops.ones(
687              shape=[4, 1], dtype=dtypes.float32),
688          'w': constant_op.constant([[1.], [1.], [1.], [1.]])
689      }
690      return features, labels
691
692    classifier = dnn.DNNClassifier(
693        weight_column_name='w',
694        feature_columns=[feature_column.real_valued_column('x')],
695        hidden_units=[3, 3],
696        config=run_config.RunConfig(tf_random_seed=1))
697
698    classifier.fit(input_fn=_input_fn_train, steps=5)
699    scores = classifier.evaluate(input_fn=_input_fn_eval, steps=1)
700    self._assertInRange(0.0, 1.0, scores['accuracy'])
701
702  def testPredict_AsIterableFalse(self):
703    """Tests predict and predict_prob methods with as_iterable=False."""
704
705    def _input_fn(num_epochs=None):
706      features = {
707          'age':
708              input_lib.limit_epochs(
709                  constant_op.constant([[.8], [.2], [.1]]),
710                  num_epochs=num_epochs),
711          'language':
712              sparse_tensor.SparseTensor(
713                  values=input_lib.limit_epochs(
714                      ['en', 'fr', 'zh'], num_epochs=num_epochs),
715                  indices=[[0, 0], [0, 1], [2, 0]],
716                  dense_shape=[3, 2])
717      }
718      return features, constant_op.constant([[1], [0], [0]], dtype=dtypes.int32)
719
720    sparse_column = feature_column.sparse_column_with_hash_bucket(
721        'language', hash_bucket_size=20)
722    feature_columns = [
723        feature_column.embedding_column(
724            sparse_column, dimension=1)
725    ]
726
727    n_classes = 3
728    classifier = dnn.DNNClassifier(
729        n_classes=n_classes,
730        feature_columns=feature_columns,
731        hidden_units=[10, 10],
732        config=run_config.RunConfig(tf_random_seed=1))
733
734    classifier.fit(input_fn=_input_fn, steps=100)
735
736    scores = classifier.evaluate(input_fn=_input_fn, steps=1)
737    self._assertInRange(0.0, 1.0, scores['accuracy'])
738    self.assertIn('loss', scores)
739    predicted_classes = classifier.predict_classes(
740        input_fn=_input_fn, as_iterable=False)
741    self._assertClassificationPredictions(3, n_classes, predicted_classes)
742    predictions = classifier.predict(input_fn=_input_fn, as_iterable=False)
743    self.assertAllEqual(predicted_classes, predictions)
744    probabilities = classifier.predict_proba(
745        input_fn=_input_fn, as_iterable=False)
746    self._assertProbabilities(3, n_classes, probabilities)
747
748  def testPredict_AsIterable(self):
749    """Tests predict and predict_prob methods with as_iterable=True."""
750
751    def _input_fn(num_epochs=None):
752      features = {
753          'age':
754              input_lib.limit_epochs(
755                  constant_op.constant([[.8], [.2], [.1]]),
756                  num_epochs=num_epochs),
757          'language':
758              sparse_tensor.SparseTensor(
759                  values=input_lib.limit_epochs(
760                      ['en', 'fr', 'zh'], num_epochs=num_epochs),
761                  indices=[[0, 0], [0, 1], [2, 0]],
762                  dense_shape=[3, 2])
763      }
764      return features, constant_op.constant([[1], [0], [0]], dtype=dtypes.int32)
765
766    language_column = feature_column.sparse_column_with_hash_bucket(
767        'language', hash_bucket_size=20)
768    feature_columns = [
769        feature_column.embedding_column(
770            language_column, dimension=1),
771        feature_column.real_valued_column('age')
772    ]
773
774    n_classes = 3
775    classifier = dnn.DNNClassifier(
776        n_classes=n_classes,
777        feature_columns=feature_columns,
778        hidden_units=[3, 3],
779        config=run_config.RunConfig(tf_random_seed=1))
780
781    classifier.fit(input_fn=_input_fn, steps=300)
782
783    scores = classifier.evaluate(input_fn=_input_fn, steps=1)
784    self._assertInRange(0.0, 1.0, scores['accuracy'])
785    self.assertIn('loss', scores)
786    predict_input_fn = functools.partial(_input_fn, num_epochs=1)
787    predicted_classes = list(
788        classifier.predict_classes(
789            input_fn=predict_input_fn, as_iterable=True))
790    self._assertClassificationPredictions(3, n_classes, predicted_classes)
791    predictions = list(
792        classifier.predict(
793            input_fn=predict_input_fn, as_iterable=True))
794    self.assertAllEqual(predicted_classes, predictions)
795    predicted_proba = list(
796        classifier.predict_proba(
797            input_fn=predict_input_fn, as_iterable=True))
798    self._assertProbabilities(3, n_classes, predicted_proba)
799
800  def testCustomMetrics(self):
801    """Tests custom evaluation metrics."""
802
803    def _input_fn(num_epochs=None):
804      # Create 4 rows, one of them (y = x), three of them (y=Not(x))
805      labels = constant_op.constant([[1], [0], [0], [0]])
806      features = {
807          'x':
808              input_lib.limit_epochs(
809                  array_ops.ones(
810                      shape=[4, 1], dtype=dtypes.float32),
811                  num_epochs=num_epochs),
812      }
813      return features, labels
814
815    def _my_metric_op(predictions, labels):
816      # For the case of binary classification, the 2nd column of "predictions"
817      # denotes the model predictions.
818      labels = math_ops.cast(labels, dtypes.float32)
819      predictions = array_ops.strided_slice(
820          predictions, [0, 1], [-1, 2], end_mask=1)
821      labels = math_ops.cast(labels, predictions.dtype)
822      return math_ops.reduce_sum(math_ops.multiply(predictions, labels))
823
824    classifier = dnn.DNNClassifier(
825        feature_columns=[feature_column.real_valued_column('x')],
826        hidden_units=[3, 3],
827        config=run_config.RunConfig(tf_random_seed=1))
828
829    classifier.fit(input_fn=_input_fn, steps=5)
830    scores = classifier.evaluate(
831        input_fn=_input_fn,
832        steps=5,
833        metrics={
834            'my_accuracy':
835                MetricSpec(
836                    metric_fn=metric_ops.streaming_accuracy,
837                    prediction_key='classes'),
838            'my_precision':
839                MetricSpec(
840                    metric_fn=metric_ops.streaming_precision,
841                    prediction_key='classes'),
842            'my_metric':
843                MetricSpec(
844                    metric_fn=_my_metric_op, prediction_key='probabilities')
845        })
846    self.assertTrue(
847        set(['loss', 'my_accuracy', 'my_precision', 'my_metric']).issubset(
848            set(scores.keys())))
849    predict_input_fn = functools.partial(_input_fn, num_epochs=1)
850    predictions = np.array(list(classifier.predict_classes(
851        input_fn=predict_input_fn)))
852    self.assertEqual(
853        _sklearn.accuracy_score([1, 0, 0, 0], predictions),
854        scores['my_accuracy'])
855
856    # Test the case where the 2nd element of the key is neither "classes" nor
857    # "probabilities".
858    with self.assertRaisesRegexp(KeyError, 'bad_type'):
859      classifier.evaluate(
860          input_fn=_input_fn,
861          steps=5,
862          metrics={
863              'bad_name':
864                  MetricSpec(
865                      metric_fn=metric_ops.streaming_auc,
866                      prediction_key='bad_type')
867          })
868
869  def testTrainSaveLoad(self):
870    """Tests that insures you can save and reload a trained model."""
871
872    def _input_fn(num_epochs=None):
873      features = {
874          'age':
875              input_lib.limit_epochs(
876                  constant_op.constant([[.8], [.2], [.1]]),
877                  num_epochs=num_epochs),
878          'language':
879              sparse_tensor.SparseTensor(
880                  values=input_lib.limit_epochs(
881                      ['en', 'fr', 'zh'], num_epochs=num_epochs),
882                  indices=[[0, 0], [0, 1], [2, 0]],
883                  dense_shape=[3, 2])
884      }
885      return features, constant_op.constant([[1], [0], [0]], dtype=dtypes.int32)
886
887    sparse_column = feature_column.sparse_column_with_hash_bucket(
888        'language', hash_bucket_size=20)
889    feature_columns = [
890        feature_column.embedding_column(
891            sparse_column, dimension=1)
892    ]
893
894    model_dir = tempfile.mkdtemp()
895    classifier = dnn.DNNClassifier(
896        model_dir=model_dir,
897        n_classes=3,
898        feature_columns=feature_columns,
899        hidden_units=[3, 3],
900        config=run_config.RunConfig(tf_random_seed=1))
901
902    classifier.fit(input_fn=_input_fn, steps=5)
903    predict_input_fn = functools.partial(_input_fn, num_epochs=1)
904    predictions1 = classifier.predict_classes(input_fn=predict_input_fn)
905    del classifier
906
907    classifier2 = dnn.DNNClassifier(
908        model_dir=model_dir,
909        n_classes=3,
910        feature_columns=feature_columns,
911        hidden_units=[3, 3],
912        config=run_config.RunConfig(tf_random_seed=1))
913    predictions2 = classifier2.predict_classes(input_fn=predict_input_fn)
914    self.assertEqual(list(predictions1), list(predictions2))
915
916  def testTrainWithPartitionedVariables(self):
917    """Tests training with partitioned variables."""
918
919    def _input_fn(num_epochs=None):
920      features = {
921          'age':
922              input_lib.limit_epochs(
923                  constant_op.constant([[.8], [.2], [.1]]),
924                  num_epochs=num_epochs),
925          'language':
926              sparse_tensor.SparseTensor(
927                  values=input_lib.limit_epochs(
928                      ['en', 'fr', 'zh'], num_epochs=num_epochs),
929                  indices=[[0, 0], [0, 1], [2, 0]],
930                  dense_shape=[3, 2])
931      }
932      return features, constant_op.constant([[1], [0], [0]], dtype=dtypes.int32)
933
934    # The given hash_bucket_size results in variables larger than the
935    # default min_slice_size attribute, so the variables are partitioned.
936    sparse_column = feature_column.sparse_column_with_hash_bucket(
937        'language', hash_bucket_size=2e7)
938    feature_columns = [
939        feature_column.embedding_column(
940            sparse_column, dimension=1)
941    ]
942
943    tf_config = {
944        'cluster': {
945            run_config.TaskType.PS: ['fake_ps_0', 'fake_ps_1']
946        }
947    }
948    with test.mock.patch.dict('os.environ',
949                              {'TF_CONFIG': json.dumps(tf_config)}):
950      config = run_config.RunConfig(tf_random_seed=1)
951      # Because we did not start a distributed cluster, we need to pass an
952      # empty ClusterSpec, otherwise the device_setter will look for
953      # distributed jobs, such as "/job:ps" which are not present.
954      config._cluster_spec = server_lib.ClusterSpec({})
955
956    classifier = dnn.DNNClassifier(
957        n_classes=3,
958        feature_columns=feature_columns,
959        hidden_units=[3, 3],
960        config=config)
961
962    classifier.fit(input_fn=_input_fn, steps=5)
963    scores = classifier.evaluate(input_fn=_input_fn, steps=1)
964    self._assertInRange(0.0, 1.0, scores['accuracy'])
965    self.assertIn('loss', scores)
966
967  def testExport(self):
968    """Tests export model for servo."""
969
970    def input_fn():
971      return {
972          'age':
973              constant_op.constant([1]),
974          'language':
975              sparse_tensor.SparseTensor(
976                  values=['english'], indices=[[0, 0]], dense_shape=[1, 1])
977      }, constant_op.constant([[1]])
978
979    language = feature_column.sparse_column_with_hash_bucket('language', 100)
980    feature_columns = [
981        feature_column.real_valued_column('age'),
982        feature_column.embedding_column(
983            language, dimension=1)
984    ]
985
986    classifier = dnn.DNNClassifier(
987        feature_columns=feature_columns, hidden_units=[3, 3])
988    classifier.fit(input_fn=input_fn, steps=5)
989
990    export_dir = tempfile.mkdtemp()
991    classifier.export(export_dir)
992
993  def testEnableCenteredBias(self):
994    """Tests that we can enable centered bias."""
995    cont_features = [feature_column.real_valued_column('feature', dimension=4)]
996
997    classifier = dnn.DNNClassifier(
998        n_classes=3,
999        feature_columns=cont_features,
1000        hidden_units=[3, 3],
1001        enable_centered_bias=True,
1002        config=run_config.RunConfig(tf_random_seed=1))
1003
1004    input_fn = test_data.iris_input_multiclass_fn
1005    classifier.fit(input_fn=input_fn, steps=5)
1006    self.assertIn('dnn/multi_class_head/centered_bias_weight',
1007                  classifier.get_variable_names())
1008    scores = classifier.evaluate(input_fn=input_fn, steps=1)
1009    self._assertInRange(0.0, 1.0, scores['accuracy'])
1010    self.assertIn('loss', scores)
1011
1012  def testDisableCenteredBias(self):
1013    """Tests that we can disable centered bias."""
1014    cont_features = [feature_column.real_valued_column('feature', dimension=4)]
1015
1016    classifier = dnn.DNNClassifier(
1017        n_classes=3,
1018        feature_columns=cont_features,
1019        hidden_units=[3, 3],
1020        enable_centered_bias=False,
1021        config=run_config.RunConfig(tf_random_seed=1))
1022
1023    input_fn = test_data.iris_input_multiclass_fn
1024    classifier.fit(input_fn=input_fn, steps=5)
1025    self.assertNotIn('centered_bias_weight', classifier.get_variable_names())
1026    scores = classifier.evaluate(input_fn=input_fn, steps=1)
1027    self._assertInRange(0.0, 1.0, scores['accuracy'])
1028    self.assertIn('loss', scores)
1029
1030
1031class DNNRegressorTest(test.TestCase):
1032
1033  def testExperimentIntegration(self):
1034    exp = experiment.Experiment(
1035        estimator=dnn.DNNRegressor(
1036            feature_columns=[
1037                feature_column.real_valued_column(
1038                    'feature', dimension=4)
1039            ],
1040            hidden_units=[3, 3]),
1041        train_input_fn=test_data.iris_input_logistic_fn,
1042        eval_input_fn=test_data.iris_input_logistic_fn)
1043    exp.test()
1044
1045  def testEstimatorContract(self):
1046    estimator_test_utils.assert_estimator_contract(self, dnn.DNNRegressor)
1047
1048  def testRegression_MatrixData(self):
1049    """Tests regression using matrix data as input."""
1050    cont_features = [feature_column.real_valued_column('feature', dimension=4)]
1051
1052    regressor = dnn.DNNRegressor(
1053        feature_columns=cont_features,
1054        hidden_units=[3, 3],
1055        config=run_config.RunConfig(tf_random_seed=1))
1056
1057    input_fn = test_data.iris_input_logistic_fn
1058    regressor.fit(input_fn=input_fn, steps=200)
1059    scores = regressor.evaluate(input_fn=input_fn, steps=1)
1060    self.assertIn('loss', scores)
1061
1062  def testRegression_MatrixData_Labels1D(self):
1063    """Same as the last test, but label shape is [100] instead of [100, 1]."""
1064
1065    def _input_fn():
1066      iris = test_data.prepare_iris_data_for_logistic_regression()
1067      return {
1068          'feature': constant_op.constant(
1069              iris.data, dtype=dtypes.float32)
1070      }, constant_op.constant(
1071          iris.target, shape=[100], dtype=dtypes.int32)
1072
1073    cont_features = [feature_column.real_valued_column('feature', dimension=4)]
1074
1075    regressor = dnn.DNNRegressor(
1076        feature_columns=cont_features,
1077        hidden_units=[3, 3],
1078        config=run_config.RunConfig(tf_random_seed=1))
1079
1080    regressor.fit(input_fn=_input_fn, steps=200)
1081    scores = regressor.evaluate(input_fn=_input_fn, steps=1)
1082    self.assertIn('loss', scores)
1083
1084  def testRegression_NpMatrixData(self):
1085    """Tests binary classification using numpy matrix data as input."""
1086    iris = test_data.prepare_iris_data_for_logistic_regression()
1087    train_x = iris.data
1088    train_y = iris.target
1089    feature_columns = [feature_column.real_valued_column('', dimension=4)]
1090    regressor = dnn.DNNRegressor(
1091        feature_columns=feature_columns,
1092        hidden_units=[3, 3],
1093        config=run_config.RunConfig(tf_random_seed=1))
1094
1095    regressor.fit(x=train_x, y=train_y, steps=200)
1096    scores = regressor.evaluate(x=train_x, y=train_y, steps=1)
1097    self.assertIn('loss', scores)
1098
1099  def testRegression_TensorData(self):
1100    """Tests regression using tensor data as input."""
1101
1102    def _input_fn(num_epochs=None):
1103      features = {
1104          'age':
1105              input_lib.limit_epochs(
1106                  constant_op.constant([[.8], [.15], [0.]]),
1107                  num_epochs=num_epochs),
1108          'language':
1109              sparse_tensor.SparseTensor(
1110                  values=input_lib.limit_epochs(
1111                      ['en', 'fr', 'zh'], num_epochs=num_epochs),
1112                  indices=[[0, 0], [0, 1], [2, 0]],
1113                  dense_shape=[3, 2])
1114      }
1115      return features, constant_op.constant([1., 0., 0.2], dtype=dtypes.float32)
1116
1117    language_column = feature_column.sparse_column_with_hash_bucket(
1118        'language', hash_bucket_size=20)
1119    feature_columns = [
1120        feature_column.embedding_column(
1121            language_column, dimension=1),
1122        feature_column.real_valued_column('age')
1123    ]
1124
1125    regressor = dnn.DNNRegressor(
1126        feature_columns=feature_columns,
1127        hidden_units=[3, 3],
1128        config=run_config.RunConfig(tf_random_seed=1))
1129
1130    regressor.fit(input_fn=_input_fn, steps=200)
1131
1132    scores = regressor.evaluate(input_fn=_input_fn, steps=1)
1133    self.assertIn('loss', scores)
1134
1135  def testLoss(self):
1136    """Tests loss calculation."""
1137
1138    def _input_fn_train():
1139      # Create 4 rows, one of them (y = x), three of them (y=Not(x))
1140      # The algorithm should learn (y = 0.25).
1141      labels = constant_op.constant([[1.], [0.], [0.], [0.]])
1142      features = {'x': array_ops.ones(shape=[4, 1], dtype=dtypes.float32),}
1143      return features, labels
1144
1145    regressor = dnn.DNNRegressor(
1146        feature_columns=[feature_column.real_valued_column('x')],
1147        hidden_units=[3, 3],
1148        config=run_config.RunConfig(tf_random_seed=1))
1149
1150    regressor.fit(input_fn=_input_fn_train, steps=5)
1151    scores = regressor.evaluate(input_fn=_input_fn_train, steps=1)
1152    self.assertIn('loss', scores)
1153
1154  def testLossWithWeights(self):
1155    """Tests loss calculation with weights."""
1156
1157    def _input_fn_train():
1158      # 4 rows with equal weight, one of them (y = x), three of them (y=Not(x))
1159      # The algorithm should learn (y = 0.25).
1160      labels = constant_op.constant([[1.], [0.], [0.], [0.]])
1161      features = {
1162          'x': array_ops.ones(
1163              shape=[4, 1], dtype=dtypes.float32),
1164          'w': constant_op.constant([[1.], [1.], [1.], [1.]])
1165      }
1166      return features, labels
1167
1168    def _input_fn_eval():
1169      # 4 rows, with different weights.
1170      labels = constant_op.constant([[1.], [0.], [0.], [0.]])
1171      features = {
1172          'x': array_ops.ones(
1173              shape=[4, 1], dtype=dtypes.float32),
1174          'w': constant_op.constant([[7.], [1.], [1.], [1.]])
1175      }
1176      return features, labels
1177
1178    regressor = dnn.DNNRegressor(
1179        weight_column_name='w',
1180        feature_columns=[feature_column.real_valued_column('x')],
1181        hidden_units=[3, 3],
1182        config=run_config.RunConfig(tf_random_seed=1))
1183
1184    regressor.fit(input_fn=_input_fn_train, steps=5)
1185    scores = regressor.evaluate(input_fn=_input_fn_eval, steps=1)
1186    self.assertIn('loss', scores)
1187
1188  def testTrainWithWeights(self):
1189    """Tests training with given weight column."""
1190
1191    def _input_fn_train():
1192      # Create 4 rows, one of them (y = x), three of them (y=Not(x))
1193      # First row has more weight than others. Model should fit (y=x) better
1194      # than (y=Not(x)) due to the relative higher weight of the first row.
1195      labels = constant_op.constant([[1.], [0.], [0.], [0.]])
1196      features = {
1197          'x': array_ops.ones(
1198              shape=[4, 1], dtype=dtypes.float32),
1199          'w': constant_op.constant([[100.], [3.], [2.], [2.]])
1200      }
1201      return features, labels
1202
1203    def _input_fn_eval():
1204      # Create 4 rows (y = x)
1205      labels = constant_op.constant([[1.], [1.], [1.], [1.]])
1206      features = {
1207          'x': array_ops.ones(
1208              shape=[4, 1], dtype=dtypes.float32),
1209          'w': constant_op.constant([[1.], [1.], [1.], [1.]])
1210      }
1211      return features, labels
1212
1213    regressor = dnn.DNNRegressor(
1214        weight_column_name='w',
1215        feature_columns=[feature_column.real_valued_column('x')],
1216        hidden_units=[3, 3],
1217        config=run_config.RunConfig(tf_random_seed=1))
1218
1219    regressor.fit(input_fn=_input_fn_train, steps=5)
1220    scores = regressor.evaluate(input_fn=_input_fn_eval, steps=1)
1221    self.assertIn('loss', scores)
1222
1223  def _assertRegressionOutputs(
1224      self, predictions, expected_shape):
1225    predictions_nparray = np.array(predictions)
1226    self.assertAllEqual(expected_shape, predictions_nparray.shape)
1227    self.assertTrue(np.issubdtype(predictions_nparray.dtype, np.floating))
1228
1229  def testPredict_AsIterableFalse(self):
1230    """Tests predict method with as_iterable=False."""
1231    labels = [1., 0., 0.2]
1232
1233    def _input_fn(num_epochs=None):
1234      features = {
1235          'age':
1236              input_lib.limit_epochs(
1237                  constant_op.constant([[0.8], [0.15], [0.]]),
1238                  num_epochs=num_epochs),
1239          'language':
1240              sparse_tensor.SparseTensor(
1241                  values=input_lib.limit_epochs(
1242                      ['en', 'fr', 'zh'], num_epochs=num_epochs),
1243                  indices=[[0, 0], [0, 1], [2, 0]],
1244                  dense_shape=[3, 2])
1245      }
1246      return features, constant_op.constant(labels, dtype=dtypes.float32)
1247
1248    sparse_column = feature_column.sparse_column_with_hash_bucket(
1249        'language', hash_bucket_size=20)
1250    feature_columns = [
1251        feature_column.embedding_column(
1252            sparse_column, dimension=1),
1253        feature_column.real_valued_column('age')
1254    ]
1255
1256    regressor = dnn.DNNRegressor(
1257        feature_columns=feature_columns,
1258        hidden_units=[3, 3],
1259        config=run_config.RunConfig(tf_random_seed=1))
1260
1261    regressor.fit(input_fn=_input_fn, steps=200)
1262
1263    scores = regressor.evaluate(input_fn=_input_fn, steps=1)
1264    self.assertIn('loss', scores)
1265    predicted_scores = regressor.predict_scores(
1266        input_fn=_input_fn, as_iterable=False)
1267    self._assertRegressionOutputs(predicted_scores, [3])
1268    predictions = regressor.predict(input_fn=_input_fn, as_iterable=False)
1269    self.assertAllClose(predicted_scores, predictions)
1270
1271  def testPredict_AsIterable(self):
1272    """Tests predict method with as_iterable=True."""
1273    labels = [1., 0., 0.2]
1274
1275    def _input_fn(num_epochs=None):
1276      features = {
1277          'age':
1278              input_lib.limit_epochs(
1279                  constant_op.constant([[0.8], [0.15], [0.]]),
1280                  num_epochs=num_epochs),
1281          'language':
1282              sparse_tensor.SparseTensor(
1283                  values=input_lib.limit_epochs(
1284                      ['en', 'fr', 'zh'], num_epochs=num_epochs),
1285                  indices=[[0, 0], [0, 1], [2, 0]],
1286                  dense_shape=[3, 2])
1287      }
1288      return features, constant_op.constant(labels, dtype=dtypes.float32)
1289
1290    sparse_column = feature_column.sparse_column_with_hash_bucket(
1291        'language', hash_bucket_size=20)
1292    feature_columns = [
1293        feature_column.embedding_column(
1294            sparse_column, dimension=1),
1295        feature_column.real_valued_column('age')
1296    ]
1297
1298    regressor = dnn.DNNRegressor(
1299        feature_columns=feature_columns,
1300        hidden_units=[3, 3],
1301        config=run_config.RunConfig(tf_random_seed=1))
1302
1303    regressor.fit(input_fn=_input_fn, steps=200)
1304
1305    scores = regressor.evaluate(input_fn=_input_fn, steps=1)
1306    self.assertIn('loss', scores)
1307    predict_input_fn = functools.partial(_input_fn, num_epochs=1)
1308    predicted_scores = list(
1309        regressor.predict_scores(
1310            input_fn=predict_input_fn, as_iterable=True))
1311    self._assertRegressionOutputs(predicted_scores, [3])
1312    predictions = list(
1313        regressor.predict(input_fn=predict_input_fn, as_iterable=True))
1314    self.assertAllClose(predicted_scores, predictions)
1315
1316  def testCustomMetrics(self):
1317    """Tests custom evaluation metrics."""
1318
1319    def _input_fn(num_epochs=None):
1320      # Create 4 rows, one of them (y = x), three of them (y=Not(x))
1321      labels = constant_op.constant([[1.], [0.], [0.], [0.]])
1322      features = {
1323          'x':
1324              input_lib.limit_epochs(
1325                  array_ops.ones(
1326                      shape=[4, 1], dtype=dtypes.float32),
1327                  num_epochs=num_epochs),
1328      }
1329      return features, labels
1330
1331    def _my_metric_op(predictions, labels):
1332      return math_ops.reduce_sum(math_ops.multiply(predictions, labels))
1333
1334    regressor = dnn.DNNRegressor(
1335        feature_columns=[feature_column.real_valued_column('x')],
1336        hidden_units=[3, 3],
1337        config=run_config.RunConfig(tf_random_seed=1))
1338
1339    regressor.fit(input_fn=_input_fn, steps=5)
1340    scores = regressor.evaluate(
1341        input_fn=_input_fn,
1342        steps=1,
1343        metrics={
1344            'my_error': metric_ops.streaming_mean_squared_error,
1345            ('my_metric', 'scores'): _my_metric_op
1346        })
1347    self.assertIn('loss', set(scores.keys()))
1348    self.assertIn('my_error', set(scores.keys()))
1349    self.assertIn('my_metric', set(scores.keys()))
1350    predict_input_fn = functools.partial(_input_fn, num_epochs=1)
1351    predictions = np.array(list(regressor.predict_scores(
1352        input_fn=predict_input_fn)))
1353    self.assertAlmostEqual(
1354        _sklearn.mean_squared_error(np.array([1, 0, 0, 0]), predictions),
1355        scores['my_error'])
1356
1357    # Tests the case that the 2nd element of the key is not "scores".
1358    with self.assertRaises(KeyError):
1359      regressor.evaluate(
1360          input_fn=_input_fn,
1361          steps=1,
1362          metrics={
1363              ('my_error', 'predictions'):
1364                  metric_ops.streaming_mean_squared_error
1365          })
1366
1367    # Tests the case where the tuple of the key doesn't have 2 elements.
1368    with self.assertRaises(ValueError):
1369      regressor.evaluate(
1370          input_fn=_input_fn,
1371          steps=1,
1372          metrics={
1373              ('bad_length_name', 'scores', 'bad_length'):
1374                  metric_ops.streaming_mean_squared_error
1375          })
1376
1377  def testCustomMetricsWithMetricSpec(self):
1378    """Tests custom evaluation metrics that use MetricSpec."""
1379
1380    def _input_fn(num_epochs=None):
1381      # Create 4 rows, one of them (y = x), three of them (y=Not(x))
1382      labels = constant_op.constant([[1.], [0.], [0.], [0.]])
1383      features = {
1384          'x':
1385              input_lib.limit_epochs(
1386                  array_ops.ones(
1387                      shape=[4, 1], dtype=dtypes.float32),
1388                  num_epochs=num_epochs),
1389      }
1390      return features, labels
1391
1392    def _my_metric_op(predictions, labels):
1393      return math_ops.reduce_sum(math_ops.multiply(predictions, labels))
1394
1395    regressor = dnn.DNNRegressor(
1396        feature_columns=[feature_column.real_valued_column('x')],
1397        hidden_units=[3, 3],
1398        config=run_config.RunConfig(tf_random_seed=1))
1399
1400    regressor.fit(input_fn=_input_fn, steps=5)
1401    scores = regressor.evaluate(
1402        input_fn=_input_fn,
1403        steps=1,
1404        metrics={
1405            'my_error':
1406                MetricSpec(
1407                    metric_fn=metric_ops.streaming_mean_squared_error,
1408                    prediction_key='scores'),
1409            'my_metric':
1410                MetricSpec(
1411                    metric_fn=_my_metric_op, prediction_key='scores')
1412        })
1413    self.assertIn('loss', set(scores.keys()))
1414    self.assertIn('my_error', set(scores.keys()))
1415    self.assertIn('my_metric', set(scores.keys()))
1416    predict_input_fn = functools.partial(_input_fn, num_epochs=1)
1417    predictions = np.array(list(regressor.predict_scores(
1418        input_fn=predict_input_fn)))
1419    self.assertAlmostEqual(
1420        _sklearn.mean_squared_error(np.array([1, 0, 0, 0]), predictions),
1421        scores['my_error'])
1422
1423    # Tests the case where the prediction_key is not "scores".
1424    with self.assertRaisesRegexp(KeyError, 'bad_type'):
1425      regressor.evaluate(
1426          input_fn=_input_fn,
1427          steps=1,
1428          metrics={
1429              'bad_name':
1430                  MetricSpec(
1431                      metric_fn=metric_ops.streaming_auc,
1432                      prediction_key='bad_type')
1433          })
1434
1435  def testTrainSaveLoad(self):
1436    """Tests that insures you can save and reload a trained model."""
1437
1438    def _input_fn(num_epochs=None):
1439      features = {
1440          'age':
1441              input_lib.limit_epochs(
1442                  constant_op.constant([[0.8], [0.15], [0.]]),
1443                  num_epochs=num_epochs),
1444          'language':
1445              sparse_tensor.SparseTensor(
1446                  values=input_lib.limit_epochs(
1447                      ['en', 'fr', 'zh'], num_epochs=num_epochs),
1448                  indices=[[0, 0], [0, 1], [2, 0]],
1449                  dense_shape=[3, 2])
1450      }
1451      return features, constant_op.constant([1., 0., 0.2], dtype=dtypes.float32)
1452
1453    sparse_column = feature_column.sparse_column_with_hash_bucket(
1454        'language', hash_bucket_size=20)
1455    feature_columns = [
1456        feature_column.embedding_column(
1457            sparse_column, dimension=1),
1458        feature_column.real_valued_column('age')
1459    ]
1460
1461    model_dir = tempfile.mkdtemp()
1462    regressor = dnn.DNNRegressor(
1463        model_dir=model_dir,
1464        feature_columns=feature_columns,
1465        hidden_units=[3, 3],
1466        config=run_config.RunConfig(tf_random_seed=1))
1467
1468    regressor.fit(input_fn=_input_fn, steps=5)
1469    predict_input_fn = functools.partial(_input_fn, num_epochs=1)
1470    predictions = list(regressor.predict_scores(input_fn=predict_input_fn))
1471    del regressor
1472
1473    regressor2 = dnn.DNNRegressor(
1474        model_dir=model_dir,
1475        feature_columns=feature_columns,
1476        hidden_units=[3, 3],
1477        config=run_config.RunConfig(tf_random_seed=1))
1478    predictions2 = list(regressor2.predict_scores(input_fn=predict_input_fn))
1479    self.assertAllClose(predictions, predictions2)
1480
1481  def testTrainWithPartitionedVariables(self):
1482    """Tests training with partitioned variables."""
1483
1484    def _input_fn(num_epochs=None):
1485      features = {
1486          'age':
1487              input_lib.limit_epochs(
1488                  constant_op.constant([[0.8], [0.15], [0.]]),
1489                  num_epochs=num_epochs),
1490          'language':
1491              sparse_tensor.SparseTensor(
1492                  values=input_lib.limit_epochs(
1493                      ['en', 'fr', 'zh'], num_epochs=num_epochs),
1494                  indices=[[0, 0], [0, 1], [2, 0]],
1495                  dense_shape=[3, 2])
1496      }
1497      return features, constant_op.constant([1., 0., 0.2], dtype=dtypes.float32)
1498
1499    # The given hash_bucket_size results in variables larger than the
1500    # default min_slice_size attribute, so the variables are partitioned.
1501    sparse_column = feature_column.sparse_column_with_hash_bucket(
1502        'language', hash_bucket_size=2e7)
1503    feature_columns = [
1504        feature_column.embedding_column(
1505            sparse_column, dimension=1),
1506        feature_column.real_valued_column('age')
1507    ]
1508
1509    tf_config = {
1510        'cluster': {
1511            run_config.TaskType.PS: ['fake_ps_0', 'fake_ps_1']
1512        }
1513    }
1514    with test.mock.patch.dict('os.environ',
1515                              {'TF_CONFIG': json.dumps(tf_config)}):
1516      config = run_config.RunConfig(tf_random_seed=1)
1517      # Because we did not start a distributed cluster, we need to pass an
1518      # empty ClusterSpec, otherwise the device_setter will look for
1519      # distributed jobs, such as "/job:ps" which are not present.
1520      config._cluster_spec = server_lib.ClusterSpec({})
1521
1522    regressor = dnn.DNNRegressor(
1523        feature_columns=feature_columns, hidden_units=[3, 3], config=config)
1524
1525    regressor.fit(input_fn=_input_fn, steps=5)
1526
1527    scores = regressor.evaluate(input_fn=_input_fn, steps=1)
1528    self.assertIn('loss', scores)
1529
1530  def testEnableCenteredBias(self):
1531    """Tests that we can enable centered bias."""
1532
1533    def _input_fn(num_epochs=None):
1534      features = {
1535          'age':
1536              input_lib.limit_epochs(
1537                  constant_op.constant([[0.8], [0.15], [0.]]),
1538                  num_epochs=num_epochs),
1539          'language':
1540              sparse_tensor.SparseTensor(
1541                  values=input_lib.limit_epochs(
1542                      ['en', 'fr', 'zh'], num_epochs=num_epochs),
1543                  indices=[[0, 0], [0, 1], [2, 0]],
1544                  dense_shape=[3, 2])
1545      }
1546      return features, constant_op.constant([1., 0., 0.2], dtype=dtypes.float32)
1547
1548    sparse_column = feature_column.sparse_column_with_hash_bucket(
1549        'language', hash_bucket_size=20)
1550    feature_columns = [
1551        feature_column.embedding_column(
1552            sparse_column, dimension=1),
1553        feature_column.real_valued_column('age')
1554    ]
1555
1556    regressor = dnn.DNNRegressor(
1557        feature_columns=feature_columns,
1558        hidden_units=[3, 3],
1559        enable_centered_bias=True,
1560        config=run_config.RunConfig(tf_random_seed=1))
1561
1562    regressor.fit(input_fn=_input_fn, steps=5)
1563    self.assertIn('dnn/regression_head/centered_bias_weight',
1564                  regressor.get_variable_names())
1565
1566    scores = regressor.evaluate(input_fn=_input_fn, steps=1)
1567    self.assertIn('loss', scores)
1568
1569  def testDisableCenteredBias(self):
1570    """Tests that we can disable centered bias."""
1571
1572    def _input_fn(num_epochs=None):
1573      features = {
1574          'age':
1575              input_lib.limit_epochs(
1576                  constant_op.constant([[0.8], [0.15], [0.]]),
1577                  num_epochs=num_epochs),
1578          'language':
1579              sparse_tensor.SparseTensor(
1580                  values=input_lib.limit_epochs(
1581                      ['en', 'fr', 'zh'], num_epochs=num_epochs),
1582                  indices=[[0, 0], [0, 1], [2, 0]],
1583                  dense_shape=[3, 2])
1584      }
1585      return features, constant_op.constant([1., 0., 0.2], dtype=dtypes.float32)
1586
1587    sparse_column = feature_column.sparse_column_with_hash_bucket(
1588        'language', hash_bucket_size=20)
1589    feature_columns = [
1590        feature_column.embedding_column(
1591            sparse_column, dimension=1),
1592        feature_column.real_valued_column('age')
1593    ]
1594
1595    regressor = dnn.DNNRegressor(
1596        feature_columns=feature_columns,
1597        hidden_units=[3, 3],
1598        enable_centered_bias=False,
1599        config=run_config.RunConfig(tf_random_seed=1))
1600
1601    regressor.fit(input_fn=_input_fn, steps=5)
1602    self.assertNotIn('centered_bias_weight', regressor.get_variable_names())
1603
1604    scores = regressor.evaluate(input_fn=_input_fn, steps=1)
1605    self.assertIn('loss', scores)
1606
1607
def boston_input_fn():
  """Builds float32 feature and label tensors from the Boston dataset.

  Returns:
    A `(features, labels)` tuple. `features` is the dataset's `data` reshaped
    to `[-1, 13]`; `labels` is its `target` reshaped to `[-1, 1]`. Both are
    cast to float32.
  """
  dataset = base.load_boston()

  feature_values = constant_op.constant(dataset.data)
  features = math_ops.cast(
      array_ops.reshape(feature_values, [-1, 13]), dtypes.float32)

  label_values = constant_op.constant(dataset.target)
  labels = math_ops.cast(
      array_ops.reshape(label_values, [-1, 1]), dtypes.float32)

  return features, labels
1617
1618
class FeatureColumnTest(test.TestCase):
  """Exercises DNNRegressor with feature columns inferred from an input_fn."""

  def testTrain(self):
    """Runs one fit step and one evaluate step on the Boston input."""
    inferred_columns = estimator.infer_real_valued_columns_from_input_fn(
        boston_input_fn)
    regressor = dnn.DNNRegressor(
        feature_columns=inferred_columns, hidden_units=[3, 3])
    regressor.fit(input_fn=boston_input_fn, steps=1)
    # Only checks that evaluation runs; the returned metrics are not inspected.
    regressor.evaluate(input_fn=boston_input_fn, steps=1)
1627
1628
# Run the test cases in this module when it is executed as a script.
if __name__ == '__main__':
  test.main()
1631