# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for Debug estimators."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import collections
import functools
import operator
import tempfile

import numpy as np

from tensorflow.contrib.layers.python.layers import feature_column
from tensorflow.contrib.layers.python.layers import feature_column_ops
from tensorflow.contrib.learn.python.learn import experiment
from tensorflow.contrib.learn.python.learn.datasets import base
from tensorflow.contrib.learn.python.learn.estimators import _sklearn
from tensorflow.contrib.learn.python.learn.estimators import debug
from tensorflow.contrib.learn.python.learn.estimators import estimator_test_utils
from tensorflow.contrib.learn.python.learn.estimators import run_config
from tensorflow.contrib.learn.python.learn.estimators import test_data
from tensorflow.contrib.learn.python.learn.metric_spec import MetricSpec
from tensorflow.contrib.metrics.python.ops import metric_ops
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import sparse_tensor
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.platform import test
from tensorflow.python.training import input as input_lib

NUM_EXAMPLES = 100
N_CLASSES = 5  # Cardinality of multiclass labels.
LABEL_DIMENSION = 3  # Dimensionality of regression labels.


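# Helpers shared by the classifier and regressor tests below: a 50/50
# train/test split over in-memory numpy arrays, and a builder that wraps them
# in the (features dict, labels) pair returned by a contrib.learn input_fn.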
def _train_test_split(features_and_labels):
  """Splits `features_and_labels` into equal train and test halves."""
  features, labels = features_and_labels
  train_set = (features[:int(len(features) / 2)],
               labels[:int(len(features) / 2)])
  test_set = (features[int(len(features) / 2):],
              labels[int(len(features) / 2):])
  return train_set, test_set


def _input_fn_builder(features, labels):
  """Builds an input_fn returning `features`/`labels` as constant tensors."""

  def input_fn():
    feature_dict = {'features': constant_op.constant(features)}
    my_labels = labels
    if my_labels is not None:
      my_labels = constant_op.constant(my_labels)
    return feature_dict, my_labels

  return input_fn


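# DebugClassifier is expected to ignore the input features: predict_classes
# should return the majority class observed during training, and predict_proba
# the empirical training class distribution (see testPredict and
# testPredictProba below).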
class DebugClassifierTest(test.TestCase):

  def setUp(self):
    np.random.seed(100)
    self.features = np.random.rand(NUM_EXAMPLES, 5)
    self.labels = np.random.choice(
        range(N_CLASSES), p=[0.1, 0.3, 0.4, 0.1, 0.1], size=NUM_EXAMPLES)
    self.binary_labels = np.random.choice(
        range(2), p=[0.2, 0.8], size=NUM_EXAMPLES)
    self.binary_float_labels = np.random.choice(
        range(2), p=[0.2, 0.8], size=NUM_EXAMPLES)

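  # The class probabilities above are non-uniform, so the training split has a
  # clear majority class and a skewed empirical distribution for the
  # prediction tests below to check against.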
  def testPredict(self):
    """Tests that DebugClassifier outputs the majority class."""
    (train_features, train_labels), (test_features,
                                     test_labels) = _train_test_split(
                                         [self.features, self.labels])
    majority_class, _ = max(
        collections.Counter(train_labels).items(), key=operator.itemgetter(1))
    expected_prediction = np.vstack(
        [[majority_class] for _ in range(test_labels.shape[0])])

    classifier = debug.DebugClassifier(n_classes=N_CLASSES)
    classifier.fit(
        input_fn=_input_fn_builder(train_features, train_labels), steps=50)

    pred = classifier.predict_classes(
        input_fn=_input_fn_builder(test_features, None))
    self.assertAllEqual(expected_prediction, np.vstack(pred))

  def testPredictBinary(self):
    """Same as above for binary predictions."""
    (train_features, train_labels), (test_features,
                                     test_labels) = _train_test_split(
                                         [self.features, self.binary_labels])

    majority_class, _ = max(
        collections.Counter(train_labels).items(), key=operator.itemgetter(1))
    expected_prediction = np.vstack(
        [[majority_class] for _ in range(test_labels.shape[0])])

    classifier = debug.DebugClassifier(n_classes=2)
    classifier.fit(
        input_fn=_input_fn_builder(train_features, train_labels), steps=50)

    pred = classifier.predict_classes(
        input_fn=_input_fn_builder(test_features, None))
    self.assertAllEqual(expected_prediction, np.vstack(pred))

    (train_features,
     train_labels), (test_features, test_labels) = _train_test_split(
         [self.features, self.binary_float_labels])

    majority_class, _ = max(
        collections.Counter(train_labels).items(), key=operator.itemgetter(1))
    expected_prediction = np.vstack(
        [[majority_class] for _ in range(test_labels.shape[0])])

    classifier = debug.DebugClassifier(n_classes=2)
    classifier.fit(
        input_fn=_input_fn_builder(train_features, train_labels), steps=50)

    pred = classifier.predict_classes(
        input_fn=_input_fn_builder(test_features, None))
    self.assertAllEqual(expected_prediction, np.vstack(pred))

  def testPredictProba(self):
    """Tests that DebugClassifier outputs observed class distribution."""
    (train_features, train_labels), (test_features,
                                     test_labels) = _train_test_split(
                                         [self.features, self.labels])

    class_distribution = np.zeros((1, N_CLASSES))
    for label in train_labels:
      class_distribution[0, label] += 1
    class_distribution /= len(train_labels)

    expected_prediction = np.vstack(
        [class_distribution for _ in range(test_labels.shape[0])])

    classifier = debug.DebugClassifier(n_classes=N_CLASSES)
    classifier.fit(
        input_fn=_input_fn_builder(train_features, train_labels), steps=50)

    pred = classifier.predict_proba(
        input_fn=_input_fn_builder(test_features, None))

    self.assertAllClose(expected_prediction, np.vstack(pred), atol=0.1)

  def testPredictProbaBinary(self):
    """Same as above but for binary classification."""
    (train_features, train_labels), (test_features,
                                     test_labels) = _train_test_split(
                                         [self.features, self.binary_labels])

    class_distribution = np.zeros((1, 2))
    for label in train_labels:
      class_distribution[0, label] += 1
    class_distribution /= len(train_labels)

    expected_prediction = np.vstack(
        [class_distribution for _ in range(test_labels.shape[0])])

    classifier = debug.DebugClassifier(n_classes=2)
    classifier.fit(
        input_fn=_input_fn_builder(train_features, train_labels), steps=50)

    pred = classifier.predict_proba(
        input_fn=_input_fn_builder(test_features, None))

    self.assertAllClose(expected_prediction, np.vstack(pred), atol=0.1)

    (train_features,
     train_labels), (test_features, test_labels) = _train_test_split(
         [self.features, self.binary_float_labels])

    class_distribution = np.zeros((1, 2))
    for label in train_labels:
      class_distribution[0, int(label)] += 1
    class_distribution /= len(train_labels)

    expected_prediction = np.vstack(
        [class_distribution for _ in range(test_labels.shape[0])])

    classifier = debug.DebugClassifier(n_classes=2)
    classifier.fit(
        input_fn=_input_fn_builder(train_features, train_labels), steps=50)

    pred = classifier.predict_proba(
        input_fn=_input_fn_builder(test_features, None))

    self.assertAllClose(expected_prediction, np.vstack(pred), atol=0.1)

  def testExperimentIntegration(self):
    exp = experiment.Experiment(
        estimator=debug.DebugClassifier(n_classes=3),
        train_input_fn=test_data.iris_input_multiclass_fn,
        eval_input_fn=test_data.iris_input_multiclass_fn)
    exp.test()

  def _assertInRange(self, expected_min, expected_max, actual):
    self.assertLessEqual(expected_min, actual)
    self.assertGreaterEqual(expected_max, actual)

  def testEstimatorContract(self):
    estimator_test_utils.assert_estimator_contract(self, debug.DebugClassifier)

  def testLogisticRegression_MatrixData(self):
    """Tests binary classification using matrix data as input."""
    classifier = debug.DebugClassifier(
        config=run_config.RunConfig(tf_random_seed=1))
    input_fn = test_data.iris_input_logistic_fn
    classifier.fit(input_fn=input_fn, steps=5)
    scores = classifier.evaluate(input_fn=input_fn, steps=1)
    self._assertInRange(0.0, 1.0, scores['accuracy'])
    self.assertIn('loss', scores)

  def testLogisticRegression_MatrixData_Labels1D(self):
    """Same as the last test, but label shape is [100] instead of [100, 1]."""

    def _input_fn():
      iris = test_data.prepare_iris_data_for_logistic_regression()
      return {
          'feature': constant_op.constant(iris.data, dtype=dtypes.float32)
      }, constant_op.constant(
          iris.target, shape=[100], dtype=dtypes.int32)

    classifier = debug.DebugClassifier(
        config=run_config.RunConfig(tf_random_seed=1))
    classifier.fit(input_fn=_input_fn, steps=5)
    scores = classifier.evaluate(input_fn=_input_fn, steps=1)
    self.assertIn('loss', scores)

  def testLogisticRegression_NpMatrixData(self):
    """Tests binary classification using numpy matrix data as input."""
    iris = test_data.prepare_iris_data_for_logistic_regression()
    train_x = iris.data
    train_y = iris.target
    classifier = debug.DebugClassifier(
        config=run_config.RunConfig(tf_random_seed=1))
    classifier.fit(x=train_x, y=train_y, steps=5)
    scores = classifier.evaluate(x=train_x, y=train_y, steps=1)
    self._assertInRange(0.0, 1.0, scores['accuracy'])

  def _assertBinaryPredictions(self, expected_len, predictions):
    self.assertEqual(expected_len, len(predictions))
    for prediction in predictions:
      self.assertIn(prediction, (0, 1))

  def _assertProbabilities(self, expected_batch_size, expected_n_classes,
                           probabilities):
    self.assertEqual(expected_batch_size, len(probabilities))
    for b in range(expected_batch_size):
      self.assertEqual(expected_n_classes, len(probabilities[b]))
      for i in range(expected_n_classes):
        self._assertInRange(0.0, 1.0, probabilities[b][i])

  def testLogisticRegression_TensorData(self):
    """Tests binary classification using tensor data as input."""

    def _input_fn(num_epochs=None):
      features = {
          'age':
              input_lib.limit_epochs(
                  constant_op.constant([[.8], [0.2], [.1]]),
                  num_epochs=num_epochs),
          'language':
              sparse_tensor.SparseTensor(
                  values=input_lib.limit_epochs(
                      ['en', 'fr', 'zh'], num_epochs=num_epochs),
                  indices=[[0, 0], [0, 1], [2, 0]],
                  dense_shape=[3, 2])
      }
      return features, constant_op.constant([[1], [0], [0]], dtype=dtypes.int32)

    classifier = debug.DebugClassifier(n_classes=2)

    classifier.fit(input_fn=_input_fn, steps=50)

    scores = classifier.evaluate(input_fn=_input_fn, steps=1)
    self._assertInRange(0.0, 1.0, scores['accuracy'])
    self.assertIn('loss', scores)
    predict_input_fn = functools.partial(_input_fn, num_epochs=1)
    predictions = list(classifier.predict_classes(input_fn=predict_input_fn))
    self._assertBinaryPredictions(3, predictions)

  def testLogisticRegression_FloatLabel(self):
    """Tests binary classification with float labels."""

    def _input_fn_float_label(num_epochs=None):
      features = {
          'age':
              input_lib.limit_epochs(
                  constant_op.constant([[50], [20], [10]]),
                  num_epochs=num_epochs),
          'language':
              sparse_tensor.SparseTensor(
                  values=input_lib.limit_epochs(
                      ['en', 'fr', 'zh'], num_epochs=num_epochs),
                  indices=[[0, 0], [0, 1], [2, 0]],
                  dense_shape=[3, 2])
      }
      labels = constant_op.constant([[0.8], [0.], [0.2]], dtype=dtypes.float32)
      return features, labels

    classifier = debug.DebugClassifier(n_classes=2)

    classifier.fit(input_fn=_input_fn_float_label, steps=50)

    predict_input_fn = functools.partial(_input_fn_float_label, num_epochs=1)
    predictions = list(classifier.predict_classes(input_fn=predict_input_fn))
    self._assertBinaryPredictions(3, predictions)
    predictions_proba = list(
        classifier.predict_proba(input_fn=predict_input_fn))
    self._assertProbabilities(3, 2, predictions_proba)

  def testMultiClass_MatrixData(self):
    """Tests multi-class classification using matrix data as input."""
    classifier = debug.DebugClassifier(n_classes=3)

    input_fn = test_data.iris_input_multiclass_fn
    classifier.fit(input_fn=input_fn, steps=200)
    scores = classifier.evaluate(input_fn=input_fn, steps=1)
    self._assertInRange(0.0, 1.0, scores['accuracy'])
    self.assertIn('loss', scores)

  def testMultiClass_MatrixData_Labels1D(self):
    """Same as the last test, but label shape is [150] instead of [150, 1]."""

    def _input_fn():
      iris = base.load_iris()
      return {
          'feature': constant_op.constant(iris.data, dtype=dtypes.float32)
      }, constant_op.constant(
          iris.target, shape=[150], dtype=dtypes.int32)

    classifier = debug.DebugClassifier(n_classes=3)

    classifier.fit(input_fn=_input_fn, steps=200)
    scores = classifier.evaluate(input_fn=_input_fn, steps=1)
    self._assertInRange(0.0, 1.0, scores['accuracy'])

  def testMultiClass_NpMatrixData(self):
    """Tests multi-class classification using numpy matrix data as input."""
    iris = base.load_iris()
    train_x = iris.data
    train_y = iris.target
    classifier = debug.DebugClassifier(n_classes=3)
    classifier.fit(x=train_x, y=train_y, steps=200)
    scores = classifier.evaluate(x=train_x, y=train_y, steps=1)
    self._assertInRange(0.0, 1.0, scores['accuracy'])

  def testMultiClass_StringLabel(self):
    """Tests multi-class classification with string labels."""

    def _input_fn_train():
      labels = constant_op.constant([['foo'], ['bar'], ['baz'], ['bar']])
      features = {
          'x': array_ops.ones(shape=[4, 1], dtype=dtypes.float32),
      }
      return features, labels

    classifier = debug.DebugClassifier(
        n_classes=3, label_keys=['foo', 'bar', 'baz'])

    classifier.fit(input_fn=_input_fn_train, steps=5)
    scores = classifier.evaluate(input_fn=_input_fn_train, steps=1)
    self.assertIn('loss', scores)

  def testLoss(self):
    """Tests loss calculation."""

    def _input_fn_train():
      # Create 4 rows, one of them (y = x), three of them (y=Not(x))
      # The logistic prediction should be (y = 0.25).
      labels = constant_op.constant([[1], [0], [0], [0]])
      features = {
          'x': array_ops.ones(shape=[4, 1], dtype=dtypes.float32),
      }
      return features, labels

    classifier = debug.DebugClassifier(n_classes=2)

    classifier.fit(input_fn=_input_fn_train, steps=5)
    scores = classifier.evaluate(input_fn=_input_fn_train, steps=1)
    self.assertIn('loss', scores)

  def testLossWithWeights(self):
    """Tests loss calculation with weights."""

    def _input_fn_train():
      # 4 rows with equal weight, one of them (y = x), three of them (y=Not(x))
      # The logistic prediction should be (y = 0.25).
      labels = constant_op.constant([[1.], [0.], [0.], [0.]])
      features = {
          'x': array_ops.ones(shape=[4, 1], dtype=dtypes.float32),
          'w': constant_op.constant([[1.], [1.], [1.], [1.]])
      }
      return features, labels

    def _input_fn_eval():
      # 4 rows, with different weights.
      labels = constant_op.constant([[1.], [0.], [0.], [0.]])
      features = {
          'x': array_ops.ones(shape=[4, 1], dtype=dtypes.float32),
          'w': constant_op.constant([[7.], [1.], [1.], [1.]])
      }
      return features, labels

    classifier = debug.DebugClassifier(
        weight_column_name='w',
        n_classes=2,
        config=run_config.RunConfig(tf_random_seed=1))

    classifier.fit(input_fn=_input_fn_train, steps=5)
    scores = classifier.evaluate(input_fn=_input_fn_eval, steps=1)
    self.assertIn('loss', scores)

  def testTrainWithWeights(self):
    """Tests training with given weight column."""

    def _input_fn_train():
      # Create 4 rows, one of them (y = x), three of them (y=Not(x))
      # First row has more weight than others. Model should fit (y=x) better
      # than (y=Not(x)) due to the relative higher weight of the first row.
      labels = constant_op.constant([[1], [0], [0], [0]])
      features = {
          'x': array_ops.ones(shape=[4, 1], dtype=dtypes.float32),
          'w': constant_op.constant([[100.], [3.], [2.], [2.]])
      }
      return features, labels

    def _input_fn_eval():
      # Create 4 rows (y = x)
      labels = constant_op.constant([[1], [1], [1], [1]])
      features = {
          'x': array_ops.ones(shape=[4, 1], dtype=dtypes.float32),
          'w': constant_op.constant([[1.], [1.], [1.], [1.]])
      }
      return features, labels

    classifier = debug.DebugClassifier(weight_column_name='w')

    classifier.fit(input_fn=_input_fn_train, steps=5)
    scores = classifier.evaluate(input_fn=_input_fn_eval, steps=1)
    self._assertInRange(0.0, 1.0, scores['accuracy'])

  def testCustomMetrics(self):
    """Tests custom evaluation metrics."""

    def _input_fn(num_epochs=None):
      # Create 4 rows, one of them (y = x), three of them (y=Not(x))
      labels = constant_op.constant([[1], [0], [0], [0]])
      features = {
          'x':
              input_lib.limit_epochs(
                  array_ops.ones(shape=[4, 1], dtype=dtypes.float32),
                  num_epochs=num_epochs),
      }
      return features, labels

    def _my_metric_op(predictions, labels):
      # For the case of binary classification, the 2nd column of "predictions"
      # denotes the model predictions.
      labels = math_ops.cast(labels, dtypes.float32)
      predictions = array_ops.strided_slice(
          predictions, [0, 1], [-1, 2], end_mask=1)
      labels = math_ops.cast(labels, predictions.dtype)
      return math_ops.reduce_sum(math_ops.multiply(predictions, labels))

    classifier = debug.DebugClassifier(
        config=run_config.RunConfig(tf_random_seed=1))

    classifier.fit(input_fn=_input_fn, steps=5)
    scores = classifier.evaluate(
        input_fn=_input_fn,
        steps=5,
        metrics={
            'my_accuracy':
                MetricSpec(
                    metric_fn=metric_ops.streaming_accuracy,
                    prediction_key='classes'),
            'my_precision':
                MetricSpec(
                    metric_fn=metric_ops.streaming_precision,
                    prediction_key='classes'),
            'my_metric':
                MetricSpec(
                    metric_fn=_my_metric_op, prediction_key='probabilities')
        })
    self.assertTrue(
        set(['loss', 'my_accuracy', 'my_precision', 'my_metric']).issubset(
            set(scores.keys())))
    predict_input_fn = functools.partial(_input_fn, num_epochs=1)
    predictions = np.array(
        list(classifier.predict_classes(input_fn=predict_input_fn)))
    self.assertEqual(
        _sklearn.accuracy_score([1, 0, 0, 0], predictions),
        scores['my_accuracy'])

    # Test the case where the prediction_key is neither "classes" nor
    # "probabilities".
    with self.assertRaisesRegexp(KeyError, 'bad_type'):
      classifier.evaluate(
          input_fn=_input_fn,
          steps=5,
          metrics={
              'bad_name':
                  MetricSpec(
                      metric_fn=metric_ops.streaming_auc,
                      prediction_key='bad_type')
          })

  def testTrainSaveLoad(self):
    """Tests that a trained model can be saved and reloaded."""

    def _input_fn(num_epochs=None):
      features = {
          'age':
              input_lib.limit_epochs(
                  constant_op.constant([[.8], [.2], [.1]]),
                  num_epochs=num_epochs),
          'language':
              sparse_tensor.SparseTensor(
                  values=input_lib.limit_epochs(
                      ['en', 'fr', 'zh'], num_epochs=num_epochs),
                  indices=[[0, 0], [0, 1], [2, 0]],
                  dense_shape=[3, 2])
      }
      return features, constant_op.constant([[1], [0], [0]], dtype=dtypes.int32)

    model_dir = tempfile.mkdtemp()
    classifier = debug.DebugClassifier(
        model_dir=model_dir,
        n_classes=3,
        config=run_config.RunConfig(tf_random_seed=1))

    classifier.fit(input_fn=_input_fn, steps=5)
    predict_input_fn = functools.partial(_input_fn, num_epochs=1)
    predictions1 = classifier.predict_classes(input_fn=predict_input_fn)
    del classifier

    classifier2 = debug.DebugClassifier(
        model_dir=model_dir,
        n_classes=3,
        config=run_config.RunConfig(tf_random_seed=1))
    predictions2 = classifier2.predict_classes(input_fn=predict_input_fn)
    self.assertEqual(list(predictions1), list(predictions2))

  def testExport(self):
    """Tests exporting the model for Servo."""

    def input_fn():
      return {
          'age':
              constant_op.constant([1]),
          'language':
              sparse_tensor.SparseTensor(
                  values=['english'], indices=[[0, 0]], dense_shape=[1, 1])
      }, constant_op.constant([[1]])

    language = feature_column.sparse_column_with_hash_bucket('language', 100)
    feature_columns = [
        feature_column.real_valued_column('age'),
        feature_column.embedding_column(language, dimension=1)
    ]

    classifier = debug.DebugClassifier(
        config=run_config.RunConfig(tf_random_seed=1))
    classifier.fit(input_fn=input_fn, steps=5)

    def default_input_fn(unused_estimator, examples):
      return feature_column_ops.parse_feature_columns_from_examples(
          examples, feature_columns)

    export_dir = tempfile.mkdtemp()
    classifier.export(export_dir, input_fn=default_input_fn)


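# DebugRegressor is expected to ignore the input features and predict the
# per-dimension mean of the training targets (see testPredictScores below).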
class DebugRegressorTest(test.TestCase):

  def setUp(self):
    np.random.seed(100)
    self.features = np.random.rand(NUM_EXAMPLES, 5)
    self.targets = np.random.rand(NUM_EXAMPLES, LABEL_DIMENSION)

  def testPredictScores(self):
    """Tests that DebugRegressor outputs the mean target."""
    (train_features, train_labels), (test_features,
                                     test_labels) = _train_test_split(
                                         [self.features, self.targets])
    mean_target = np.mean(train_labels, 0)
    expected_prediction = np.vstack(
        [mean_target for _ in range(test_labels.shape[0])])

    classifier = debug.DebugRegressor(label_dimension=LABEL_DIMENSION)
    classifier.fit(
        input_fn=_input_fn_builder(train_features, train_labels), steps=50)

    pred = classifier.predict_scores(
        input_fn=_input_fn_builder(test_features, None))
    self.assertAllClose(expected_prediction, np.vstack(pred), atol=0.1)

  def testExperimentIntegration(self):
    exp = experiment.Experiment(
        estimator=debug.DebugRegressor(),
        train_input_fn=test_data.iris_input_logistic_fn,
        eval_input_fn=test_data.iris_input_logistic_fn)
    exp.test()

  def testEstimatorContract(self):
    estimator_test_utils.assert_estimator_contract(self, debug.DebugRegressor)

  def testRegression_MatrixData(self):
    """Tests regression using matrix data as input."""
    regressor = debug.DebugRegressor(
        config=run_config.RunConfig(tf_random_seed=1))
    input_fn = test_data.iris_input_logistic_fn
    regressor.fit(input_fn=input_fn, steps=200)
    scores = regressor.evaluate(input_fn=input_fn, steps=1)
    self.assertIn('loss', scores)

  def testRegression_MatrixData_Labels1D(self):
    """Same as the last test, but label shape is [100] instead of [100, 1]."""

    def _input_fn():
      iris = test_data.prepare_iris_data_for_logistic_regression()
      return {
          'feature': constant_op.constant(iris.data, dtype=dtypes.float32)
      }, constant_op.constant(
          iris.target, shape=[100], dtype=dtypes.int32)

    regressor = debug.DebugRegressor(
        config=run_config.RunConfig(tf_random_seed=1))

    regressor.fit(input_fn=_input_fn, steps=200)
    scores = regressor.evaluate(input_fn=_input_fn, steps=1)
    self.assertIn('loss', scores)

  def testRegression_NpMatrixData(self):
    """Tests regression using numpy matrix data as input."""
    iris = test_data.prepare_iris_data_for_logistic_regression()
    train_x = iris.data
    train_y = iris.target
    regressor = debug.DebugRegressor(
        config=run_config.RunConfig(tf_random_seed=1))
    regressor.fit(x=train_x, y=train_y, steps=200)
    scores = regressor.evaluate(x=train_x, y=train_y, steps=1)
    self.assertIn('loss', scores)

  def testRegression_TensorData(self):
    """Tests regression using tensor data as input."""

    def _input_fn(num_epochs=None):
      features = {
          'age':
              input_lib.limit_epochs(
                  constant_op.constant([[.8], [.15], [0.]]),
                  num_epochs=num_epochs),
          'language':
              sparse_tensor.SparseTensor(
                  values=input_lib.limit_epochs(
                      ['en', 'fr', 'zh'], num_epochs=num_epochs),
                  indices=[[0, 0], [0, 1], [2, 0]],
                  dense_shape=[3, 2])
      }
      return features, constant_op.constant([1., 0., 0.2], dtype=dtypes.float32)

    regressor = debug.DebugRegressor(
        config=run_config.RunConfig(tf_random_seed=1))

    regressor.fit(input_fn=_input_fn, steps=200)

    scores = regressor.evaluate(input_fn=_input_fn, steps=1)
    self.assertIn('loss', scores)

  def testLoss(self):
    """Tests loss calculation."""

    def _input_fn_train():
      # Create 4 rows, one of them (y = x), three of them (y=Not(x))
      # The algorithm should learn (y = 0.25).
      labels = constant_op.constant([[1.], [0.], [0.], [0.]])
      features = {
          'x': array_ops.ones(shape=[4, 1], dtype=dtypes.float32),
      }
      return features, labels

    regressor = debug.DebugRegressor(
        config=run_config.RunConfig(tf_random_seed=1))

    regressor.fit(input_fn=_input_fn_train, steps=5)
    scores = regressor.evaluate(input_fn=_input_fn_train, steps=1)
    self.assertIn('loss', scores)

  def testLossWithWeights(self):
    """Tests loss calculation with weights."""

    def _input_fn_train():
      # 4 rows with equal weight, one of them (y = x), three of them (y=Not(x))
      # The algorithm should learn (y = 0.25).
      labels = constant_op.constant([[1.], [0.], [0.], [0.]])
      features = {
          'x': array_ops.ones(shape=[4, 1], dtype=dtypes.float32),
          'w': constant_op.constant([[1.], [1.], [1.], [1.]])
      }
      return features, labels

    def _input_fn_eval():
      # 4 rows, with different weights.
      labels = constant_op.constant([[1.], [0.], [0.], [0.]])
      features = {
          'x': array_ops.ones(shape=[4, 1], dtype=dtypes.float32),
          'w': constant_op.constant([[7.], [1.], [1.], [1.]])
      }
      return features, labels

    regressor = debug.DebugRegressor(
        weight_column_name='w', config=run_config.RunConfig(tf_random_seed=1))

    regressor.fit(input_fn=_input_fn_train, steps=5)
    scores = regressor.evaluate(input_fn=_input_fn_eval, steps=1)
    self.assertIn('loss', scores)

  def testTrainWithWeights(self):
    """Tests training with given weight column."""

    def _input_fn_train():
      # Create 4 rows, one of them (y = x), three of them (y=Not(x))
      # First row has more weight than others. Model should fit (y=x) better
      # than (y=Not(x)) due to the relative higher weight of the first row.
      labels = constant_op.constant([[1.], [0.], [0.], [0.]])
      features = {
          'x': array_ops.ones(shape=[4, 1], dtype=dtypes.float32),
          'w': constant_op.constant([[100.], [3.], [2.], [2.]])
      }
      return features, labels

    def _input_fn_eval():
      # Create 4 rows (y = x)
      labels = constant_op.constant([[1.], [1.], [1.], [1.]])
      features = {
          'x': array_ops.ones(shape=[4, 1], dtype=dtypes.float32),
          'w': constant_op.constant([[1.], [1.], [1.], [1.]])
      }
      return features, labels

    regressor = debug.DebugRegressor(
        weight_column_name='w', config=run_config.RunConfig(tf_random_seed=1))

    regressor.fit(input_fn=_input_fn_train, steps=5)
    scores = regressor.evaluate(input_fn=_input_fn_eval, steps=1)
    self.assertIn('loss', scores)

  def testCustomMetrics(self):
    """Tests custom evaluation metrics."""

    def _input_fn(num_epochs=None):
      # Create 4 rows, one of them (y = x), three of them (y=Not(x))
      labels = constant_op.constant([[1.], [0.], [0.], [0.]])
      features = {
          'x':
              input_lib.limit_epochs(
                  array_ops.ones(shape=[4, 1], dtype=dtypes.float32),
                  num_epochs=num_epochs),
      }
      return features, labels

    def _my_metric_op(predictions, labels):
      return math_ops.reduce_sum(math_ops.multiply(predictions, labels))

    regressor = debug.DebugRegressor(
        config=run_config.RunConfig(tf_random_seed=1))

    regressor.fit(input_fn=_input_fn, steps=5)
    scores = regressor.evaluate(
        input_fn=_input_fn,
        steps=1,
        metrics={
            'my_error':
                MetricSpec(
                    metric_fn=metric_ops.streaming_mean_squared_error,
                    prediction_key='scores'),
            'my_metric':
                MetricSpec(metric_fn=_my_metric_op, prediction_key='scores')
        })
    self.assertIn('loss', set(scores.keys()))
    self.assertIn('my_error', set(scores.keys()))
    self.assertIn('my_metric', set(scores.keys()))
    predict_input_fn = functools.partial(_input_fn, num_epochs=1)
    predictions = np.array(
        list(regressor.predict_scores(input_fn=predict_input_fn)))
    self.assertAlmostEqual(
        _sklearn.mean_squared_error(np.array([1, 0, 0, 0]), predictions),
        scores['my_error'])

    # Tests the case where the prediction_key is not "scores".
    with self.assertRaisesRegexp(KeyError, 'bad_type'):
      regressor.evaluate(
          input_fn=_input_fn,
          steps=1,
          metrics={
              'bad_name':
                  MetricSpec(
                      metric_fn=metric_ops.streaming_auc,
                      prediction_key='bad_type')
          })

  def testTrainSaveLoad(self):
    """Tests that a trained model can be saved and reloaded."""

    def _input_fn(num_epochs=None):
      features = {
          'age':
              input_lib.limit_epochs(
                  constant_op.constant([[0.8], [0.15], [0.]]),
                  num_epochs=num_epochs),
          'language':
              sparse_tensor.SparseTensor(
                  values=input_lib.limit_epochs(
                      ['en', 'fr', 'zh'], num_epochs=num_epochs),
                  indices=[[0, 0], [0, 1], [2, 0]],
                  dense_shape=[3, 2])
      }
      return features, constant_op.constant([1., 0., 0.2], dtype=dtypes.float32)

    model_dir = tempfile.mkdtemp()
    regressor = debug.DebugRegressor(
        model_dir=model_dir, config=run_config.RunConfig(tf_random_seed=1))

    regressor.fit(input_fn=_input_fn, steps=5)
    predict_input_fn = functools.partial(_input_fn, num_epochs=1)
    predictions = list(regressor.predict_scores(input_fn=predict_input_fn))
    del regressor

    regressor2 = debug.DebugRegressor(
        model_dir=model_dir, config=run_config.RunConfig(tf_random_seed=1))
    predictions2 = list(regressor2.predict_scores(input_fn=predict_input_fn))
    self.assertAllClose(predictions, predictions2)


if __name__ == '__main__':
  test.main()