1# Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2# 3# Licensed under the Apache License, Version 2.0 (the "License"); 4# you may not use this file except in compliance with the License. 5# You may obtain a copy of the License at 6# 7# http://www.apache.org/licenses/LICENSE-2.0 8# 9# Unless required by applicable law or agreed to in writing, software 10# distributed under the License is distributed on an "AS IS" BASIS, 11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12# See the License for the specific language governing permissions and 13# limitations under the License. 14# ============================================================================== 15"""Tests for Debug estimators.""" 16 17from __future__ import absolute_import 18from __future__ import division 19from __future__ import print_function 20 21import collections 22import functools 23import operator 24import tempfile 25 26import numpy as np 27 28from tensorflow.contrib.layers.python.layers import feature_column 29from tensorflow.contrib.layers.python.layers import feature_column_ops 30from tensorflow.contrib.learn.python.learn import experiment 31from tensorflow.contrib.learn.python.learn.datasets import base 32from tensorflow.contrib.learn.python.learn.estimators import _sklearn 33from tensorflow.contrib.learn.python.learn.estimators import debug 34from tensorflow.contrib.learn.python.learn.estimators import estimator_test_utils 35from tensorflow.contrib.learn.python.learn.estimators import run_config 36from tensorflow.contrib.learn.python.learn.estimators import test_data 37from tensorflow.contrib.learn.python.learn.metric_spec import MetricSpec 38from tensorflow.contrib.metrics.python.ops import metric_ops 39from tensorflow.python.framework import constant_op 40from tensorflow.python.framework import dtypes 41from tensorflow.python.framework import sparse_tensor 42from tensorflow.python.ops import array_ops 43from tensorflow.python.ops import math_ops 44from tensorflow.python.platform import test 45from tensorflow.python.training import input as input_lib 46 47NUM_EXAMPLES = 100 48N_CLASSES = 5 # Cardinality of multiclass labels. 49LABEL_DIMENSION = 3 # Dimensionality of regression labels. 50 51 52def _train_test_split(features_and_labels): 53 features, labels = features_and_labels 54 train_set = (features[:int(len(features) / 2)], 55 labels[:int(len(features) / 2)]) 56 test_set = (features[int(len(features) / 2):], 57 labels[int(len(features) / 2):]) 58 return train_set, test_set 59 60 61def _input_fn_builder(features, labels): 62 63 def input_fn(): 64 feature_dict = {'features': constant_op.constant(features)} 65 my_labels = labels 66 if my_labels is not None: 67 my_labels = constant_op.constant(my_labels) 68 return feature_dict, my_labels 69 70 return input_fn 71 72 73class DebugClassifierTest(test.TestCase): 74 75 def setUp(self): 76 np.random.seed(100) 77 self.features = np.random.rand(NUM_EXAMPLES, 5) 78 self.labels = np.random.choice( 79 range(N_CLASSES), p=[0.1, 0.3, 0.4, 0.1, 0.1], size=NUM_EXAMPLES) 80 self.binary_labels = np.random.choice( 81 range(2), p=[0.2, 0.8], size=NUM_EXAMPLES) 82 self.binary_float_labels = np.random.choice( 83 range(2), p=[0.2, 0.8], size=NUM_EXAMPLES) 84 85 def testPredict(self): 86 """Tests that DebugClassifier outputs the majority class.""" 87 (train_features, train_labels), (test_features, 88 test_labels) = _train_test_split( 89 [self.features, self.labels]) 90 majority_class, _ = max( 91 collections.Counter(train_labels).items(), key=operator.itemgetter(1)) 92 expected_prediction = np.vstack( 93 [[majority_class] for _ in range(test_labels.shape[0])]) 94 95 classifier = debug.DebugClassifier(n_classes=N_CLASSES) 96 classifier.fit( 97 input_fn=_input_fn_builder(train_features, train_labels), steps=50) 98 99 pred = classifier.predict_classes( 100 input_fn=_input_fn_builder(test_features, None)) 101 self.assertAllEqual(expected_prediction, np.vstack(pred)) 102 103 def testPredictBinary(self): 104 """Same as above for binary predictions.""" 105 (train_features, train_labels), (test_features, 106 test_labels) = _train_test_split( 107 [self.features, self.binary_labels]) 108 109 majority_class, _ = max( 110 collections.Counter(train_labels).items(), key=operator.itemgetter(1)) 111 expected_prediction = np.vstack( 112 [[majority_class] for _ in range(test_labels.shape[0])]) 113 114 classifier = debug.DebugClassifier(n_classes=2) 115 classifier.fit( 116 input_fn=_input_fn_builder(train_features, train_labels), steps=50) 117 118 pred = classifier.predict_classes( 119 input_fn=_input_fn_builder(test_features, None)) 120 self.assertAllEqual(expected_prediction, np.vstack(pred)) 121 122 (train_features, 123 train_labels), (test_features, test_labels) = _train_test_split( 124 [self.features, self.binary_float_labels]) 125 126 majority_class, _ = max( 127 collections.Counter(train_labels).items(), key=operator.itemgetter(1)) 128 expected_prediction = np.vstack( 129 [[majority_class] for _ in range(test_labels.shape[0])]) 130 131 classifier = debug.DebugClassifier(n_classes=2) 132 classifier.fit( 133 input_fn=_input_fn_builder(train_features, train_labels), steps=50) 134 135 pred = classifier.predict_classes( 136 input_fn=_input_fn_builder(test_features, None)) 137 self.assertAllEqual(expected_prediction, np.vstack(pred)) 138 139 def testPredictProba(self): 140 """Tests that DebugClassifier outputs observed class distribution.""" 141 (train_features, train_labels), (test_features, 142 test_labels) = _train_test_split( 143 [self.features, self.labels]) 144 145 class_distribution = np.zeros((1, N_CLASSES)) 146 for label in train_labels: 147 class_distribution[0, label] += 1 148 class_distribution /= len(train_labels) 149 150 expected_prediction = np.vstack( 151 [class_distribution for _ in range(test_labels.shape[0])]) 152 153 classifier = debug.DebugClassifier(n_classes=N_CLASSES) 154 classifier.fit( 155 input_fn=_input_fn_builder(train_features, train_labels), steps=50) 156 157 pred = classifier.predict_proba( 158 input_fn=_input_fn_builder(test_features, None)) 159 160 self.assertAllClose(expected_prediction, np.vstack(pred), atol=0.1) 161 162 def testPredictProbaBinary(self): 163 """Same as above but for binary classification.""" 164 (train_features, train_labels), (test_features, 165 test_labels) = _train_test_split( 166 [self.features, self.binary_labels]) 167 168 class_distribution = np.zeros((1, 2)) 169 for label in train_labels: 170 class_distribution[0, label] += 1 171 class_distribution /= len(train_labels) 172 173 expected_prediction = np.vstack( 174 [class_distribution for _ in range(test_labels.shape[0])]) 175 176 classifier = debug.DebugClassifier(n_classes=2) 177 classifier.fit( 178 input_fn=_input_fn_builder(train_features, train_labels), steps=50) 179 180 pred = classifier.predict_proba( 181 input_fn=_input_fn_builder(test_features, None)) 182 183 self.assertAllClose(expected_prediction, np.vstack(pred), atol=0.1) 184 185 (train_features, 186 train_labels), (test_features, test_labels) = _train_test_split( 187 [self.features, self.binary_float_labels]) 188 189 class_distribution = np.zeros((1, 2)) 190 for label in train_labels: 191 class_distribution[0, int(label)] += 1 192 class_distribution /= len(train_labels) 193 194 expected_prediction = np.vstack( 195 [class_distribution for _ in range(test_labels.shape[0])]) 196 197 classifier = debug.DebugClassifier(n_classes=2) 198 classifier.fit( 199 input_fn=_input_fn_builder(train_features, train_labels), steps=50) 200 201 pred = classifier.predict_proba( 202 input_fn=_input_fn_builder(test_features, None)) 203 204 self.assertAllClose(expected_prediction, np.vstack(pred), atol=0.1) 205 206 def testExperimentIntegration(self): 207 exp = experiment.Experiment( 208 estimator=debug.DebugClassifier(n_classes=3), 209 train_input_fn=test_data.iris_input_multiclass_fn, 210 eval_input_fn=test_data.iris_input_multiclass_fn) 211 exp.test() 212 213 def _assertInRange(self, expected_min, expected_max, actual): 214 self.assertLessEqual(expected_min, actual) 215 self.assertGreaterEqual(expected_max, actual) 216 217 def testEstimatorContract(self): 218 estimator_test_utils.assert_estimator_contract(self, debug.DebugClassifier) 219 220 def testLogisticRegression_MatrixData(self): 221 """Tests binary classification using matrix data as input.""" 222 classifier = debug.DebugClassifier( 223 config=run_config.RunConfig(tf_random_seed=1)) 224 input_fn = test_data.iris_input_logistic_fn 225 classifier.fit(input_fn=input_fn, steps=5) 226 scores = classifier.evaluate(input_fn=input_fn, steps=1) 227 self._assertInRange(0.0, 1.0, scores['accuracy']) 228 self.assertIn('loss', scores) 229 230 def testLogisticRegression_MatrixData_Labels1D(self): 231 """Same as the last test, but label shape is [100] instead of [100, 1].""" 232 233 def _input_fn(): 234 iris = test_data.prepare_iris_data_for_logistic_regression() 235 return { 236 'feature': constant_op.constant(iris.data, dtype=dtypes.float32) 237 }, constant_op.constant( 238 iris.target, shape=[100], dtype=dtypes.int32) 239 240 classifier = debug.DebugClassifier( 241 config=run_config.RunConfig(tf_random_seed=1)) 242 classifier.fit(input_fn=_input_fn, steps=5) 243 scores = classifier.evaluate(input_fn=_input_fn, steps=1) 244 self.assertIn('loss', scores) 245 246 def testLogisticRegression_NpMatrixData(self): 247 """Tests binary classification using numpy matrix data as input.""" 248 iris = test_data.prepare_iris_data_for_logistic_regression() 249 train_x = iris.data 250 train_y = iris.target 251 classifier = debug.DebugClassifier( 252 config=run_config.RunConfig(tf_random_seed=1)) 253 classifier.fit(x=train_x, y=train_y, steps=5) 254 scores = classifier.evaluate(x=train_x, y=train_y, steps=1) 255 self._assertInRange(0.0, 1.0, scores['accuracy']) 256 257 def _assertBinaryPredictions(self, expected_len, predictions): 258 self.assertEqual(expected_len, len(predictions)) 259 for prediction in predictions: 260 self.assertIn(prediction, (0, 1)) 261 262 def _assertProbabilities(self, expected_batch_size, expected_n_classes, 263 probabilities): 264 self.assertEqual(expected_batch_size, len(probabilities)) 265 for b in range(expected_batch_size): 266 self.assertEqual(expected_n_classes, len(probabilities[b])) 267 for i in range(expected_n_classes): 268 self._assertInRange(0.0, 1.0, probabilities[b][i]) 269 270 def testLogisticRegression_TensorData(self): 271 """Tests binary classification using tensor data as input.""" 272 273 def _input_fn(num_epochs=None): 274 features = { 275 'age': 276 input_lib.limit_epochs( 277 constant_op.constant([[.8], [0.2], [.1]]), 278 num_epochs=num_epochs), 279 'language': 280 sparse_tensor.SparseTensor( 281 values=input_lib.limit_epochs( 282 ['en', 'fr', 'zh'], num_epochs=num_epochs), 283 indices=[[0, 0], [0, 1], [2, 0]], 284 dense_shape=[3, 2]) 285 } 286 return features, constant_op.constant([[1], [0], [0]], dtype=dtypes.int32) 287 288 classifier = debug.DebugClassifier(n_classes=2) 289 290 classifier.fit(input_fn=_input_fn, steps=50) 291 292 scores = classifier.evaluate(input_fn=_input_fn, steps=1) 293 self._assertInRange(0.0, 1.0, scores['accuracy']) 294 self.assertIn('loss', scores) 295 predict_input_fn = functools.partial(_input_fn, num_epochs=1) 296 predictions = list(classifier.predict_classes(input_fn=predict_input_fn)) 297 self._assertBinaryPredictions(3, predictions) 298 299 def testLogisticRegression_FloatLabel(self): 300 """Tests binary classification with float labels.""" 301 302 def _input_fn_float_label(num_epochs=None): 303 features = { 304 'age': 305 input_lib.limit_epochs( 306 constant_op.constant([[50], [20], [10]]), 307 num_epochs=num_epochs), 308 'language': 309 sparse_tensor.SparseTensor( 310 values=input_lib.limit_epochs( 311 ['en', 'fr', 'zh'], num_epochs=num_epochs), 312 indices=[[0, 0], [0, 1], [2, 0]], 313 dense_shape=[3, 2]) 314 } 315 labels = constant_op.constant([[0.8], [0.], [0.2]], dtype=dtypes.float32) 316 return features, labels 317 318 classifier = debug.DebugClassifier(n_classes=2) 319 320 classifier.fit(input_fn=_input_fn_float_label, steps=50) 321 322 predict_input_fn = functools.partial(_input_fn_float_label, num_epochs=1) 323 predictions = list(classifier.predict_classes(input_fn=predict_input_fn)) 324 self._assertBinaryPredictions(3, predictions) 325 predictions_proba = list( 326 classifier.predict_proba(input_fn=predict_input_fn)) 327 self._assertProbabilities(3, 2, predictions_proba) 328 329 def testMultiClass_MatrixData(self): 330 """Tests multi-class classification using matrix data as input.""" 331 classifier = debug.DebugClassifier(n_classes=3) 332 333 input_fn = test_data.iris_input_multiclass_fn 334 classifier.fit(input_fn=input_fn, steps=200) 335 scores = classifier.evaluate(input_fn=input_fn, steps=1) 336 self._assertInRange(0.0, 1.0, scores['accuracy']) 337 self.assertIn('loss', scores) 338 339 def testMultiClass_MatrixData_Labels1D(self): 340 """Same as the last test, but label shape is [150] instead of [150, 1].""" 341 342 def _input_fn(): 343 iris = base.load_iris() 344 return { 345 'feature': constant_op.constant(iris.data, dtype=dtypes.float32) 346 }, constant_op.constant( 347 iris.target, shape=[150], dtype=dtypes.int32) 348 349 classifier = debug.DebugClassifier(n_classes=3) 350 351 classifier.fit(input_fn=_input_fn, steps=200) 352 scores = classifier.evaluate(input_fn=_input_fn, steps=1) 353 self._assertInRange(0.0, 1.0, scores['accuracy']) 354 355 def testMultiClass_NpMatrixData(self): 356 """Tests multi-class classification using numpy matrix data as input.""" 357 iris = base.load_iris() 358 train_x = iris.data 359 train_y = iris.target 360 classifier = debug.DebugClassifier(n_classes=3) 361 classifier.fit(x=train_x, y=train_y, steps=200) 362 scores = classifier.evaluate(x=train_x, y=train_y, steps=1) 363 self._assertInRange(0.0, 1.0, scores['accuracy']) 364 365 def testMultiClass_StringLabel(self): 366 """Tests multi-class classification with string labels.""" 367 368 def _input_fn_train(): 369 labels = constant_op.constant([['foo'], ['bar'], ['baz'], ['bar']]) 370 features = { 371 'x': array_ops.ones(shape=[4, 1], dtype=dtypes.float32), 372 } 373 return features, labels 374 375 classifier = debug.DebugClassifier( 376 n_classes=3, label_keys=['foo', 'bar', 'baz']) 377 378 classifier.fit(input_fn=_input_fn_train, steps=5) 379 scores = classifier.evaluate(input_fn=_input_fn_train, steps=1) 380 self.assertIn('loss', scores) 381 382 def testLoss(self): 383 """Tests loss calculation.""" 384 385 def _input_fn_train(): 386 # Create 4 rows, one of them (y = x), three of them (y=Not(x)) 387 # The logistic prediction should be (y = 0.25). 388 labels = constant_op.constant([[1], [0], [0], [0]]) 389 features = { 390 'x': array_ops.ones(shape=[4, 1], dtype=dtypes.float32), 391 } 392 return features, labels 393 394 classifier = debug.DebugClassifier(n_classes=2) 395 396 classifier.fit(input_fn=_input_fn_train, steps=5) 397 scores = classifier.evaluate(input_fn=_input_fn_train, steps=1) 398 self.assertIn('loss', scores) 399 400 def testLossWithWeights(self): 401 """Tests loss calculation with weights.""" 402 403 def _input_fn_train(): 404 # 4 rows with equal weight, one of them (y = x), three of them (y=Not(x)) 405 # The logistic prediction should be (y = 0.25). 406 labels = constant_op.constant([[1.], [0.], [0.], [0.]]) 407 features = { 408 'x': array_ops.ones(shape=[4, 1], dtype=dtypes.float32), 409 'w': constant_op.constant([[1.], [1.], [1.], [1.]]) 410 } 411 return features, labels 412 413 def _input_fn_eval(): 414 # 4 rows, with different weights. 415 labels = constant_op.constant([[1.], [0.], [0.], [0.]]) 416 features = { 417 'x': array_ops.ones(shape=[4, 1], dtype=dtypes.float32), 418 'w': constant_op.constant([[7.], [1.], [1.], [1.]]) 419 } 420 return features, labels 421 422 classifier = debug.DebugClassifier( 423 weight_column_name='w', 424 n_classes=2, 425 config=run_config.RunConfig(tf_random_seed=1)) 426 427 classifier.fit(input_fn=_input_fn_train, steps=5) 428 scores = classifier.evaluate(input_fn=_input_fn_eval, steps=1) 429 self.assertIn('loss', scores) 430 431 def testTrainWithWeights(self): 432 """Tests training with given weight column.""" 433 434 def _input_fn_train(): 435 # Create 4 rows, one of them (y = x), three of them (y=Not(x)) 436 # First row has more weight than others. Model should fit (y=x) better 437 # than (y=Not(x)) due to the relative higher weight of the first row. 438 labels = constant_op.constant([[1], [0], [0], [0]]) 439 features = { 440 'x': array_ops.ones(shape=[4, 1], dtype=dtypes.float32), 441 'w': constant_op.constant([[100.], [3.], [2.], [2.]]) 442 } 443 return features, labels 444 445 def _input_fn_eval(): 446 # Create 4 rows (y = x) 447 labels = constant_op.constant([[1], [1], [1], [1]]) 448 features = { 449 'x': array_ops.ones(shape=[4, 1], dtype=dtypes.float32), 450 'w': constant_op.constant([[1.], [1.], [1.], [1.]]) 451 } 452 return features, labels 453 454 classifier = debug.DebugClassifier(weight_column_name='w') 455 456 classifier.fit(input_fn=_input_fn_train, steps=5) 457 scores = classifier.evaluate(input_fn=_input_fn_eval, steps=1) 458 self._assertInRange(0.0, 1.0, scores['accuracy']) 459 460 def testCustomMetrics(self): 461 """Tests custom evaluation metrics.""" 462 463 def _input_fn(num_epochs=None): 464 # Create 4 rows, one of them (y = x), three of them (y=Not(x)) 465 labels = constant_op.constant([[1], [0], [0], [0]]) 466 features = { 467 'x': 468 input_lib.limit_epochs( 469 array_ops.ones(shape=[4, 1], dtype=dtypes.float32), 470 num_epochs=num_epochs), 471 } 472 return features, labels 473 474 def _my_metric_op(predictions, labels): 475 # For the case of binary classification, the 2nd column of "predictions" 476 # denotes the model predictions. 477 labels = math_ops.cast(labels, dtypes.float32) 478 predictions = array_ops.strided_slice( 479 predictions, [0, 1], [-1, 2], end_mask=1) 480 labels = math_ops.cast(labels, predictions.dtype) 481 return math_ops.reduce_sum(math_ops.multiply(predictions, labels)) 482 483 classifier = debug.DebugClassifier( 484 config=run_config.RunConfig(tf_random_seed=1)) 485 486 classifier.fit(input_fn=_input_fn, steps=5) 487 scores = classifier.evaluate( 488 input_fn=_input_fn, 489 steps=5, 490 metrics={ 491 'my_accuracy': 492 MetricSpec( 493 metric_fn=metric_ops.streaming_accuracy, 494 prediction_key='classes'), 495 'my_precision': 496 MetricSpec( 497 metric_fn=metric_ops.streaming_precision, 498 prediction_key='classes'), 499 'my_metric': 500 MetricSpec( 501 metric_fn=_my_metric_op, prediction_key='probabilities') 502 }) 503 self.assertTrue( 504 set(['loss', 'my_accuracy', 'my_precision', 'my_metric']).issubset( 505 set(scores.keys()))) 506 predict_input_fn = functools.partial(_input_fn, num_epochs=1) 507 predictions = np.array( 508 list(classifier.predict_classes(input_fn=predict_input_fn))) 509 self.assertEqual( 510 _sklearn.accuracy_score([1, 0, 0, 0], predictions), 511 scores['my_accuracy']) 512 513 # Test the case where the 2nd element of the key is neither "classes" nor 514 # "probabilities". 515 with self.assertRaisesRegexp(KeyError, 'bad_type'): 516 classifier.evaluate( 517 input_fn=_input_fn, 518 steps=5, 519 metrics={ 520 'bad_name': 521 MetricSpec( 522 metric_fn=metric_ops.streaming_auc, 523 prediction_key='bad_type') 524 }) 525 526 def testTrainSaveLoad(self): 527 """Tests that insures you can save and reload a trained model.""" 528 529 def _input_fn(num_epochs=None): 530 features = { 531 'age': 532 input_lib.limit_epochs( 533 constant_op.constant([[.8], [.2], [.1]]), 534 num_epochs=num_epochs), 535 'language': 536 sparse_tensor.SparseTensor( 537 values=input_lib.limit_epochs( 538 ['en', 'fr', 'zh'], num_epochs=num_epochs), 539 indices=[[0, 0], [0, 1], [2, 0]], 540 dense_shape=[3, 2]) 541 } 542 return features, constant_op.constant([[1], [0], [0]], dtype=dtypes.int32) 543 544 model_dir = tempfile.mkdtemp() 545 classifier = debug.DebugClassifier( 546 model_dir=model_dir, 547 n_classes=3, 548 config=run_config.RunConfig(tf_random_seed=1)) 549 550 classifier.fit(input_fn=_input_fn, steps=5) 551 predict_input_fn = functools.partial(_input_fn, num_epochs=1) 552 predictions1 = classifier.predict_classes(input_fn=predict_input_fn) 553 del classifier 554 555 classifier2 = debug.DebugClassifier( 556 model_dir=model_dir, 557 n_classes=3, 558 config=run_config.RunConfig(tf_random_seed=1)) 559 predictions2 = classifier2.predict_classes(input_fn=predict_input_fn) 560 self.assertEqual(list(predictions1), list(predictions2)) 561 562 def testExport(self): 563 """Tests export model for servo.""" 564 565 def input_fn(): 566 return { 567 'age': 568 constant_op.constant([1]), 569 'language': 570 sparse_tensor.SparseTensor( 571 values=['english'], indices=[[0, 0]], dense_shape=[1, 1]) 572 }, constant_op.constant([[1]]) 573 574 language = feature_column.sparse_column_with_hash_bucket('language', 100) 575 feature_columns = [ 576 feature_column.real_valued_column('age'), 577 feature_column.embedding_column(language, dimension=1) 578 ] 579 580 classifier = debug.DebugClassifier( 581 config=run_config.RunConfig(tf_random_seed=1)) 582 classifier.fit(input_fn=input_fn, steps=5) 583 584 def default_input_fn(unused_estimator, examples): 585 return feature_column_ops.parse_feature_columns_from_examples( 586 examples, feature_columns) 587 588 export_dir = tempfile.mkdtemp() 589 classifier.export(export_dir, input_fn=default_input_fn) 590 591 592class DebugRegressorTest(test.TestCase): 593 594 def setUp(self): 595 np.random.seed(100) 596 self.features = np.random.rand(NUM_EXAMPLES, 5) 597 self.targets = np.random.rand(NUM_EXAMPLES, LABEL_DIMENSION) 598 599 def testPredictScores(self): 600 """Tests that DebugRegressor outputs the mean target.""" 601 (train_features, train_labels), (test_features, 602 test_labels) = _train_test_split( 603 [self.features, self.targets]) 604 mean_target = np.mean(train_labels, 0) 605 expected_prediction = np.vstack( 606 [mean_target for _ in range(test_labels.shape[0])]) 607 608 classifier = debug.DebugRegressor(label_dimension=LABEL_DIMENSION) 609 classifier.fit( 610 input_fn=_input_fn_builder(train_features, train_labels), steps=50) 611 612 pred = classifier.predict_scores( 613 input_fn=_input_fn_builder(test_features, None)) 614 self.assertAllClose(expected_prediction, np.vstack(pred), atol=0.1) 615 616 def testExperimentIntegration(self): 617 exp = experiment.Experiment( 618 estimator=debug.DebugRegressor(), 619 train_input_fn=test_data.iris_input_logistic_fn, 620 eval_input_fn=test_data.iris_input_logistic_fn) 621 exp.test() 622 623 def testEstimatorContract(self): 624 estimator_test_utils.assert_estimator_contract(self, debug.DebugRegressor) 625 626 def testRegression_MatrixData(self): 627 """Tests regression using matrix data as input.""" 628 regressor = debug.DebugRegressor( 629 config=run_config.RunConfig(tf_random_seed=1)) 630 input_fn = test_data.iris_input_logistic_fn 631 regressor.fit(input_fn=input_fn, steps=200) 632 scores = regressor.evaluate(input_fn=input_fn, steps=1) 633 self.assertIn('loss', scores) 634 635 def testRegression_MatrixData_Labels1D(self): 636 """Same as the last test, but label shape is [100] instead of [100, 1].""" 637 638 def _input_fn(): 639 iris = test_data.prepare_iris_data_for_logistic_regression() 640 return { 641 'feature': constant_op.constant(iris.data, dtype=dtypes.float32) 642 }, constant_op.constant( 643 iris.target, shape=[100], dtype=dtypes.int32) 644 645 regressor = debug.DebugRegressor( 646 config=run_config.RunConfig(tf_random_seed=1)) 647 648 regressor.fit(input_fn=_input_fn, steps=200) 649 scores = regressor.evaluate(input_fn=_input_fn, steps=1) 650 self.assertIn('loss', scores) 651 652 def testRegression_NpMatrixData(self): 653 """Tests binary classification using numpy matrix data as input.""" 654 iris = test_data.prepare_iris_data_for_logistic_regression() 655 train_x = iris.data 656 train_y = iris.target 657 regressor = debug.DebugRegressor( 658 config=run_config.RunConfig(tf_random_seed=1)) 659 regressor.fit(x=train_x, y=train_y, steps=200) 660 scores = regressor.evaluate(x=train_x, y=train_y, steps=1) 661 self.assertIn('loss', scores) 662 663 def testRegression_TensorData(self): 664 """Tests regression using tensor data as input.""" 665 666 def _input_fn(num_epochs=None): 667 features = { 668 'age': 669 input_lib.limit_epochs( 670 constant_op.constant([[.8], [.15], [0.]]), 671 num_epochs=num_epochs), 672 'language': 673 sparse_tensor.SparseTensor( 674 values=input_lib.limit_epochs( 675 ['en', 'fr', 'zh'], num_epochs=num_epochs), 676 indices=[[0, 0], [0, 1], [2, 0]], 677 dense_shape=[3, 2]) 678 } 679 return features, constant_op.constant([1., 0., 0.2], dtype=dtypes.float32) 680 681 regressor = debug.DebugRegressor( 682 config=run_config.RunConfig(tf_random_seed=1)) 683 684 regressor.fit(input_fn=_input_fn, steps=200) 685 686 scores = regressor.evaluate(input_fn=_input_fn, steps=1) 687 self.assertIn('loss', scores) 688 689 def testLoss(self): 690 """Tests loss calculation.""" 691 692 def _input_fn_train(): 693 # Create 4 rows, one of them (y = x), three of them (y=Not(x)) 694 # The algorithm should learn (y = 0.25). 695 labels = constant_op.constant([[1.], [0.], [0.], [0.]]) 696 features = { 697 'x': array_ops.ones(shape=[4, 1], dtype=dtypes.float32), 698 } 699 return features, labels 700 701 regressor = debug.DebugRegressor( 702 config=run_config.RunConfig(tf_random_seed=1)) 703 704 regressor.fit(input_fn=_input_fn_train, steps=5) 705 scores = regressor.evaluate(input_fn=_input_fn_train, steps=1) 706 self.assertIn('loss', scores) 707 708 def testLossWithWeights(self): 709 """Tests loss calculation with weights.""" 710 711 def _input_fn_train(): 712 # 4 rows with equal weight, one of them (y = x), three of them (y=Not(x)) 713 # The algorithm should learn (y = 0.25). 714 labels = constant_op.constant([[1.], [0.], [0.], [0.]]) 715 features = { 716 'x': array_ops.ones(shape=[4, 1], dtype=dtypes.float32), 717 'w': constant_op.constant([[1.], [1.], [1.], [1.]]) 718 } 719 return features, labels 720 721 def _input_fn_eval(): 722 # 4 rows, with different weights. 723 labels = constant_op.constant([[1.], [0.], [0.], [0.]]) 724 features = { 725 'x': array_ops.ones(shape=[4, 1], dtype=dtypes.float32), 726 'w': constant_op.constant([[7.], [1.], [1.], [1.]]) 727 } 728 return features, labels 729 730 regressor = debug.DebugRegressor( 731 weight_column_name='w', config=run_config.RunConfig(tf_random_seed=1)) 732 733 regressor.fit(input_fn=_input_fn_train, steps=5) 734 scores = regressor.evaluate(input_fn=_input_fn_eval, steps=1) 735 self.assertIn('loss', scores) 736 737 def testTrainWithWeights(self): 738 """Tests training with given weight column.""" 739 740 def _input_fn_train(): 741 # Create 4 rows, one of them (y = x), three of them (y=Not(x)) 742 # First row has more weight than others. Model should fit (y=x) better 743 # than (y=Not(x)) due to the relative higher weight of the first row. 744 labels = constant_op.constant([[1.], [0.], [0.], [0.]]) 745 features = { 746 'x': array_ops.ones(shape=[4, 1], dtype=dtypes.float32), 747 'w': constant_op.constant([[100.], [3.], [2.], [2.]]) 748 } 749 return features, labels 750 751 def _input_fn_eval(): 752 # Create 4 rows (y = x) 753 labels = constant_op.constant([[1.], [1.], [1.], [1.]]) 754 features = { 755 'x': array_ops.ones(shape=[4, 1], dtype=dtypes.float32), 756 'w': constant_op.constant([[1.], [1.], [1.], [1.]]) 757 } 758 return features, labels 759 760 regressor = debug.DebugRegressor( 761 weight_column_name='w', config=run_config.RunConfig(tf_random_seed=1)) 762 763 regressor.fit(input_fn=_input_fn_train, steps=5) 764 scores = regressor.evaluate(input_fn=_input_fn_eval, steps=1) 765 self.assertIn('loss', scores) 766 767 def testCustomMetrics(self): 768 """Tests custom evaluation metrics.""" 769 770 def _input_fn(num_epochs=None): 771 # Create 4 rows, one of them (y = x), three of them (y=Not(x)) 772 labels = constant_op.constant([[1.], [0.], [0.], [0.]]) 773 features = { 774 'x': 775 input_lib.limit_epochs( 776 array_ops.ones(shape=[4, 1], dtype=dtypes.float32), 777 num_epochs=num_epochs), 778 } 779 return features, labels 780 781 def _my_metric_op(predictions, labels): 782 return math_ops.reduce_sum(math_ops.multiply(predictions, labels)) 783 784 regressor = debug.DebugRegressor( 785 config=run_config.RunConfig(tf_random_seed=1)) 786 787 regressor.fit(input_fn=_input_fn, steps=5) 788 scores = regressor.evaluate( 789 input_fn=_input_fn, 790 steps=1, 791 metrics={ 792 'my_error': 793 MetricSpec( 794 metric_fn=metric_ops.streaming_mean_squared_error, 795 prediction_key='scores'), 796 'my_metric': 797 MetricSpec(metric_fn=_my_metric_op, prediction_key='scores') 798 }) 799 self.assertIn('loss', set(scores.keys())) 800 self.assertIn('my_error', set(scores.keys())) 801 self.assertIn('my_metric', set(scores.keys())) 802 predict_input_fn = functools.partial(_input_fn, num_epochs=1) 803 predictions = np.array( 804 list(regressor.predict_scores(input_fn=predict_input_fn))) 805 self.assertAlmostEqual( 806 _sklearn.mean_squared_error(np.array([1, 0, 0, 0]), predictions), 807 scores['my_error']) 808 809 # Tests the case where the prediction_key is not "scores". 810 with self.assertRaisesRegexp(KeyError, 'bad_type'): 811 regressor.evaluate( 812 input_fn=_input_fn, 813 steps=1, 814 metrics={ 815 'bad_name': 816 MetricSpec( 817 metric_fn=metric_ops.streaming_auc, 818 prediction_key='bad_type') 819 }) 820 821 def testTrainSaveLoad(self): 822 """Tests that insures you can save and reload a trained model.""" 823 824 def _input_fn(num_epochs=None): 825 features = { 826 'age': 827 input_lib.limit_epochs( 828 constant_op.constant([[0.8], [0.15], [0.]]), 829 num_epochs=num_epochs), 830 'language': 831 sparse_tensor.SparseTensor( 832 values=input_lib.limit_epochs( 833 ['en', 'fr', 'zh'], num_epochs=num_epochs), 834 indices=[[0, 0], [0, 1], [2, 0]], 835 dense_shape=[3, 2]) 836 } 837 return features, constant_op.constant([1., 0., 0.2], dtype=dtypes.float32) 838 839 model_dir = tempfile.mkdtemp() 840 regressor = debug.DebugRegressor( 841 model_dir=model_dir, config=run_config.RunConfig(tf_random_seed=1)) 842 843 regressor.fit(input_fn=_input_fn, steps=5) 844 predict_input_fn = functools.partial(_input_fn, num_epochs=1) 845 predictions = list(regressor.predict_scores(input_fn=predict_input_fn)) 846 del regressor 847 848 regressor2 = debug.DebugRegressor( 849 model_dir=model_dir, config=run_config.RunConfig(tf_random_seed=1)) 850 predictions2 = list(regressor2.predict_scores(input_fn=predict_input_fn)) 851 self.assertAllClose(predictions, predictions2) 852 853 854if __name__ == '__main__': 855 test.main() 856