# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for DNNEstimators."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import functools
import json
import tempfile

import numpy as np

from tensorflow.contrib.layers.python.layers import feature_column
from tensorflow.contrib.learn.python.learn import experiment
from tensorflow.contrib.learn.python.learn.datasets import base
from tensorflow.contrib.learn.python.learn.estimators import _sklearn
from tensorflow.contrib.learn.python.learn.estimators import dnn
from tensorflow.contrib.learn.python.learn.estimators import dnn_linear_combined
from tensorflow.contrib.learn.python.learn.estimators import estimator
from tensorflow.contrib.learn.python.learn.estimators import estimator_test_utils
from tensorflow.contrib.learn.python.learn.estimators import head as head_lib
from tensorflow.contrib.learn.python.learn.estimators import model_fn
from tensorflow.contrib.learn.python.learn.estimators import run_config
from tensorflow.contrib.learn.python.learn.estimators import test_data
from tensorflow.contrib.learn.python.learn.metric_spec import MetricSpec
from tensorflow.contrib.metrics.python.ops import metric_ops
from tensorflow.python.feature_column import feature_column_lib as fc_core
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import sparse_tensor
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import init_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.platform import test
from tensorflow.python.training import input as input_lib
from tensorflow.python.training import monitored_session
from tensorflow.python.training import server_lib


class EmbeddingMultiplierTest(test.TestCase):
  """dnn_model_fn tests for the 'embedding_lr_multipliers' param."""

  def testRaisesNonEmbeddingColumn(self):
    """Checks that a multiplier keyed by a one-hot column raises ValueError."""
    one_hot_language = feature_column.one_hot_column(
        feature_column.sparse_column_with_hash_bucket('language', 10))

    params = {
        'feature_columns': [one_hot_language],
        'head': head_lib.multi_class_head(2),
        'hidden_units': [1],
        # Set lr mult to 0. to keep embeddings constant.
        'embedding_lr_multipliers': {
            one_hot_language: 0.0
        },
    }
    features = {
        'language':
            sparse_tensor.SparseTensor(
                values=['en', 'fr', 'zh'],
                indices=[[0, 0], [1, 0], [2, 0]],
                dense_shape=[3, 1]),
    }
    labels = constant_op.constant([[0], [0], [0]], dtype=dtypes.int32)
    # The multiplier is attached to a one-hot (non-embedding) column, so
    # building the model is expected to fail.
    with self.assertRaisesRegexp(ValueError,
                                 'can only be defined for embedding columns'):
      dnn._dnn_model_fn(features, labels, model_fn.ModeKeys.TRAIN, params)

  def testMultipliesGradient(self):
    """Checks that a 0.0 multiplier freezes one embedding while another trains.

    Both embeddings are initialized to the constant 0.1. Only the 'language'
    embedding gets a multiplier (0.0); after two train steps it must still be
    0.1 while the 'wire' embedding must have moved.
    """
    embedding_language = feature_column.embedding_column(
        feature_column.sparse_column_with_hash_bucket('language', 10),
        dimension=1,
        initializer=init_ops.constant_initializer(0.1))
    embedding_wire = feature_column.embedding_column(
        feature_column.sparse_column_with_hash_bucket('wire', 10),
        dimension=1,
        initializer=init_ops.constant_initializer(0.1))

    params = {
        'feature_columns': [embedding_language, embedding_wire],
        'head': head_lib.multi_class_head(2),
        'hidden_units': [1],
        # Set lr mult to 0. to keep embeddings constant.
        'embedding_lr_multipliers': {
            embedding_language: 0.0
        },
    }
    features = {
        'language':
            sparse_tensor.SparseTensor(
                values=['en', 'fr', 'zh'],
                indices=[[0, 0], [1, 0], [2, 0]],
                dense_shape=[3, 1]),
        'wire':
            sparse_tensor.SparseTensor(
                values=['omar', 'stringer', 'marlo'],
                indices=[[0, 0], [1, 0], [2, 0]],
                dense_shape=[3, 1]),
    }
    labels = constant_op.constant([[0], [0], [0]], dtype=dtypes.int32)
    model_ops = dnn._dnn_model_fn(features, labels, model_fn.ModeKeys.TRAIN,
                                  params)
    with monitored_session.MonitoredSession() as sess:
      # Look up the embedding variables created by the model_fn above.
      language_var = dnn_linear_combined._get_embedding_variable(
          embedding_language, 'dnn', 'dnn/input_from_feature_columns')
      wire_var = dnn_linear_combined._get_embedding_variable(
          embedding_wire, 'dnn', 'dnn/input_from_feature_columns')
      # Run two train steps; the values fetched on the last step are checked.
      for _ in range(2):
        _, language_value, wire_value = sess.run(
            [model_ops.train_op, language_var, wire_var])
      initial_value = np.full_like(language_value, 0.1)
      # Multiplier 0.0: the language embedding must not have changed.
      self.assertTrue(np.all(np.isclose(language_value, initial_value)))
      # No multiplier: the wire embedding must have been updated.
      self.assertFalse(np.all(np.isclose(wire_value, initial_value)))


class ActivationFunctionTest(test.TestCase):
  """dnn_model_fn tests for the 'activation_fn' param."""

  def _getModelForActivation(self, activation_fn):
    """Builds the DNN model_fn in TRAIN mode with the given activation.

    Args:
      activation_fn: Value stored under params['activation_fn'].

    Returns:
      The result of `dnn._dnn_model_fn` for a single-embedding-column model.
    """
    embedding_language = feature_column.embedding_column(
        feature_column.sparse_column_with_hash_bucket('language', 10),
        dimension=1,
        initializer=init_ops.constant_initializer(0.1))
    params = {
        'feature_columns': [embedding_language],
        'head': head_lib.multi_class_head(2),
        'hidden_units': [1],
        'activation_fn': activation_fn,
    }
    features = {
        'language':
            sparse_tensor.SparseTensor(
                values=['en', 'fr', 'zh'],
                indices=[[0, 0], [1, 0], [2, 0]],
                dense_shape=[3, 1]),
    }
    labels = constant_op.constant([[0], [0], [0]], dtype=dtypes.int32)
    return dnn._dnn_model_fn(features, labels, model_fn.ModeKeys.TRAIN, params)

  def testValidActivation(self):
    """Checks that the activation name 'relu' is accepted without raising."""
    _ = self._getModelForActivation('relu')

  def testRaisesOnBadActivationName(self):
    """Checks that the activation name 'max_pool' raises ValueError."""
    with self.assertRaisesRegexp(ValueError,
                                 'Activation name should be one of'):
      self._getModelForActivation('max_pool')


class DNNEstimatorTest(test.TestCase):
  """Tests for dnn.DNNEstimator."""

  def _assertInRange(self, expected_min, expected_max, actual):
    """Asserts that expected_min <= actual <= expected_max."""
    self.assertLessEqual(expected_min, actual)
    self.assertGreaterEqual(expected_max, actual)

  def testExperimentIntegration(self):
    """Runs Experiment.test() with the iris multiclass input_fn."""
    # NOTE(review): this builds a DNNClassifier although the enclosing class
    # is DNNEstimatorTest -- confirm whether DNNEstimator was intended.
    exp = experiment.Experiment(
        estimator=dnn.DNNClassifier(
            n_classes=3,
            feature_columns=[
                feature_column.real_valued_column(
                    'feature', dimension=4)
            ],
            hidden_units=[3, 3]),
        train_input_fn=test_data.iris_input_multiclass_fn,
        eval_input_fn=test_data.iris_input_multiclass_fn)
    exp.test()

  def testEstimatorContract(self):
    """Delegates to the shared estimator contract check for DNNEstimator."""
    estimator_test_utils.assert_estimator_contract(self, dnn.DNNEstimator)

  def testTrainWithWeights(self):
    """Tests training with given weight column."""

    def _input_fn_train():
      # Create 4 rows, one of them (y = x), three of them (y=Not(x))
      # First row has more weight than others. Model should fit (y=x) better
      # than (y=Not(x)) due to the relative higher weight of the first row.
      labels = constant_op.constant([[1], [0], [0], [0]])
      features = {
          'x': array_ops.ones(shape=[4, 1], dtype=dtypes.float32),
          'w': constant_op.constant([[100.], [3.], [2.], [2.]])
      }
      return features, labels

    def _input_fn_eval():
      # Create 4 rows (y = x)
      labels = constant_op.constant([[1], [1], [1], [1]])
      features = {
          'x': array_ops.ones(shape=[4, 1], dtype=dtypes.float32),
          'w': constant_op.constant([[1.], [1.], [1.], [1.]])
      }
      return features, labels

    # The weight column 'w' is wired in through the head.
    dnn_estimator = dnn.DNNEstimator(
        head=head_lib.multi_class_head(2, weight_column_name='w'),
        feature_columns=[feature_column.real_valued_column('x')],
        hidden_units=[3, 3],
        config=run_config.RunConfig(tf_random_seed=1))

    dnn_estimator.fit(input_fn=_input_fn_train, steps=5)
    scores = dnn_estimator.evaluate(input_fn=_input_fn_eval, steps=1)
    self._assertInRange(0.0, 1.0, scores['accuracy'])


class DNNClassifierTest(test.TestCase):
  """Tests for dnn.DNNClassifier."""

  def testExperimentIntegration(self):
    """Runs Experiment.test() on a 3-class DNNClassifier with iris inputs."""
    exp = experiment.Experiment(
        estimator=dnn.DNNClassifier(
            n_classes=3,
            feature_columns=[
                feature_column.real_valued_column(
                    'feature', dimension=4)
            ],
            hidden_units=[3, 3]),
        train_input_fn=test_data.iris_input_multiclass_fn,
        eval_input_fn=test_data.iris_input_multiclass_fn)
    exp.test()

  def _assertInRange(self, expected_min, expected_max, actual):
    """Asserts that expected_min <= actual <= expected_max."""
    self.assertLessEqual(expected_min, actual)
    self.assertGreaterEqual(expected_max, actual)

  def testEstimatorContract(self):
    """Delegates to the shared estimator contract check for DNNClassifier."""
    estimator_test_utils.assert_estimator_contract(self, dnn.DNNClassifier)

  def testEmbeddingMultiplier(self):
    """Checks that embedding_lr_multipliers is stored in classifier.params."""
    embedding_language = feature_column.embedding_column(
        feature_column.sparse_column_with_hash_bucket('language', 10),
        dimension=1,
        initializer=init_ops.constant_initializer(0.1))
    classifier = dnn.DNNClassifier(
        feature_columns=[embedding_language],
        hidden_units=[3, 3],
        embedding_lr_multipliers={embedding_language: 0.8})
    self.assertEqual({
        embedding_language: 0.8
    }, classifier.params['embedding_lr_multipliers'])

  def testInputPartitionSize(self):
    """Checks input_layer_min_slice_size is passed on and yields 10 partitions."""

    def _input_fn_float_label(num_epochs=None):
      features = {
          'language':
              sparse_tensor.SparseTensor(
                  values=input_lib.limit_epochs(
                      ['en', 'fr', 'zh'], num_epochs=num_epochs),
                  indices=[[0, 0], [0, 1], [2, 0]],
                  dense_shape=[3, 2])
      }
      labels = constant_op.constant([[0.8], [0.], [0.2]], dtype=dtypes.float32)
      return features, labels

    language_column = feature_column.sparse_column_with_hash_bucket(
        'language', hash_bucket_size=20)
    feature_columns = [
        feature_column.embedding_column(language_column, dimension=1),
    ]

    # Set num_ps_replica to be 10 and the min slice size to be extremely small,
    # so as to ensure that there'll be 10 partitions produced.
    config = run_config.RunConfig(tf_random_seed=1)
    config._num_ps_replicas = 10
    classifier = dnn.DNNClassifier(
        n_classes=2,
        feature_columns=feature_columns,
        hidden_units=[3, 3],
        optimizer='Adagrad',
        config=config,
        input_layer_min_slice_size=1)

    # Ensure the param is passed in.
    self.assertEqual(1, classifier.params['input_layer_min_slice_size'])

    # Ensure the partition count is 10.
    classifier.fit(input_fn=_input_fn_float_label, steps=50)
    partition_count = 0
    # Count variables whose name mentions both the language embedding and
    # the Adagrad optimizer.
    for name in classifier.get_variable_names():
      if 'language_embedding' in name and 'Adagrad' in name:
        partition_count += 1
    self.assertEqual(10, partition_count)

  def testLogisticRegression_MatrixData(self):
    """Tests binary classification using matrix data as input."""
    cont_features = [feature_column.real_valued_column('feature', dimension=4)]

    classifier = dnn.DNNClassifier(
        feature_columns=cont_features,
        hidden_units=[3, 3],
        config=run_config.RunConfig(tf_random_seed=1))

    input_fn = test_data.iris_input_logistic_fn
    classifier.fit(input_fn=input_fn, steps=5)
    scores = classifier.evaluate(input_fn=input_fn, steps=1)
    self._assertInRange(0.0, 1.0, scores['accuracy'])
    self.assertIn('loss', scores)

  def testLogisticRegression_MatrixData_Labels1D(self):
    """Same as the last test, but label shape is [100] instead of [100, 1]."""

    def _input_fn():
      iris = test_data.prepare_iris_data_for_logistic_regression()
      return {
          'feature': constant_op.constant(
              iris.data, dtype=dtypes.float32)
      }, constant_op.constant(
          iris.target, shape=[100], dtype=dtypes.int32)

    cont_features = [feature_column.real_valued_column('feature', dimension=4)]

    classifier = dnn.DNNClassifier(
        feature_columns=cont_features,
        hidden_units=[3, 3],
        config=run_config.RunConfig(tf_random_seed=1))

    classifier.fit(input_fn=_input_fn, steps=5)
    scores = classifier.evaluate(input_fn=_input_fn, steps=1)
    self.assertIn('loss', scores)

  def testLogisticRegression_NpMatrixData(self):
    """Tests binary classification using numpy matrix data as input."""
    iris = test_data.prepare_iris_data_for_logistic_regression()
    train_x = iris.data
    train_y = iris.target
    feature_columns = [feature_column.real_valued_column('', dimension=4)]
    classifier = dnn.DNNClassifier(
        feature_columns=feature_columns,
        hidden_units=[3, 3],
        config=run_config.RunConfig(tf_random_seed=1))

    classifier.fit(x=train_x, y=train_y, steps=5)
    scores = classifier.evaluate(x=train_x, y=train_y, steps=1)
    self._assertInRange(0.0, 1.0, scores['accuracy'])

  def _assertBinaryPredictions(self, expected_len, predictions):
    """Asserts there are expected_len predictions, each equal to 0 or 1."""
    self.assertEqual(expected_len, len(predictions))
    for prediction in predictions:
      self.assertIn(prediction, (0, 1))

  def _assertClassificationPredictions(
      self, expected_len, n_classes, predictions):
    """Asserts there are expected_len predictions, each in range(n_classes)."""
    self.assertEqual(expected_len, len(predictions))
    for prediction in predictions:
      self.assertIn(prediction, range(n_classes))

  def _assertProbabilities(self, expected_batch_size, expected_n_classes,
                           probabilities):
    """Asserts the size of `probabilities` and that each entry is in [0, 1].

    Args:
      expected_batch_size: Expected `len(probabilities)`.
      expected_n_classes: Expected length of every row of `probabilities`.
      probabilities: Indexable collection of per-example probability rows.
    """
    self.assertEqual(expected_batch_size, len(probabilities))
    for b in range(expected_batch_size):
      self.assertEqual(expected_n_classes, len(probabilities[b]))
      for i in range(expected_n_classes):
        self._assertInRange(0.0, 1.0, probabilities[b][i])

  def testEstimatorWithCoreFeatureColumns(self):
    """Tests fit/evaluate/predict with columns built from fc_core."""

    def _input_fn(num_epochs=None):
      features = {
          'age':
              input_lib.limit_epochs(
                  constant_op.constant([[.8], [0.2], [.1]]),
                  num_epochs=num_epochs),
          'language':
              sparse_tensor.SparseTensor(
                  values=input_lib.limit_epochs(
                      ['en', 'fr', 'zh'], num_epochs=num_epochs),
                  indices=[[0, 0], [0, 1], [2, 0]],
                  dense_shape=[3, 2])
      }
      return features, constant_op.constant([[1], [0], [0]], dtype=dtypes.int32)

    language_column = fc_core.categorical_column_with_hash_bucket(
        'language', hash_bucket_size=20)
    feature_columns = [
        fc_core.embedding_column(language_column, dimension=1),
        fc_core.numeric_column('age')
    ]

    classifier = dnn.DNNClassifier(
        n_classes=2,
        feature_columns=feature_columns,
        hidden_units=[10, 10],
        config=run_config.RunConfig(tf_random_seed=1))

    classifier.fit(input_fn=_input_fn, steps=50)

    scores = classifier.evaluate(input_fn=_input_fn, steps=1)
    self._assertInRange(0.0, 1.0, scores['accuracy'])
    self.assertIn('loss', scores)
    # Prediction uses a single epoch of the same input.
    predict_input_fn = functools.partial(_input_fn, num_epochs=1)
    predicted_classes = list(
        classifier.predict_classes(input_fn=predict_input_fn, as_iterable=True))
    self._assertBinaryPredictions(3, predicted_classes)
    predictions = list(
        classifier.predict(input_fn=predict_input_fn, as_iterable=True))
    self.assertAllEqual(predicted_classes, predictions)

  def testLogisticRegression_TensorData(self):
    """Tests binary classification using tensor data as input."""

    def _input_fn(num_epochs=None):
      features = {
          'age':
              input_lib.limit_epochs(
                  constant_op.constant([[.8], [0.2], [.1]]),
                  num_epochs=num_epochs),
          'language':
              sparse_tensor.SparseTensor(
                  values=input_lib.limit_epochs(
                      ['en', 'fr', 'zh'], num_epochs=num_epochs),
                  indices=[[0, 0], [0, 1], [2, 0]],
                  dense_shape=[3, 2])
      }
      return features, constant_op.constant([[1], [0], [0]], dtype=dtypes.int32)

    language_column = feature_column.sparse_column_with_hash_bucket(
        'language', hash_bucket_size=20)
    feature_columns = [
        feature_column.embedding_column(
            language_column, dimension=1),
        feature_column.real_valued_column('age')
    ]

    classifier = dnn.DNNClassifier(
        n_classes=2,
        feature_columns=feature_columns,
        hidden_units=[10, 10],
        config=run_config.RunConfig(tf_random_seed=1))

    classifier.fit(input_fn=_input_fn, steps=50)

    scores = classifier.evaluate(input_fn=_input_fn, steps=1)
    self._assertInRange(0.0, 1.0, scores['accuracy'])
    self.assertIn('loss', scores)
    predict_input_fn = functools.partial(_input_fn, num_epochs=1)
    predicted_classes = list(
        classifier.predict_classes(
            input_fn=predict_input_fn, as_iterable=True))
    self._assertBinaryPredictions(3, predicted_classes)
    predictions = list(
        classifier.predict(input_fn=predict_input_fn, as_iterable=True))
    self.assertAllEqual(predicted_classes, predictions)

  def testLogisticRegression_FloatLabel(self):
    """Tests binary classification with float labels."""

    def _input_fn_float_label(num_epochs=None):
      features = {
          'age':
              input_lib.limit_epochs(
                  constant_op.constant([[50], [20], [10]]),
                  num_epochs=num_epochs),
          'language':
              sparse_tensor.SparseTensor(
                  values=input_lib.limit_epochs(
                      ['en', 'fr', 'zh'], num_epochs=num_epochs),
                  indices=[[0, 0], [0, 1], [2, 0]],
                  dense_shape=[3, 2])
      }
      labels = constant_op.constant([[0.8], [0.], [0.2]], dtype=dtypes.float32)
      return features, labels

    language_column = feature_column.sparse_column_with_hash_bucket(
        'language', hash_bucket_size=20)
    feature_columns = [
        feature_column.embedding_column(
            language_column, dimension=1),
        feature_column.real_valued_column('age')
    ]

    classifier = dnn.DNNClassifier(
        n_classes=2,
        feature_columns=feature_columns,
        hidden_units=[3, 3],
        config=run_config.RunConfig(tf_random_seed=1))

    classifier.fit(input_fn=_input_fn_float_label, steps=50)

    predict_input_fn = functools.partial(_input_fn_float_label, num_epochs=1)
    predicted_classes = list(
        classifier.predict_classes(
            input_fn=predict_input_fn, as_iterable=True))
    self._assertBinaryPredictions(3, predicted_classes)
    predictions = list(
        classifier.predict(
            input_fn=predict_input_fn, as_iterable=True))
    self.assertAllEqual(predicted_classes, predictions)
    predictions_proba = list(
        classifier.predict_proba(
            input_fn=predict_input_fn, as_iterable=True))
    self._assertProbabilities(3, 2, predictions_proba)

  def testMultiClass_MatrixData(self):
    """Tests multi-class classification using matrix data as input."""
    cont_features = [feature_column.real_valued_column('feature', dimension=4)]

    classifier = dnn.DNNClassifier(
        n_classes=3,
        feature_columns=cont_features,
        hidden_units=[3, 3],
        config=run_config.RunConfig(tf_random_seed=1))

    input_fn = test_data.iris_input_multiclass_fn
    classifier.fit(input_fn=input_fn, steps=200)
    scores = classifier.evaluate(input_fn=input_fn, steps=1)
    self._assertInRange(0.0, 1.0, scores['accuracy'])
    self.assertIn('loss', scores)

  def testMultiClass_MatrixData_Labels1D(self):
    """Same as the last test, but label shape is [150] instead of [150, 1]."""

    def _input_fn():
      iris = base.load_iris()
      return {
          'feature': constant_op.constant(
              iris.data, dtype=dtypes.float32)
      }, constant_op.constant(
          iris.target, shape=[150], dtype=dtypes.int32)

    cont_features = [feature_column.real_valued_column('feature', dimension=4)]

    classifier = dnn.DNNClassifier(
        n_classes=3,
        feature_columns=cont_features,
        hidden_units=[3, 3],
        config=run_config.RunConfig(tf_random_seed=1))

    classifier.fit(input_fn=_input_fn, steps=200)
    scores = classifier.evaluate(input_fn=_input_fn, steps=1)
    self._assertInRange(0.0, 1.0, scores['accuracy'])

  def testMultiClass_NpMatrixData(self):
    """Tests multi-class classification using numpy matrix data as input."""
    iris = base.load_iris()
    train_x = iris.data
    train_y = iris.target
    feature_columns = [feature_column.real_valued_column('', dimension=4)]
    classifier = dnn.DNNClassifier(
        n_classes=3,
        feature_columns=feature_columns,
        hidden_units=[3, 3],
        config=run_config.RunConfig(tf_random_seed=1))

    classifier.fit(x=train_x, y=train_y, steps=200)
    scores = classifier.evaluate(x=train_x, y=train_y, steps=1)
    self._assertInRange(0.0, 1.0, scores['accuracy'])

  def testMultiClassLabelKeys(self):
    """Tests n_classes > 2 with label_keys vocabulary for labels."""
    # Byte literals needed for python3 test to pass.
    label_keys = [b'label0', b'label1', b'label2']

    def _input_fn(num_epochs=None):
      features = {
          'age':
              input_lib.limit_epochs(
                  constant_op.constant([[.8], [0.2], [.1]]),
                  num_epochs=num_epochs),
          'language':
              sparse_tensor.SparseTensor(
                  values=input_lib.limit_epochs(
                      ['en', 'fr', 'zh'], num_epochs=num_epochs),
                  indices=[[0, 0], [0, 1], [2, 0]],
                  dense_shape=[3, 2])
      }
      # Labels are string keys taken from label_keys, not integer class ids.
      labels = constant_op.constant(
          [[label_keys[1]], [label_keys[0]], [label_keys[0]]],
          dtype=dtypes.string)
      return features, labels

    language_column = feature_column.sparse_column_with_hash_bucket(
        'language', hash_bucket_size=20)
    feature_columns = [
        feature_column.embedding_column(
            language_column, dimension=1),
        feature_column.real_valued_column('age')
    ]

    classifier = dnn.DNNClassifier(
        n_classes=3,
        feature_columns=feature_columns,
        hidden_units=[10, 10],
        label_keys=label_keys,
        config=run_config.RunConfig(tf_random_seed=1))

    classifier.fit(input_fn=_input_fn, steps=50)

    scores = classifier.evaluate(input_fn=_input_fn, steps=1)
    self._assertInRange(0.0, 1.0, scores['accuracy'])
    self.assertIn('loss', scores)
    predict_input_fn = functools.partial(_input_fn, num_epochs=1)
    predicted_classes = list(
        classifier.predict_classes(
            input_fn=predict_input_fn, as_iterable=True))
    self.assertEqual(3, len(predicted_classes))
    # Every predicted class must be one of the supplied label keys.
    for pred in predicted_classes:
      self.assertIn(pred, label_keys)
    predictions = list(
        classifier.predict(input_fn=predict_input_fn, as_iterable=True))
    self.assertAllEqual(predicted_classes, predictions)

  def testLoss(self):
    """Tests loss calculation."""

    def _input_fn_train():
      # Create 4 rows, one of them (y = x), three of them (y=Not(x))
      # The logistic prediction should be (y = 0.25).
      labels = constant_op.constant([[1], [0], [0], [0]])
      features = {'x': array_ops.ones(shape=[4, 1], dtype=dtypes.float32),}
      return features, labels

    classifier = dnn.DNNClassifier(
        n_classes=2,
        feature_columns=[feature_column.real_valued_column('x')],
        hidden_units=[3, 3],
        config=run_config.RunConfig(tf_random_seed=1))

    classifier.fit(input_fn=_input_fn_train, steps=5)
    scores = classifier.evaluate(input_fn=_input_fn_train, steps=1)
    self.assertIn('loss', scores)

  def testLossWithWeights(self):
    """Tests loss calculation with weights."""

    def _input_fn_train():
      # 4 rows with equal weight, one of them (y = x), three of them (y=Not(x))
      # The logistic prediction should be (y = 0.25).
      labels = constant_op.constant([[1.], [0.], [0.], [0.]])
      features = {
          'x': array_ops.ones(
              shape=[4, 1], dtype=dtypes.float32),
          'w': constant_op.constant([[1.], [1.], [1.], [1.]])
      }
      return features, labels

    def _input_fn_eval():
      # 4 rows, with different weights.
      labels = constant_op.constant([[1.], [0.], [0.], [0.]])
      features = {
          'x': array_ops.ones(
              shape=[4, 1], dtype=dtypes.float32),
          'w': constant_op.constant([[7.], [1.], [1.], [1.]])
      }
      return features, labels

    classifier = dnn.DNNClassifier(
        weight_column_name='w',
        n_classes=2,
        feature_columns=[feature_column.real_valued_column('x')],
        hidden_units=[3, 3],
        config=run_config.RunConfig(tf_random_seed=1))

    classifier.fit(input_fn=_input_fn_train, steps=5)
    scores = classifier.evaluate(input_fn=_input_fn_eval, steps=1)
    self.assertIn('loss', scores)

  def testTrainWithWeights(self):
    """Tests training with given weight column."""

    def _input_fn_train():
      # Create 4 rows, one of them (y = x), three of them (y=Not(x))
      # First row has more weight than others. Model should fit (y=x) better
      # than (y=Not(x)) due to the relative higher weight of the first row.
      labels = constant_op.constant([[1], [0], [0], [0]])
      features = {
          'x': array_ops.ones(
              shape=[4, 1], dtype=dtypes.float32),
          'w': constant_op.constant([[100.], [3.], [2.], [2.]])
      }
      return features, labels

    def _input_fn_eval():
      # Create 4 rows (y = x)
      labels = constant_op.constant([[1], [1], [1], [1]])
      features = {
          'x': array_ops.ones(
              shape=[4, 1], dtype=dtypes.float32),
          'w': constant_op.constant([[1.], [1.], [1.], [1.]])
      }
      return features, labels

    classifier = dnn.DNNClassifier(
        weight_column_name='w',
        feature_columns=[feature_column.real_valued_column('x')],
        hidden_units=[3, 3],
        config=run_config.RunConfig(tf_random_seed=1))

    classifier.fit(input_fn=_input_fn_train, steps=5)
    scores = classifier.evaluate(input_fn=_input_fn_eval, steps=1)
    self._assertInRange(0.0, 1.0, scores['accuracy'])

  def testPredict_AsIterableFalse(self):
    """Tests predict_classes, predict and predict_proba with as_iterable=False."""

    def _input_fn(num_epochs=None):
      features = {
          'age':
              input_lib.limit_epochs(
                  constant_op.constant([[.8], [.2], [.1]]),
                  num_epochs=num_epochs),
          'language':
              sparse_tensor.SparseTensor(
                  values=input_lib.limit_epochs(
                      ['en', 'fr', 'zh'], num_epochs=num_epochs),
                  indices=[[0, 0], [0, 1], [2, 0]],
                  dense_shape=[3, 2])
      }
      return features, constant_op.constant([[1], [0], [0]], dtype=dtypes.int32)

    sparse_column = feature_column.sparse_column_with_hash_bucket(
        'language', hash_bucket_size=20)
    feature_columns = [
        feature_column.embedding_column(
            sparse_column, dimension=1)
    ]

    n_classes = 3
    classifier = dnn.DNNClassifier(
        n_classes=n_classes,
        feature_columns=feature_columns,
        hidden_units=[10, 10],
        config=run_config.RunConfig(tf_random_seed=1))

    classifier.fit(input_fn=_input_fn, steps=100)

    scores = classifier.evaluate(input_fn=_input_fn, steps=1)
    self._assertInRange(0.0, 1.0, scores['accuracy'])
    self.assertIn('loss', scores)
    predicted_classes = classifier.predict_classes(
        input_fn=_input_fn, as_iterable=False)
    self._assertClassificationPredictions(3, n_classes, predicted_classes)
    predictions = classifier.predict(input_fn=_input_fn, as_iterable=False)
    self.assertAllEqual(predicted_classes, predictions)
    probabilities = classifier.predict_proba(
        input_fn=_input_fn, as_iterable=False)
    self._assertProbabilities(3, n_classes, probabilities)

  def testPredict_AsIterable(self):
    """Tests predict_classes, predict and predict_proba with as_iterable=True."""

    def _input_fn(num_epochs=None):
      features = {
          'age':
              input_lib.limit_epochs(
                  constant_op.constant([[.8], [.2], [.1]]),
                  num_epochs=num_epochs),
          'language':
              sparse_tensor.SparseTensor(
                  values=input_lib.limit_epochs(
                      ['en', 'fr', 'zh'], num_epochs=num_epochs),
                  indices=[[0, 0], [0, 1], [2, 0]],
                  dense_shape=[3, 2])
      }
      return features, constant_op.constant([[1], [0], [0]], dtype=dtypes.int32)

    language_column = feature_column.sparse_column_with_hash_bucket(
        'language', hash_bucket_size=20)
    feature_columns = [
        feature_column.embedding_column(
            language_column, dimension=1),
        feature_column.real_valued_column('age')
    ]

    n_classes = 3
    classifier = dnn.DNNClassifier(
        n_classes=n_classes,
        feature_columns=feature_columns,
        hidden_units=[3, 3],
        config=run_config.RunConfig(tf_random_seed=1))

    classifier.fit(input_fn=_input_fn, steps=300)

    scores = classifier.evaluate(input_fn=_input_fn, steps=1)
    self._assertInRange(0.0, 1.0, scores['accuracy'])
    self.assertIn('loss', scores)
    # The iterable predict calls below use an input limited to one epoch.
    predict_input_fn = functools.partial(_input_fn, num_epochs=1)
    predicted_classes = list(
        classifier.predict_classes(
            input_fn=predict_input_fn, as_iterable=True))
    self._assertClassificationPredictions(3, n_classes, predicted_classes)
    predictions = list(
        classifier.predict(
            input_fn=predict_input_fn, as_iterable=True))
    self.assertAllEqual(predicted_classes, predictions)
    predicted_proba = list(
        classifier.predict_proba(
            input_fn=predict_input_fn, as_iterable=True))
    self._assertProbabilities(3, n_classes, predicted_proba)

  def testCustomMetrics(self):
    """Tests custom evaluation metrics."""

    def _input_fn(num_epochs=None):
      # Create 4 rows, one of them (y = x), three of them (y=Not(x))
      labels = constant_op.constant([[1], [0], [0], [0]])
      features = {
          'x':
              input_lib.limit_epochs(
                  array_ops.ones(
                      shape=[4, 1], dtype=dtypes.float32),
                  num_epochs=num_epochs),
      }
      return features, labels

    def _my_metric_op(predictions, labels):
      # For the case of binary classification, the 2nd column of "predictions"
      # denotes the model predictions.
      # NOTE(review): labels is cast to float32 here and cast again to
      # predictions.dtype below; the first cast looks redundant -- confirm.
      labels = math_ops.cast(labels, dtypes.float32)
      predictions = array_ops.strided_slice(
          predictions, [0, 1], [-1, 2], end_mask=1)
      labels = math_ops.cast(labels, predictions.dtype)
      return math_ops.reduce_sum(math_ops.multiply(predictions, labels))

    classifier = dnn.DNNClassifier(
        feature_columns=[feature_column.real_valued_column('x')],
        hidden_units=[3, 3],
        config=run_config.RunConfig(tf_random_seed=1))

    classifier.fit(input_fn=_input_fn, steps=5)
    scores = classifier.evaluate(
        input_fn=_input_fn,
        steps=5,
        metrics={
            'my_accuracy':
                MetricSpec(
                    metric_fn=metric_ops.streaming_accuracy,
                    prediction_key='classes'),
            'my_precision':
                MetricSpec(
                    metric_fn=metric_ops.streaming_precision,
                    prediction_key='classes'),
            'my_metric':
                MetricSpec(
                    metric_fn=_my_metric_op, prediction_key='probabilities')
        })
    # All custom metric names must show up next to the built-in 'loss'.
    self.assertTrue(
        set(['loss', 'my_accuracy', 'my_precision', 'my_metric']).issubset(
            set(scores.keys())))
    predict_input_fn = functools.partial(_input_fn, num_epochs=1)
    predictions = np.array(list(classifier.predict_classes(
        input_fn=predict_input_fn)))
    # Cross-check the streaming accuracy against an accuracy computed directly
    # from the predicted classes.
    self.assertEqual(
        _sklearn.accuracy_score([1, 0, 0, 0], predictions),
        scores['my_accuracy'])

    # Test the case where the 2nd element of the key is neither "classes" nor
    # "probabilities".
    with self.assertRaisesRegexp(KeyError, 'bad_type'):
      classifier.evaluate(
          input_fn=_input_fn,
          steps=5,
          metrics={
              'bad_name':
                  MetricSpec(
                      metric_fn=metric_ops.streaming_auc,
                      prediction_key='bad_type')
          })

  def testTrainSaveLoad(self):
    """Tests that you can save and reload a trained model."""

    def _input_fn(num_epochs=None):
      features = {
          'age':
              input_lib.limit_epochs(
                  constant_op.constant([[.8], [.2], [.1]]),
                  num_epochs=num_epochs),
          'language':
              sparse_tensor.SparseTensor(
                  values=input_lib.limit_epochs(
                      ['en', 'fr', 'zh'], num_epochs=num_epochs),
                  indices=[[0, 0], [0, 1], [2, 0]],
                  dense_shape=[3, 2])
      }
      return features, constant_op.constant([[1], [0], [0]], dtype=dtypes.int32)

    sparse_column = feature_column.sparse_column_with_hash_bucket(
        'language', hash_bucket_size=20)
    feature_columns = [
        feature_column.embedding_column(
            sparse_column, dimension=1)
    ]

    model_dir = tempfile.mkdtemp()
    classifier = dnn.DNNClassifier(
        model_dir=model_dir,
        n_classes=3,
        feature_columns=feature_columns,
        hidden_units=[3, 3],
        config=run_config.RunConfig(tf_random_seed=1))

    classifier.fit(input_fn=_input_fn, steps=5)
    predict_input_fn = functools.partial(_input_fn, num_epochs=1)
    predictions1 = classifier.predict_classes(input_fn=predict_input_fn)
    del classifier

    # A second classifier pointed at the same model_dir, never fit here, must
    # produce the same predictions.
    classifier2 = dnn.DNNClassifier(
        model_dir=model_dir,
        n_classes=3,
        feature_columns=feature_columns,
        hidden_units=[3, 3],
        config=run_config.RunConfig(tf_random_seed=1))
    predictions2 = classifier2.predict_classes(input_fn=predict_input_fn)
    self.assertEqual(list(predictions1), list(predictions2))

  def testTrainWithPartitionedVariables(self):
    """Tests training with partitioned variables."""

    def _input_fn(num_epochs=None):
      features = {
          'age':
              input_lib.limit_epochs(
                  constant_op.constant([[.8], [.2], [.1]]),
                  num_epochs=num_epochs),
          'language':
              sparse_tensor.SparseTensor(
                  values=input_lib.limit_epochs(
                      ['en', 'fr', 'zh'], num_epochs=num_epochs),
                  indices=[[0, 0], [0, 1], [2, 0]],
                  dense_shape=[3, 2])
      }
      return features, constant_op.constant([[1], [0], [0]], dtype=dtypes.int32)

    # The given hash_bucket_size results in variables larger than the
    # default min_slice_size attribute, so the variables are partitioned.
    sparse_column = feature_column.sparse_column_with_hash_bucket(
        'language', hash_bucket_size=2e7)
    feature_columns = [
        feature_column.embedding_column(
            sparse_column, dimension=1)
    ]

    tf_config = {
        'cluster': {
            run_config.TaskType.PS: ['fake_ps_0', 'fake_ps_1']
        }
    }
    # TF_CONFIG is patched into os.environ only while the RunConfig is built.
    with test.mock.patch.dict('os.environ',
                              {'TF_CONFIG': json.dumps(tf_config)}):
      config = run_config.RunConfig(tf_random_seed=1)
      # Because we did not start a distributed cluster, we need to pass an
      # empty ClusterSpec, otherwise the device_setter will look for
      # distributed jobs, such as "/job:ps" which are not present.
      config._cluster_spec = server_lib.ClusterSpec({})

    classifier = dnn.DNNClassifier(
        n_classes=3,
        feature_columns=feature_columns,
        hidden_units=[3, 3],
        config=config)

    classifier.fit(input_fn=_input_fn, steps=5)
    scores = classifier.evaluate(input_fn=_input_fn, steps=1)
    self._assertInRange(0.0, 1.0, scores['accuracy'])
    self.assertIn('loss', scores)

  def testExport(self):
    """Tests export model for servo."""

    def input_fn():
      return {
          'age':
              constant_op.constant([1]),
          'language':
              sparse_tensor.SparseTensor(
                  values=['english'], indices=[[0, 0]], dense_shape=[1, 1])
      }, constant_op.constant([[1]])

    language = feature_column.sparse_column_with_hash_bucket('language', 100)
    feature_columns = [
        feature_column.real_valued_column('age'),
        feature_column.embedding_column(
            language, dimension=1)
    ]

    classifier = dnn.DNNClassifier(
        feature_columns=feature_columns, hidden_units=[3, 3])
    classifier.fit(input_fn=input_fn, steps=5)

    # The test only checks that export() completes; nothing is asserted about
    # the exported files.
    export_dir = tempfile.mkdtemp()
    classifier.export(export_dir)

  def testEnableCenteredBias(self):
    """Tests that we can enable centered bias."""
    cont_features = [feature_column.real_valued_column('feature', dimension=4)]

    classifier = dnn.DNNClassifier(
        n_classes=3,
        feature_columns=cont_features,
        hidden_units=[3, 3],
        enable_centered_bias=True,
        config=run_config.RunConfig(tf_random_seed=1))

    input_fn = test_data.iris_input_multiclass_fn
    classifier.fit(input_fn=input_fn, steps=5)
    # With centered bias enabled, the bias variable must exist after training.
    self.assertIn('dnn/multi_class_head/centered_bias_weight',
                  classifier.get_variable_names())
    scores = classifier.evaluate(input_fn=input_fn, steps=1)
    self._assertInRange(0.0, 1.0, scores['accuracy'])
    self.assertIn('loss', scores)

  def testDisableCenteredBias(self):
    """Tests that we can disable centered bias."""
    cont_features =
[feature_column.real_valued_column('feature', dimension=4)] 1015 1016 classifier = dnn.DNNClassifier( 1017 n_classes=3, 1018 feature_columns=cont_features, 1019 hidden_units=[3, 3], 1020 enable_centered_bias=False, 1021 config=run_config.RunConfig(tf_random_seed=1)) 1022 1023 input_fn = test_data.iris_input_multiclass_fn 1024 classifier.fit(input_fn=input_fn, steps=5) 1025 self.assertNotIn('centered_bias_weight', classifier.get_variable_names()) 1026 scores = classifier.evaluate(input_fn=input_fn, steps=1) 1027 self._assertInRange(0.0, 1.0, scores['accuracy']) 1028 self.assertIn('loss', scores) 1029 1030 1031class DNNRegressorTest(test.TestCase): 1032 1033 def testExperimentIntegration(self): 1034 exp = experiment.Experiment( 1035 estimator=dnn.DNNRegressor( 1036 feature_columns=[ 1037 feature_column.real_valued_column( 1038 'feature', dimension=4) 1039 ], 1040 hidden_units=[3, 3]), 1041 train_input_fn=test_data.iris_input_logistic_fn, 1042 eval_input_fn=test_data.iris_input_logistic_fn) 1043 exp.test() 1044 1045 def testEstimatorContract(self): 1046 estimator_test_utils.assert_estimator_contract(self, dnn.DNNRegressor) 1047 1048 def testRegression_MatrixData(self): 1049 """Tests regression using matrix data as input.""" 1050 cont_features = [feature_column.real_valued_column('feature', dimension=4)] 1051 1052 regressor = dnn.DNNRegressor( 1053 feature_columns=cont_features, 1054 hidden_units=[3, 3], 1055 config=run_config.RunConfig(tf_random_seed=1)) 1056 1057 input_fn = test_data.iris_input_logistic_fn 1058 regressor.fit(input_fn=input_fn, steps=200) 1059 scores = regressor.evaluate(input_fn=input_fn, steps=1) 1060 self.assertIn('loss', scores) 1061 1062 def testRegression_MatrixData_Labels1D(self): 1063 """Same as the last test, but label shape is [100] instead of [100, 1].""" 1064 1065 def _input_fn(): 1066 iris = test_data.prepare_iris_data_for_logistic_regression() 1067 return { 1068 'feature': constant_op.constant( 1069 iris.data, dtype=dtypes.float32) 1070 
}, constant_op.constant( 1071 iris.target, shape=[100], dtype=dtypes.int32) 1072 1073 cont_features = [feature_column.real_valued_column('feature', dimension=4)] 1074 1075 regressor = dnn.DNNRegressor( 1076 feature_columns=cont_features, 1077 hidden_units=[3, 3], 1078 config=run_config.RunConfig(tf_random_seed=1)) 1079 1080 regressor.fit(input_fn=_input_fn, steps=200) 1081 scores = regressor.evaluate(input_fn=_input_fn, steps=1) 1082 self.assertIn('loss', scores) 1083 1084 def testRegression_NpMatrixData(self): 1085 """Tests binary classification using numpy matrix data as input.""" 1086 iris = test_data.prepare_iris_data_for_logistic_regression() 1087 train_x = iris.data 1088 train_y = iris.target 1089 feature_columns = [feature_column.real_valued_column('', dimension=4)] 1090 regressor = dnn.DNNRegressor( 1091 feature_columns=feature_columns, 1092 hidden_units=[3, 3], 1093 config=run_config.RunConfig(tf_random_seed=1)) 1094 1095 regressor.fit(x=train_x, y=train_y, steps=200) 1096 scores = regressor.evaluate(x=train_x, y=train_y, steps=1) 1097 self.assertIn('loss', scores) 1098 1099 def testRegression_TensorData(self): 1100 """Tests regression using tensor data as input.""" 1101 1102 def _input_fn(num_epochs=None): 1103 features = { 1104 'age': 1105 input_lib.limit_epochs( 1106 constant_op.constant([[.8], [.15], [0.]]), 1107 num_epochs=num_epochs), 1108 'language': 1109 sparse_tensor.SparseTensor( 1110 values=input_lib.limit_epochs( 1111 ['en', 'fr', 'zh'], num_epochs=num_epochs), 1112 indices=[[0, 0], [0, 1], [2, 0]], 1113 dense_shape=[3, 2]) 1114 } 1115 return features, constant_op.constant([1., 0., 0.2], dtype=dtypes.float32) 1116 1117 language_column = feature_column.sparse_column_with_hash_bucket( 1118 'language', hash_bucket_size=20) 1119 feature_columns = [ 1120 feature_column.embedding_column( 1121 language_column, dimension=1), 1122 feature_column.real_valued_column('age') 1123 ] 1124 1125 regressor = dnn.DNNRegressor( 1126 
feature_columns=feature_columns, 1127 hidden_units=[3, 3], 1128 config=run_config.RunConfig(tf_random_seed=1)) 1129 1130 regressor.fit(input_fn=_input_fn, steps=200) 1131 1132 scores = regressor.evaluate(input_fn=_input_fn, steps=1) 1133 self.assertIn('loss', scores) 1134 1135 def testLoss(self): 1136 """Tests loss calculation.""" 1137 1138 def _input_fn_train(): 1139 # Create 4 rows, one of them (y = x), three of them (y=Not(x)) 1140 # The algorithm should learn (y = 0.25). 1141 labels = constant_op.constant([[1.], [0.], [0.], [0.]]) 1142 features = {'x': array_ops.ones(shape=[4, 1], dtype=dtypes.float32),} 1143 return features, labels 1144 1145 regressor = dnn.DNNRegressor( 1146 feature_columns=[feature_column.real_valued_column('x')], 1147 hidden_units=[3, 3], 1148 config=run_config.RunConfig(tf_random_seed=1)) 1149 1150 regressor.fit(input_fn=_input_fn_train, steps=5) 1151 scores = regressor.evaluate(input_fn=_input_fn_train, steps=1) 1152 self.assertIn('loss', scores) 1153 1154 def testLossWithWeights(self): 1155 """Tests loss calculation with weights.""" 1156 1157 def _input_fn_train(): 1158 # 4 rows with equal weight, one of them (y = x), three of them (y=Not(x)) 1159 # The algorithm should learn (y = 0.25). 1160 labels = constant_op.constant([[1.], [0.], [0.], [0.]]) 1161 features = { 1162 'x': array_ops.ones( 1163 shape=[4, 1], dtype=dtypes.float32), 1164 'w': constant_op.constant([[1.], [1.], [1.], [1.]]) 1165 } 1166 return features, labels 1167 1168 def _input_fn_eval(): 1169 # 4 rows, with different weights. 
1170 labels = constant_op.constant([[1.], [0.], [0.], [0.]]) 1171 features = { 1172 'x': array_ops.ones( 1173 shape=[4, 1], dtype=dtypes.float32), 1174 'w': constant_op.constant([[7.], [1.], [1.], [1.]]) 1175 } 1176 return features, labels 1177 1178 regressor = dnn.DNNRegressor( 1179 weight_column_name='w', 1180 feature_columns=[feature_column.real_valued_column('x')], 1181 hidden_units=[3, 3], 1182 config=run_config.RunConfig(tf_random_seed=1)) 1183 1184 regressor.fit(input_fn=_input_fn_train, steps=5) 1185 scores = regressor.evaluate(input_fn=_input_fn_eval, steps=1) 1186 self.assertIn('loss', scores) 1187 1188 def testTrainWithWeights(self): 1189 """Tests training with given weight column.""" 1190 1191 def _input_fn_train(): 1192 # Create 4 rows, one of them (y = x), three of them (y=Not(x)) 1193 # First row has more weight than others. Model should fit (y=x) better 1194 # than (y=Not(x)) due to the relative higher weight of the first row. 1195 labels = constant_op.constant([[1.], [0.], [0.], [0.]]) 1196 features = { 1197 'x': array_ops.ones( 1198 shape=[4, 1], dtype=dtypes.float32), 1199 'w': constant_op.constant([[100.], [3.], [2.], [2.]]) 1200 } 1201 return features, labels 1202 1203 def _input_fn_eval(): 1204 # Create 4 rows (y = x) 1205 labels = constant_op.constant([[1.], [1.], [1.], [1.]]) 1206 features = { 1207 'x': array_ops.ones( 1208 shape=[4, 1], dtype=dtypes.float32), 1209 'w': constant_op.constant([[1.], [1.], [1.], [1.]]) 1210 } 1211 return features, labels 1212 1213 regressor = dnn.DNNRegressor( 1214 weight_column_name='w', 1215 feature_columns=[feature_column.real_valued_column('x')], 1216 hidden_units=[3, 3], 1217 config=run_config.RunConfig(tf_random_seed=1)) 1218 1219 regressor.fit(input_fn=_input_fn_train, steps=5) 1220 scores = regressor.evaluate(input_fn=_input_fn_eval, steps=1) 1221 self.assertIn('loss', scores) 1222 1223 def _assertRegressionOutputs( 1224 self, predictions, expected_shape): 1225 predictions_nparray = 
np.array(predictions) 1226 self.assertAllEqual(expected_shape, predictions_nparray.shape) 1227 self.assertTrue(np.issubdtype(predictions_nparray.dtype, np.floating)) 1228 1229 def testPredict_AsIterableFalse(self): 1230 """Tests predict method with as_iterable=False.""" 1231 labels = [1., 0., 0.2] 1232 1233 def _input_fn(num_epochs=None): 1234 features = { 1235 'age': 1236 input_lib.limit_epochs( 1237 constant_op.constant([[0.8], [0.15], [0.]]), 1238 num_epochs=num_epochs), 1239 'language': 1240 sparse_tensor.SparseTensor( 1241 values=input_lib.limit_epochs( 1242 ['en', 'fr', 'zh'], num_epochs=num_epochs), 1243 indices=[[0, 0], [0, 1], [2, 0]], 1244 dense_shape=[3, 2]) 1245 } 1246 return features, constant_op.constant(labels, dtype=dtypes.float32) 1247 1248 sparse_column = feature_column.sparse_column_with_hash_bucket( 1249 'language', hash_bucket_size=20) 1250 feature_columns = [ 1251 feature_column.embedding_column( 1252 sparse_column, dimension=1), 1253 feature_column.real_valued_column('age') 1254 ] 1255 1256 regressor = dnn.DNNRegressor( 1257 feature_columns=feature_columns, 1258 hidden_units=[3, 3], 1259 config=run_config.RunConfig(tf_random_seed=1)) 1260 1261 regressor.fit(input_fn=_input_fn, steps=200) 1262 1263 scores = regressor.evaluate(input_fn=_input_fn, steps=1) 1264 self.assertIn('loss', scores) 1265 predicted_scores = regressor.predict_scores( 1266 input_fn=_input_fn, as_iterable=False) 1267 self._assertRegressionOutputs(predicted_scores, [3]) 1268 predictions = regressor.predict(input_fn=_input_fn, as_iterable=False) 1269 self.assertAllClose(predicted_scores, predictions) 1270 1271 def testPredict_AsIterable(self): 1272 """Tests predict method with as_iterable=True.""" 1273 labels = [1., 0., 0.2] 1274 1275 def _input_fn(num_epochs=None): 1276 features = { 1277 'age': 1278 input_lib.limit_epochs( 1279 constant_op.constant([[0.8], [0.15], [0.]]), 1280 num_epochs=num_epochs), 1281 'language': 1282 sparse_tensor.SparseTensor( 1283 
values=input_lib.limit_epochs( 1284 ['en', 'fr', 'zh'], num_epochs=num_epochs), 1285 indices=[[0, 0], [0, 1], [2, 0]], 1286 dense_shape=[3, 2]) 1287 } 1288 return features, constant_op.constant(labels, dtype=dtypes.float32) 1289 1290 sparse_column = feature_column.sparse_column_with_hash_bucket( 1291 'language', hash_bucket_size=20) 1292 feature_columns = [ 1293 feature_column.embedding_column( 1294 sparse_column, dimension=1), 1295 feature_column.real_valued_column('age') 1296 ] 1297 1298 regressor = dnn.DNNRegressor( 1299 feature_columns=feature_columns, 1300 hidden_units=[3, 3], 1301 config=run_config.RunConfig(tf_random_seed=1)) 1302 1303 regressor.fit(input_fn=_input_fn, steps=200) 1304 1305 scores = regressor.evaluate(input_fn=_input_fn, steps=1) 1306 self.assertIn('loss', scores) 1307 predict_input_fn = functools.partial(_input_fn, num_epochs=1) 1308 predicted_scores = list( 1309 regressor.predict_scores( 1310 input_fn=predict_input_fn, as_iterable=True)) 1311 self._assertRegressionOutputs(predicted_scores, [3]) 1312 predictions = list( 1313 regressor.predict(input_fn=predict_input_fn, as_iterable=True)) 1314 self.assertAllClose(predicted_scores, predictions) 1315 1316 def testCustomMetrics(self): 1317 """Tests custom evaluation metrics.""" 1318 1319 def _input_fn(num_epochs=None): 1320 # Create 4 rows, one of them (y = x), three of them (y=Not(x)) 1321 labels = constant_op.constant([[1.], [0.], [0.], [0.]]) 1322 features = { 1323 'x': 1324 input_lib.limit_epochs( 1325 array_ops.ones( 1326 shape=[4, 1], dtype=dtypes.float32), 1327 num_epochs=num_epochs), 1328 } 1329 return features, labels 1330 1331 def _my_metric_op(predictions, labels): 1332 return math_ops.reduce_sum(math_ops.multiply(predictions, labels)) 1333 1334 regressor = dnn.DNNRegressor( 1335 feature_columns=[feature_column.real_valued_column('x')], 1336 hidden_units=[3, 3], 1337 config=run_config.RunConfig(tf_random_seed=1)) 1338 1339 regressor.fit(input_fn=_input_fn, steps=5) 1340 scores = 
regressor.evaluate( 1341 input_fn=_input_fn, 1342 steps=1, 1343 metrics={ 1344 'my_error': metric_ops.streaming_mean_squared_error, 1345 ('my_metric', 'scores'): _my_metric_op 1346 }) 1347 self.assertIn('loss', set(scores.keys())) 1348 self.assertIn('my_error', set(scores.keys())) 1349 self.assertIn('my_metric', set(scores.keys())) 1350 predict_input_fn = functools.partial(_input_fn, num_epochs=1) 1351 predictions = np.array(list(regressor.predict_scores( 1352 input_fn=predict_input_fn))) 1353 self.assertAlmostEqual( 1354 _sklearn.mean_squared_error(np.array([1, 0, 0, 0]), predictions), 1355 scores['my_error']) 1356 1357 # Tests the case that the 2nd element of the key is not "scores". 1358 with self.assertRaises(KeyError): 1359 regressor.evaluate( 1360 input_fn=_input_fn, 1361 steps=1, 1362 metrics={ 1363 ('my_error', 'predictions'): 1364 metric_ops.streaming_mean_squared_error 1365 }) 1366 1367 # Tests the case where the tuple of the key doesn't have 2 elements. 1368 with self.assertRaises(ValueError): 1369 regressor.evaluate( 1370 input_fn=_input_fn, 1371 steps=1, 1372 metrics={ 1373 ('bad_length_name', 'scores', 'bad_length'): 1374 metric_ops.streaming_mean_squared_error 1375 }) 1376 1377 def testCustomMetricsWithMetricSpec(self): 1378 """Tests custom evaluation metrics that use MetricSpec.""" 1379 1380 def _input_fn(num_epochs=None): 1381 # Create 4 rows, one of them (y = x), three of them (y=Not(x)) 1382 labels = constant_op.constant([[1.], [0.], [0.], [0.]]) 1383 features = { 1384 'x': 1385 input_lib.limit_epochs( 1386 array_ops.ones( 1387 shape=[4, 1], dtype=dtypes.float32), 1388 num_epochs=num_epochs), 1389 } 1390 return features, labels 1391 1392 def _my_metric_op(predictions, labels): 1393 return math_ops.reduce_sum(math_ops.multiply(predictions, labels)) 1394 1395 regressor = dnn.DNNRegressor( 1396 feature_columns=[feature_column.real_valued_column('x')], 1397 hidden_units=[3, 3], 1398 config=run_config.RunConfig(tf_random_seed=1)) 1399 1400 
regressor.fit(input_fn=_input_fn, steps=5) 1401 scores = regressor.evaluate( 1402 input_fn=_input_fn, 1403 steps=1, 1404 metrics={ 1405 'my_error': 1406 MetricSpec( 1407 metric_fn=metric_ops.streaming_mean_squared_error, 1408 prediction_key='scores'), 1409 'my_metric': 1410 MetricSpec( 1411 metric_fn=_my_metric_op, prediction_key='scores') 1412 }) 1413 self.assertIn('loss', set(scores.keys())) 1414 self.assertIn('my_error', set(scores.keys())) 1415 self.assertIn('my_metric', set(scores.keys())) 1416 predict_input_fn = functools.partial(_input_fn, num_epochs=1) 1417 predictions = np.array(list(regressor.predict_scores( 1418 input_fn=predict_input_fn))) 1419 self.assertAlmostEqual( 1420 _sklearn.mean_squared_error(np.array([1, 0, 0, 0]), predictions), 1421 scores['my_error']) 1422 1423 # Tests the case where the prediction_key is not "scores". 1424 with self.assertRaisesRegexp(KeyError, 'bad_type'): 1425 regressor.evaluate( 1426 input_fn=_input_fn, 1427 steps=1, 1428 metrics={ 1429 'bad_name': 1430 MetricSpec( 1431 metric_fn=metric_ops.streaming_auc, 1432 prediction_key='bad_type') 1433 }) 1434 1435 def testTrainSaveLoad(self): 1436 """Tests that insures you can save and reload a trained model.""" 1437 1438 def _input_fn(num_epochs=None): 1439 features = { 1440 'age': 1441 input_lib.limit_epochs( 1442 constant_op.constant([[0.8], [0.15], [0.]]), 1443 num_epochs=num_epochs), 1444 'language': 1445 sparse_tensor.SparseTensor( 1446 values=input_lib.limit_epochs( 1447 ['en', 'fr', 'zh'], num_epochs=num_epochs), 1448 indices=[[0, 0], [0, 1], [2, 0]], 1449 dense_shape=[3, 2]) 1450 } 1451 return features, constant_op.constant([1., 0., 0.2], dtype=dtypes.float32) 1452 1453 sparse_column = feature_column.sparse_column_with_hash_bucket( 1454 'language', hash_bucket_size=20) 1455 feature_columns = [ 1456 feature_column.embedding_column( 1457 sparse_column, dimension=1), 1458 feature_column.real_valued_column('age') 1459 ] 1460 1461 model_dir = tempfile.mkdtemp() 1462 regressor = 
dnn.DNNRegressor( 1463 model_dir=model_dir, 1464 feature_columns=feature_columns, 1465 hidden_units=[3, 3], 1466 config=run_config.RunConfig(tf_random_seed=1)) 1467 1468 regressor.fit(input_fn=_input_fn, steps=5) 1469 predict_input_fn = functools.partial(_input_fn, num_epochs=1) 1470 predictions = list(regressor.predict_scores(input_fn=predict_input_fn)) 1471 del regressor 1472 1473 regressor2 = dnn.DNNRegressor( 1474 model_dir=model_dir, 1475 feature_columns=feature_columns, 1476 hidden_units=[3, 3], 1477 config=run_config.RunConfig(tf_random_seed=1)) 1478 predictions2 = list(regressor2.predict_scores(input_fn=predict_input_fn)) 1479 self.assertAllClose(predictions, predictions2) 1480 1481 def testTrainWithPartitionedVariables(self): 1482 """Tests training with partitioned variables.""" 1483 1484 def _input_fn(num_epochs=None): 1485 features = { 1486 'age': 1487 input_lib.limit_epochs( 1488 constant_op.constant([[0.8], [0.15], [0.]]), 1489 num_epochs=num_epochs), 1490 'language': 1491 sparse_tensor.SparseTensor( 1492 values=input_lib.limit_epochs( 1493 ['en', 'fr', 'zh'], num_epochs=num_epochs), 1494 indices=[[0, 0], [0, 1], [2, 0]], 1495 dense_shape=[3, 2]) 1496 } 1497 return features, constant_op.constant([1., 0., 0.2], dtype=dtypes.float32) 1498 1499 # The given hash_bucket_size results in variables larger than the 1500 # default min_slice_size attribute, so the variables are partitioned. 
1501 sparse_column = feature_column.sparse_column_with_hash_bucket( 1502 'language', hash_bucket_size=2e7) 1503 feature_columns = [ 1504 feature_column.embedding_column( 1505 sparse_column, dimension=1), 1506 feature_column.real_valued_column('age') 1507 ] 1508 1509 tf_config = { 1510 'cluster': { 1511 run_config.TaskType.PS: ['fake_ps_0', 'fake_ps_1'] 1512 } 1513 } 1514 with test.mock.patch.dict('os.environ', 1515 {'TF_CONFIG': json.dumps(tf_config)}): 1516 config = run_config.RunConfig(tf_random_seed=1) 1517 # Because we did not start a distributed cluster, we need to pass an 1518 # empty ClusterSpec, otherwise the device_setter will look for 1519 # distributed jobs, such as "/job:ps" which are not present. 1520 config._cluster_spec = server_lib.ClusterSpec({}) 1521 1522 regressor = dnn.DNNRegressor( 1523 feature_columns=feature_columns, hidden_units=[3, 3], config=config) 1524 1525 regressor.fit(input_fn=_input_fn, steps=5) 1526 1527 scores = regressor.evaluate(input_fn=_input_fn, steps=1) 1528 self.assertIn('loss', scores) 1529 1530 def testEnableCenteredBias(self): 1531 """Tests that we can enable centered bias.""" 1532 1533 def _input_fn(num_epochs=None): 1534 features = { 1535 'age': 1536 input_lib.limit_epochs( 1537 constant_op.constant([[0.8], [0.15], [0.]]), 1538 num_epochs=num_epochs), 1539 'language': 1540 sparse_tensor.SparseTensor( 1541 values=input_lib.limit_epochs( 1542 ['en', 'fr', 'zh'], num_epochs=num_epochs), 1543 indices=[[0, 0], [0, 1], [2, 0]], 1544 dense_shape=[3, 2]) 1545 } 1546 return features, constant_op.constant([1., 0., 0.2], dtype=dtypes.float32) 1547 1548 sparse_column = feature_column.sparse_column_with_hash_bucket( 1549 'language', hash_bucket_size=20) 1550 feature_columns = [ 1551 feature_column.embedding_column( 1552 sparse_column, dimension=1), 1553 feature_column.real_valued_column('age') 1554 ] 1555 1556 regressor = dnn.DNNRegressor( 1557 feature_columns=feature_columns, 1558 hidden_units=[3, 3], 1559 
enable_centered_bias=True, 1560 config=run_config.RunConfig(tf_random_seed=1)) 1561 1562 regressor.fit(input_fn=_input_fn, steps=5) 1563 self.assertIn('dnn/regression_head/centered_bias_weight', 1564 regressor.get_variable_names()) 1565 1566 scores = regressor.evaluate(input_fn=_input_fn, steps=1) 1567 self.assertIn('loss', scores) 1568 1569 def testDisableCenteredBias(self): 1570 """Tests that we can disable centered bias.""" 1571 1572 def _input_fn(num_epochs=None): 1573 features = { 1574 'age': 1575 input_lib.limit_epochs( 1576 constant_op.constant([[0.8], [0.15], [0.]]), 1577 num_epochs=num_epochs), 1578 'language': 1579 sparse_tensor.SparseTensor( 1580 values=input_lib.limit_epochs( 1581 ['en', 'fr', 'zh'], num_epochs=num_epochs), 1582 indices=[[0, 0], [0, 1], [2, 0]], 1583 dense_shape=[3, 2]) 1584 } 1585 return features, constant_op.constant([1., 0., 0.2], dtype=dtypes.float32) 1586 1587 sparse_column = feature_column.sparse_column_with_hash_bucket( 1588 'language', hash_bucket_size=20) 1589 feature_columns = [ 1590 feature_column.embedding_column( 1591 sparse_column, dimension=1), 1592 feature_column.real_valued_column('age') 1593 ] 1594 1595 regressor = dnn.DNNRegressor( 1596 feature_columns=feature_columns, 1597 hidden_units=[3, 3], 1598 enable_centered_bias=False, 1599 config=run_config.RunConfig(tf_random_seed=1)) 1600 1601 regressor.fit(input_fn=_input_fn, steps=5) 1602 self.assertNotIn('centered_bias_weight', regressor.get_variable_names()) 1603 1604 scores = regressor.evaluate(input_fn=_input_fn, steps=1) 1605 self.assertIn('loss', scores) 1606 1607 1608def boston_input_fn(): 1609 boston = base.load_boston() 1610 features = math_ops.cast( 1611 array_ops.reshape(constant_op.constant(boston.data), [-1, 13]), 1612 dtypes.float32) 1613 labels = math_ops.cast( 1614 array_ops.reshape(constant_op.constant(boston.target), [-1, 1]), 1615 dtypes.float32) 1616 return features, labels 1617 1618 1619class FeatureColumnTest(test.TestCase): 1620 1621 def 
testTrain(self): 1622 feature_columns = estimator.infer_real_valued_columns_from_input_fn( 1623 boston_input_fn) 1624 est = dnn.DNNRegressor(feature_columns=feature_columns, hidden_units=[3, 3]) 1625 est.fit(input_fn=boston_input_fn, steps=1) 1626 _ = est.evaluate(input_fn=boston_input_fn, steps=1) 1627 1628 1629if __name__ == '__main__': 1630 test.main() 1631