1# Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2# 3# Licensed under the Apache License, Version 2.0 (the "License"); 4# you may not use this file except in compliance with the License. 5# You may obtain a copy of the License at 6# 7# http://www.apache.org/licenses/LICENSE-2.0 8# 9# Unless required by applicable law or agreed to in writing, software 10# distributed under the License is distributed on an "AS IS" BASIS, 11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12# See the License for the specific language governing permissions and 13# limitations under the License. 14# ============================================================================== 15"""Tests for Cudnn RNN models.""" 16from __future__ import absolute_import 17from __future__ import division 18from __future__ import print_function 19 20import argparse 21import collections 22import functools 23import itertools 24import os 25import sys 26import unittest 27 28import numpy as np 29 30from tensorflow.contrib.cudnn_rnn.python.layers import cudnn_rnn 31from tensorflow.contrib.cudnn_rnn.python.ops import cudnn_rnn_ops 32from tensorflow.contrib.rnn.python.ops import rnn as contrib_rnn_lib 33from tensorflow.python.eager import backprop 34from tensorflow.python.eager import context 35from tensorflow.python.framework import dtypes 36from tensorflow.python.framework import ops 37from tensorflow.python.framework import random_seed 38from tensorflow.python.framework import test_util 39from tensorflow.python.ops import array_ops 40from tensorflow.python.ops import control_flow_ops 41from tensorflow.python.ops import gen_nn_ops 42from tensorflow.python.ops import gradients_impl as gradients 43from tensorflow.python.ops import init_ops 44from tensorflow.python.ops import math_ops 45from tensorflow.python.ops import random_ops 46from tensorflow.python.ops import rnn as rnn_lib 47from tensorflow.python.ops import rnn_cell_impl 48from tensorflow.python.ops import state_ops 49from tensorflow.python.ops import variable_scope as vs 50from tensorflow.python.ops import variables 51from tensorflow.python.ops.losses import losses 52from tensorflow.python.platform import googletest 53from tensorflow.python.platform import test 54from tensorflow.python.platform import tf_logging as logging 55from tensorflow.python.training import adagrad 56from tensorflow.python.training import adam 57from tensorflow.python.training import gradient_descent 58from tensorflow.python.training import momentum 59from tensorflow.python.training import rmsprop 60from tensorflow.python.training import saver as saver_lib 61from tensorflow.python.training.tracking import util as trackable_utils 62 63 64CUDNN_LSTM = cudnn_rnn_ops.CUDNN_LSTM 65CUDNN_GRU = cudnn_rnn_ops.CUDNN_GRU 66CUDNN_RNN_RELU = cudnn_rnn_ops.CUDNN_RNN_RELU 67CUDNN_RNN_TANH = cudnn_rnn_ops.CUDNN_RNN_TANH 68CUDNN_RNN_UNIDIRECTION = cudnn_rnn_ops.CUDNN_RNN_UNIDIRECTION 69CUDNN_RNN_BIDIRECTION = cudnn_rnn_ops.CUDNN_RNN_BIDIRECTION 70 71CUDNN_LSTM_PARAMS_PER_LAYER = cudnn_rnn_ops.CUDNN_LSTM_PARAMS_PER_LAYER 72CUDNN_GRU_PARAMS_PER_LAYER = cudnn_rnn_ops.CUDNN_GRU_PARAMS_PER_LAYER 73CUDNN_RNN_TANH_PARAMS_PER_LAYER = cudnn_rnn_ops.CUDNN_RNN_TANH_PARAMS_PER_LAYER 74CUDNN_RNN_RELU_PARAMS_PER_LAYER = cudnn_rnn_ops.CUDNN_RNN_RELU_PARAMS_PER_LAYER 75 76 77class CudnnTestModel(object): 78 """Model with convenient APIs for easier building and running test graph. 79 80 The graph built is used by all tests below to avoid repeatedly building 81 similar test graphs. 82 """ 83 84 def __init__(self, 85 rnn_mode, 86 num_layers, 87 num_units, 88 input_size, 89 direction=CUDNN_RNN_UNIDIRECTION, 90 dropout=0., 91 dtype=dtypes.float32, 92 training=False, 93 seed=None, 94 kernel_initializer=None, 95 bias_initializer=None): 96 if dtype not in (dtypes.float16, dtypes.float32, dtypes.float64): 97 raise ValueError("Invalid dtype: %s" % dtype) 98 self._dtype = dtype 99 100 self._inputs = array_ops.placeholder( 101 dtype=dtype, shape=[None, None, input_size], name="inputs") 102 h = array_ops.placeholder( 103 dtype=dtype, shape=[None, None, num_units], name="h") 104 c = array_ops.placeholder( 105 dtype=dtype, shape=[None, None, num_units], name="c") 106 if rnn_mode == CUDNN_LSTM: 107 model_fn = cudnn_rnn.CudnnLSTM 108 self._initial_state = (h, c) 109 elif rnn_mode == CUDNN_GRU: 110 model_fn = cudnn_rnn.CudnnGRU 111 self._initial_state = (h,) 112 elif rnn_mode == CUDNN_RNN_TANH: 113 model_fn = cudnn_rnn.CudnnRNNTanh 114 self._initial_state = (h,) 115 elif rnn_mode == CUDNN_RNN_RELU: 116 model_fn = cudnn_rnn.CudnnRNNRelu 117 self._initial_state = (h,) 118 else: 119 raise ValueError("Invalid rnn_mode: %s" % rnn_mode) 120 self._rnn = model_fn( 121 num_layers, 122 num_units, 123 direction=direction, 124 dropout=dropout, 125 dtype=dtype, 126 seed=seed, 127 kernel_initializer=kernel_initializer, 128 bias_initializer=bias_initializer) 129 self._rnn.build([None, None, input_size]) 130 131 self._outputs, self._output_state = self._rnn( 132 self._inputs, initial_state=self._initial_state, training=training) 133 134 def _AddUp(self, outputs, output_state): 135 total = math_ops.reduce_sum(outputs) 136 for s in output_state: 137 total += math_ops.reduce_sum(s) 138 return total 139 140 @property 141 def inputs(self): 142 return self._inputs 143 144 @property 145 def initial_state(self): 146 return self._initial_state 147 148 @property 149 def outputs(self): 150 return self._outputs 151 152 @property 153 def output_state(self): 154 return self._output_state 155 156 @property 157 def rnn(self): 158 return self._rnn 159 160 @property 161 def total_sum(self): 162 return self._AddUp(self.outputs, self.output_state) 163 164 def SynthesizeInput(self, seq_length, batch_size, seed=1234): 165 """Synthesizes input and initial state values for testing.""" 166 np.random.seed(seed) 167 num_layers = self._rnn.num_layers 168 dir_count = self._rnn.num_dirs 169 num_units = self._rnn.num_units 170 input_size = self._rnn.input_size 171 172 np_dtype = np.float32 if self._dtype == dtypes.float32 else np.float64 173 inputs = np.random.randn(seq_length, batch_size, 174 input_size).astype(np_dtype) 175 input_h = np.random.randn(num_layers * dir_count, batch_size, 176 num_units).astype(np_dtype) 177 if self._rnn.rnn_mode == CUDNN_LSTM: 178 input_c = np.random.randn(num_layers * dir_count, batch_size, 179 num_units).astype(np_dtype) 180 initial_state = (input_h, input_c) 181 else: 182 initial_state = (input_h,) 183 return inputs, initial_state 184 185 def ZeroState(self, batch_size): 186 num_layers = self._rnn.num_layers 187 dir_count = self._rnn.num_dirs 188 num_units = self._rnn.num_units 189 190 np_dtype = np.float32 if self._dtype == dtypes.float32 else np.float64 191 input_h = np.zeros((num_layers * dir_count, batch_size, 192 num_units)).astype(np_dtype) 193 if self._rnn.rnn_mode == CUDNN_LSTM: 194 input_c = np.zeros((num_layers * dir_count, batch_size, 195 num_units)).astype(np_dtype) 196 initial_state = (input_h, input_c) 197 else: 198 initial_state = (input_h,) 199 return initial_state 200 201 def FProp(self, inputs_t, initial_state_t, training): 202 """Builds additional subgraph with given inputs and state. 203 204 Args: 205 inputs_t: a tensor. 206 initial_state_t: a tensor. 207 training: boolean, true if training mode. 208 Returns: 209 A tensor of the forward pass output of the model. 210 """ 211 outputs, output_state = self._rnn( 212 inputs_t, initial_state=initial_state_t, training=training) 213 return self._AddUp(outputs, output_state) 214 215 def Feed(self, sess, inputs, initial_state=None, return_sum=True): 216 """Runs graph with given inputs and initial state.""" 217 batch_size = inputs.shape[1] 218 if initial_state is None: 219 initial_state = self.ZeroState(batch_size) 220 if return_sum: 221 return sess.run( 222 self.total_sum, 223 feed_dict={self.inputs: inputs, 224 self.initial_state: initial_state}) 225 else: 226 return sess.run( 227 [self.outputs, self.output_state], 228 feed_dict={self.inputs: inputs, 229 self.initial_state: initial_state}) 230 231 232def _CreateCudnnCompatibleCanonicalRNN(rnn, inputs, is_bidi=False, scope=None): 233 mode = rnn.rnn_mode 234 num_units = rnn.num_units 235 num_layers = rnn.num_layers 236 237 # To reuse cuDNN-trained models, must use cudnn compatible rnn cells. 238 if mode == CUDNN_LSTM: 239 single_cell = lambda: cudnn_rnn_ops.CudnnCompatibleLSTMCell(num_units) 240 elif mode == CUDNN_GRU: 241 single_cell = lambda: cudnn_rnn_ops.CudnnCompatibleGRUCell(num_units) 242 elif mode == CUDNN_RNN_TANH: 243 single_cell = (lambda: rnn_cell_impl.BasicRNNCell(num_units, math_ops.tanh)) 244 elif mode == CUDNN_RNN_RELU: 245 single_cell = ( 246 lambda: rnn_cell_impl.BasicRNNCell(num_units, gen_nn_ops.relu)) 247 else: 248 raise ValueError("%s is not supported!" % mode) 249 250 if not is_bidi: 251 cell = rnn_cell_impl.MultiRNNCell( 252 [single_cell() for _ in range(num_layers)]) 253 return rnn_lib.dynamic_rnn( 254 cell, inputs, dtype=dtypes.float32, time_major=True, scope=scope) 255 else: 256 cells_fw = [single_cell() for _ in range(num_layers)] 257 cells_bw = [single_cell() for _ in range(num_layers)] 258 259 (outputs, output_state_fw, 260 output_state_bw) = contrib_rnn_lib.stack_bidirectional_dynamic_rnn( 261 cells_fw, 262 cells_bw, 263 inputs, 264 dtype=dtypes.float32, 265 time_major=True, 266 scope=scope) 267 return outputs, (output_state_fw, output_state_bw) 268 269 270class CudnnRNNTestBasic(test_util.TensorFlowTestCase): 271 272 @unittest.skipUnless(test.is_built_with_cuda(), 273 "Test only applicable when running on GPUs") 274 def testLayerBasic(self): 275 num_layers = 4 276 num_units = 2 277 batch_size = 8 278 direction = CUDNN_RNN_UNIDIRECTION 279 dir_count = 1 280 281 with vs.variable_scope("main"): 282 kernel_initializer = init_ops.constant_initializer(0.) 283 bias_initializer = init_ops.constant_initializer(0.) 284 inputs = random_ops.random_uniform([ 285 num_layers * dir_count, batch_size, num_units], dtype=dtypes.float32) 286 287 lstm = cudnn_rnn.CudnnLSTM(num_layers, num_units, 288 direction=direction, 289 kernel_initializer=kernel_initializer, 290 bias_initializer=bias_initializer, 291 name="awesome_lstm") 292 293 # Build the layer 294 outputs1, _ = lstm(inputs) 295 # Reuse the layer 296 outputs2, _ = lstm(inputs) 297 298 total_sum1 = math_ops.reduce_sum(outputs1) 299 total_sum2 = math_ops.reduce_sum(outputs2) 300 301 with vs.variable_scope("main", reuse=True): 302 lstm = cudnn_rnn.CudnnLSTM(num_layers, num_units, 303 direction=direction, 304 kernel_initializer=kernel_initializer, 305 bias_initializer=bias_initializer, 306 name="awesome_lstm") 307 308 # Reuse the layer 309 outputs3, _ = lstm(inputs) 310 total_sum3 = math_ops.reduce_sum(outputs3) 311 312 self.assertEqual(1, len(variables.trainable_variables())) 313 self.assertEqual(1, len(ops.get_collection(ops.GraphKeys.SAVEABLE_OBJECTS))) 314 self.assertEqual("main/awesome_lstm/opaque_kernel", 315 variables.trainable_variables()[0].op.name) 316 317 with self.test_session(use_gpu=True) as sess: 318 sess.run(variables.global_variables_initializer()) 319 (total_sum1_v, total_sum2_v, total_sum3_v) = sess.run( 320 [total_sum1, total_sum2, total_sum3]) 321 self.assertEqual(0, total_sum1_v) 322 self.assertEqual(0, total_sum2_v) 323 self.assertEqual(0, total_sum3_v) 324 325 @unittest.skipUnless(test.is_built_with_cuda(), 326 "Test only applicable when running on GPUs") 327 def testOptimizersSupport(self): 328 for opt in ("adagrad", "adam", "rmsprop", "momentum", "sgd"): 329 self._TestOptimizerSupportHelper(opt) 330 331 def _GetOptimizer(self, opt): 332 if opt == "adagrad": 333 return adagrad.AdagradOptimizer(learning_rate=1e-2) 334 elif opt == "adam": 335 return adam.AdamOptimizer(learning_rate=1e-2) 336 elif opt == "rmsprop": 337 return rmsprop.RMSPropOptimizer(learning_rate=1e-2) 338 elif opt == "momentum": 339 return momentum.MomentumOptimizer(learning_rate=1e-2, momentum=0.9) 340 elif opt == "sgd": 341 return gradient_descent.GradientDescentOptimizer(learning_rate=1e-2) 342 else: 343 raise ValueError("Unsupported optimizer: %s" % opt) 344 345 def _TestOptimizerSupportHelper(self, opt): 346 num_layers = 4 347 num_units = 2 348 batch_size = 8 349 direction = CUDNN_RNN_UNIDIRECTION 350 dir_count = 1 351 352 with ops.Graph().as_default() as g: 353 kernel_initializer = init_ops.constant_initializer(0.) 354 bias_initializer = init_ops.constant_initializer(0.) 355 inputs = random_ops.random_uniform([ 356 num_layers * dir_count, batch_size, num_units], dtype=dtypes.float32) 357 358 lstm = cudnn_rnn.CudnnLSTM(num_layers, num_units, 359 direction=direction, 360 kernel_initializer=kernel_initializer, 361 bias_initializer=bias_initializer, 362 name="awesome_lstm") 363 outputs, _ = lstm(inputs) 364 loss = math_ops.reduce_sum(outputs) 365 optimizer = self._GetOptimizer(opt) 366 train_op = optimizer.minimize(loss) 367 368 with self.test_session(use_gpu=True, graph=g) as sess: 369 sess.run(variables.global_variables_initializer()) 370 sess.run(train_op) 371 372 @unittest.skipUnless(test.is_built_with_cuda(), 373 "Test only applicable when running on GPUs") 374 def testSaveableGraphDeviceAssignment(self): 375 num_layers = 4 376 num_units = 2 377 batch_size = 8 378 direction = CUDNN_RNN_UNIDIRECTION 379 dir_count = 1 380 381 def DeviceFn(op): 382 if op.type in ("Variable", "VariableV2"): 383 return "/cpu:0" 384 else: 385 return "/gpu:0" 386 387 with ops.Graph().as_default() as g: 388 with ops.device(DeviceFn): 389 with vs.variable_scope("main"): 390 kernel_initializer = init_ops.constant_initializer(3.14) 391 bias_initializer = init_ops.constant_initializer(1.59) 392 inputs = random_ops.random_uniform( 393 [num_layers * dir_count, batch_size, num_units], 394 dtype=dtypes.float32) 395 396 lstm = cudnn_rnn.CudnnLSTM(num_layers, num_units, 397 direction=direction, 398 kernel_initializer=kernel_initializer, 399 bias_initializer=bias_initializer, 400 name="awesome_lstm") 401 outputs = lstm(inputs) 402 403 # saver is created in the scope of DeviceFn. 404 saver = saver_lib.Saver() 405 406 with self.test_session(use_gpu=True, graph=g) as sess: 407 save_path = os.path.join(self.get_temp_dir(), 408 "test-saveable-device-assignment") 409 sess.run(variables.global_variables_initializer()) 410 411 saver.save(sess, save_path) 412 saver.restore(sess, save_path) 413 sess.run(outputs) 414 415 @unittest.skipUnless(test.is_built_with_cuda(), 416 "Test only applicable when running on GPUs") 417 def testDifferentShapesEager(self): 418 # Checks that kernel caching does not cause sharing of temporary storage 419 # across different input shapes when executing eagerly. 420 with context.eager_mode(): 421 with ops.device("gpu:0"): 422 first_output, _ = cudnn_rnn.CudnnGRU(1, 100)( 423 array_ops.zeros([28, 100, 28])) 424 second_output, _ = cudnn_rnn.CudnnGRU(1, 100)( 425 array_ops.zeros([28, 100, 100])) 426 self.assertAllEqual([28, 100, 100], first_output.shape) 427 self.assertAllEqual([28, 100, 100], second_output.shape) 428 429 def _LossFunc(): 430 first_output, _ = cudnn_rnn.CudnnGRU(1, 100)( 431 array_ops.zeros([28, 100, 28])) 432 second_output, _ = cudnn_rnn.CudnnGRU(1, 100)( 433 array_ops.zeros([28, 100, 100])) 434 return (math_ops.reduce_sum(first_output) + 435 math_ops.reduce_sum(second_output)) 436 437 backprop.implicit_grad(_LossFunc)() 438 439 @unittest.skipUnless(test.is_built_with_cuda(), 440 "Test only applicable when running on GPUs") 441 def testDifferentShapesGraph(self): 442 # Tests that a single kernel instance presented with multiple input shapes 443 # does not crash with graph execution. 444 with ops.device("gpu:0"): 445 layer = cudnn_rnn.CudnnGRU(1, 100) 446 layer(array_ops.zeros([28, 100, 100])) 447 448 def _Cond(index, accumulation): 449 del accumulation # unused 450 return math_ops.less(index, 4) 451 452 def _Body(index, accumulation): 453 layer_input = accumulation[:, :, 10 * (1 + index % 2):] 454 output, _ = layer(layer_input) 455 return index + 1, accumulation + output 456 457 original_input = array_ops.zeros([28, 100, 100]) 458 _, accumulation = control_flow_ops.while_loop(_Cond, _Body, 459 [0, original_input]) 460 grad, = gradients.gradients( 461 math_ops.reduce_sum(accumulation), (original_input,)) 462 init_op = variables.global_variables_initializer() 463 with self.cached_session() as sess: 464 sess.run(init_op) 465 accumulation_eval, grad_eval = sess.run((accumulation, grad)) 466 self.assertAllEqual([28, 100, 100], accumulation_eval.shape) 467 self.assertAllEqual([28, 100, 100], grad_eval.shape) 468 469 470# TODO(jamesqin): Transform to parameterized test after it is included in the 471# TF open source codebase. 472class CudnnRNNTestSaveRestore(test_util.TensorFlowTestCase): 473 474 def _CompareWeights(self, lhs, rhs): 475 self.assertEqual(len(lhs), len(rhs)) 476 for lw, rw in zip(lhs, rhs): 477 self.assertAllEqual(lw, rw) 478 479 def _CompareBiases(self, lhs, rhs, rnn_mode, num_layers, direction): 480 self.assertEqual(len(lhs), len(rhs)) 481 if rnn_mode == CUDNN_LSTM: 482 num_params_per_layer = CUDNN_LSTM_PARAMS_PER_LAYER 483 elif rnn_mode == CUDNN_GRU: 484 num_params_per_layer = CUDNN_GRU_PARAMS_PER_LAYER 485 elif rnn_mode == CUDNN_RNN_TANH: 486 num_params_per_layer = CUDNN_RNN_TANH_PARAMS_PER_LAYER 487 else: 488 num_params_per_layer = CUDNN_RNN_RELU_PARAMS_PER_LAYER 489 num_dirs = 1 if direction == CUDNN_RNN_UNIDIRECTION else 2 490 num_params_per_layer *= num_dirs 491 self.assertEqual(num_params_per_layer * num_layers, len(lhs)) 492 493 for i in range(num_layers): 494 layer_lhs = lhs[i * num_params_per_layer: (i+1) * num_params_per_layer] 495 layer_rhs = rhs[i * num_params_per_layer: (i+1) * num_params_per_layer] 496 if direction == CUDNN_RNN_UNIDIRECTION: 497 self._CompareSingleLayerBiases(layer_lhs, layer_rhs) 498 else: 499 size = len(layer_lhs) 500 fw_lhs, bw_lhs = layer_lhs[:size//2], layer_lhs[size//2:] 501 fw_rhs, bw_rhs = layer_rhs[:size//2], layer_rhs[size//2:] 502 self._CompareSingleLayerBiases(fw_lhs, fw_rhs) 503 self._CompareSingleLayerBiases(bw_lhs, bw_rhs) 504 505 def _CompareSingleLayerBiases(self, lhs, rhs): 506 self.assertEqual(len(lhs), len(rhs)) 507 508 lf_lhs, rt_lhs = lhs[:len(lhs)//2], lhs[len(lhs)//2:] 509 lf_rhs, rt_rhs = rhs[:len(rhs)//2], rhs[len(rhs)//2:] 510 self.assertEqual(len(lf_lhs), len(rt_lhs)) 511 self.assertEqual(len(lf_rhs), len(rt_rhs)) 512 513 sum_lhs, sum_rhs = [], [] 514 for lf, rt in zip(lf_lhs, rt_lhs): 515 sum_lhs.append(lf + rt) 516 for lf, rt in zip(lf_rhs, rt_rhs): 517 sum_rhs.append(lf + rt) 518 self.assertEqual(len(sum_lhs), len(sum_rhs)) 519 for lf, rt in zip(sum_lhs, sum_rhs): 520 self.assertAllEqual(lf, rt) 521 522 def _TestSaveRestoreVariable(self, rnn_mode, direction, dtype): 523 input_size = 3 524 num_layers = 2 525 num_units = 7 526 with ops.Graph().as_default() as g: 527 random_seed.set_random_seed(1234) 528 model = CudnnTestModel( 529 rnn_mode, 530 num_layers, 531 num_units, 532 input_size, 533 direction=direction, 534 dtype=dtype) 535 rnn = model.rnn 536 save_path = os.path.join(self.get_temp_dir(), 537 "save-restore-variable-test") 538 saver = saver_lib.Saver() 539 weights, biases = ( 540 model.rnn.saveable.format_converter._opaque_to_cu_canonical( 541 model.rnn.saveable._variables)) 542 opaque_params = rnn.trainable_variables[0] 543 # CudnnTestModel() creates CudnnOpaqueParamsSaveable that helps saver save 544 # Cudnn vars in canonical format. 545 reset_op = state_ops.assign( 546 opaque_params, 547 array_ops.zeros(array_ops.shape(opaque_params), dtype=dtype)) 548 # Passing graph explicitly, otherwise an old sess would be reused. 549 with self.test_session(use_gpu=True, graph=g) as sess: 550 sess.run(variables.global_variables_initializer()) 551 val = saver.save(sess, save_path) 552 self.assertEqual(save_path, val) 553 weights_v, biases_v = sess.run([weights, biases]) 554 555 # Reset opaque param 556 sess.run(reset_op) 557 saver.restore(sess, save_path) 558 weights_v_restored, biases_v_restored = sess.run([weights, biases]) 559 560 self._CompareWeights(weights_v, weights_v_restored) 561 self._CompareBiases(biases_v, biases_v_restored, rnn_mode, num_layers, 562 direction) 563 564 def _TestSaveRestoreTwoVariables(self, rnn_mode, direction, dtype): 565 input_size = 3 566 num_layers = 2 567 num_units = 7 568 with ops.Graph().as_default() as g: 569 random_seed.set_random_seed(1234) 570 with vs.variable_scope("m1"): 571 model1 = CudnnTestModel( 572 rnn_mode, 573 num_layers, 574 num_units, 575 input_size, 576 direction=direction, 577 dtype=dtype) 578 with vs.variable_scope("m2"): 579 model2 = CudnnTestModel( 580 rnn_mode, 581 num_layers, 582 num_units, 583 input_size, 584 direction=direction, 585 dtype=dtype) 586 opaque_params = (model1.rnn.trainable_variables[0], 587 model2.rnn.trainable_variables[0]) 588 saveable1 = model1.rnn.saveable 589 weights1, biases1 = saveable1.format_converter._opaque_to_cu_canonical( 590 saveable1._variables) 591 saveable2 = model1.rnn.saveable 592 weights2, biases2 = saveable2.format_converter._opaque_to_cu_canonical( 593 saveable2._variables) 594 reset_params = [ 595 state_ops.assign(params, 596 array_ops.zeros_like(params, dtype=dtype)) 597 for params in opaque_params 598 ] 599 reset_op = control_flow_ops.group(*reset_params) 600 save_path = os.path.join(self.get_temp_dir(), 601 "save-restore-variable-test2") 602 saver = saver_lib.Saver() 603 # Passing graph explicitly, otherwise an old sess would be reused. 604 with self.test_session(use_gpu=True, graph=g) as sess: 605 sess.run(variables.global_variables_initializer()) 606 val = saver.save(sess, save_path) 607 self.assertEqual(save_path, val) 608 609 weights1_v, biases1_v = sess.run([weights1, biases1]) 610 weights2_v, biases2_v = sess.run([weights2, biases2]) 611 612 sess.run(reset_op) 613 saver.restore(sess, save_path) 614 weights1_v_restored, biases1_v_restored = sess.run([weights1, biases1]) 615 weights2_v_restored, biases2_v_restored = sess.run([weights2, biases2]) 616 617 self._CompareWeights(weights1_v, weights1_v_restored) 618 self._CompareWeights(weights2_v, weights2_v_restored) 619 self._CompareBiases(biases1_v, biases1_v_restored, rnn_mode, num_layers, 620 direction) 621 self._CompareBiases(biases2_v, biases2_v_restored, rnn_mode, num_layers, 622 direction) 623 624 def _TestSaveRestoreOutput(self, rnn_mode, direction, dtype): 625 with ops.Graph().as_default() as g: 626 num_layers = 2 627 num_units = 7 628 input_size = 7 629 seq_length = 8 630 batch_size = 4 631 model = CudnnTestModel( 632 rnn_mode, 633 num_layers, 634 num_units, 635 input_size, 636 direction=direction, 637 dtype=dtype, 638 training=False) 639 rnn = model.rnn 640 641 save_path = os.path.join(self.get_temp_dir(), "save-restore-output-test") 642 saver = saver_lib.Saver() 643 644 # Only one opaque var in a cudnn layer. 645 assert len(rnn.trainable_variables) == 1 646 reset_params = state_ops.assign( 647 rnn.trainable_variables[0], 648 array_ops.zeros( 649 array_ops.shape(rnn.trainable_variables[0]), dtype=dtype)) 650 651 # Passing graph explicitly, otherwise an old sess would be reused. 652 with self.test_session(use_gpu=True, graph=g) as sess: 653 sess.run(variables.global_variables_initializer()) 654 inputs, initial_state = model.SynthesizeInput(seq_length, batch_size) 655 total_sum_v = model.Feed(sess, inputs, initial_state) 656 val = saver.save(sess, save_path) 657 self.assertEqual(save_path, val) 658 659 sess.run(reset_params) 660 saver.restore(sess, save_path) 661 total_sum_v_restored = model.Feed(sess, inputs, initial_state) 662 self.assertAllClose(total_sum_v, total_sum_v_restored, atol=1e-5) 663 664 def _TestSaveRestoreHelper(self, rnn_mode): 665 directions = [CUDNN_RNN_UNIDIRECTION, CUDNN_RNN_BIDIRECTION] 666 dtype_list = [dtypes.float16, dtypes.float32, dtypes.float64] 667 for direction, dtype in itertools.product(directions, dtype_list): 668 self._TestSaveRestoreVariable(rnn_mode, direction, dtype) 669 self._TestSaveRestoreTwoVariables(rnn_mode, direction, dtype) 670 self._TestSaveRestoreOutput(rnn_mode, direction, dtype) 671 672 @unittest.skipUnless(test.is_built_with_cuda(), 673 "Test only applicable when running on GPUs") 674 def testSaveRestoreRepeatedlyCreateCustomSaveable(self): 675 input_size = 3 676 num_layers = 2 677 num_units = 7 678 with ops.Graph().as_default(): 679 random_seed.set_random_seed(1234) 680 model = CudnnTestModel( 681 CUDNN_LSTM, 682 num_layers, 683 num_units, 684 input_size, 685 direction=CUDNN_RNN_UNIDIRECTION, 686 dtype=dtypes.float32) 687 with self.assertRaisesRegexp(RuntimeError, 688 "Cudnn saveable already created"): 689 model.rnn._create_saveable() 690 691 @unittest.skipUnless(test.is_built_with_cuda(), 692 "Test only applicable when running on GPUs") 693 def testSaveRestoreLSTM(self): 694 self._TestSaveRestoreHelper(CUDNN_LSTM) 695 696 @unittest.skipUnless(test.is_built_with_cuda(), 697 "Test only applicable when running on GPUs") 698 def testSaveRestoreGRU(self): 699 self._TestSaveRestoreHelper(CUDNN_GRU) 700 701 @unittest.skipUnless(test.is_built_with_cuda(), 702 "Test only applicable when running on GPUs") 703 def testSaveRestoreRNNTanh(self): 704 self._TestSaveRestoreHelper(CUDNN_RNN_TANH) 705 706 @unittest.skipUnless(test.is_built_with_cuda(), 707 "Test only applicable when running on GPUs") 708 def testSaveRestoreRNNRelu(self): 709 self._TestSaveRestoreHelper(CUDNN_RNN_RELU) 710 711 712class CudnnRNNTestSaveRestoreTrackable(test_util.TensorFlowTestCase): 713 714 def _VerifyCheckpoint( 715 self, checkpoint_path, compatible_cell_fn, cudnn_cell_fn, 716 num_layers, input_size, expected_variable_values, num_applications=3): 717 checkpoint_directory = self.get_temp_dir() 718 checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") 719 with ops.device("gpu:0"): 720 cudnn_layer = cudnn_cell_fn() 721 cudnn_checkpoint = trackable_utils.Checkpoint(cell=cudnn_layer) 722 status = cudnn_checkpoint.restore(checkpoint_path) 723 inputs = 3. * array_ops.ones([num_applications, num_layers, input_size], 724 dtype=dtypes.float32) 725 cudnn_output, _ = cudnn_layer(inputs) 726 status.run_restore_ops() 727 second_save_path = cudnn_checkpoint.save(checkpoint_prefix) 728 restore_layer = compatible_cell_fn() 729 restore_layer_checkpoint = trackable_utils.Checkpoint( 730 cell=restore_layer) 731 status = restore_layer_checkpoint.restore(second_save_path) 732 current_state = restore_layer.zero_state(1, dtypes.float32) 733 for _ in range(num_applications): 734 restore_layer_output, current_state = restore_layer( 735 inputs=3. * array_ops.ones([1, input_size]), 736 state=current_state) 737 status.run_restore_ops() 738 self.assertTrue(restore_layer.variables) 739 for variable, expected_value in zip( 740 restore_layer.variables, expected_variable_values): 741 self.assertAllClose(expected_value, self.evaluate(variable)) 742 self.assertAllClose(self.evaluate(restore_layer_output), 743 self.evaluate(cudnn_output)[-1, -1:, ...]) 744 745 def _TrackableSingleCellUnidirectionalTestTemplate( 746 self, single_cell_fn, cudnn_cell_fn): 747 # Single-layer cuDNN cells with object-based checkpointing should be 748 # checkpoint compatible with either single CudnnCompatible cells or 749 # MultiRnnCells with one cell. 750 input_size = 3 751 save_cell_layer = single_cell_fn() 752 save_cell_layer( 753 inputs=array_ops.ones([1, input_size]), 754 state=save_cell_layer.zero_state(1, dtypes.float32)) 755 self.assertTrue(save_cell_layer.variables) 756 expected_values = [] 757 np.random.seed(10) 758 for variable in save_cell_layer.variables: 759 value = np.random.normal(size=variable.shape) 760 expected_values.append(value) 761 self.evaluate(variable.assign(value)) 762 save_checkpoint = trackable_utils.Checkpoint(cell=save_cell_layer) 763 checkpoint_directory = self.get_temp_dir() 764 checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") 765 first_save_path = save_checkpoint.save(checkpoint_prefix) 766 self._VerifyCheckpoint( 767 checkpoint_path=first_save_path, 768 compatible_cell_fn= 769 lambda: rnn_cell_impl.MultiRNNCell([single_cell_fn()]), 770 cudnn_cell_fn=cudnn_cell_fn, 771 num_layers=1, 772 expected_variable_values=expected_values, 773 input_size=input_size) 774 775 @unittest.skipUnless(test.is_built_with_cuda(), 776 "Test only applicable when running on GPUs") 777 @test_util.run_in_graph_and_eager_modes 778 def testLSTMTrackableSingleLayer(self): 779 num_units = 2 780 direction = CUDNN_RNN_UNIDIRECTION 781 self._TrackableSingleCellUnidirectionalTestTemplate( 782 single_cell_fn=functools.partial( 783 cudnn_rnn_ops.CudnnCompatibleLSTMCell, num_units=num_units), 784 cudnn_cell_fn=functools.partial( 785 cudnn_rnn.CudnnLSTM, num_layers=1, num_units=num_units, 786 direction=direction, name="awesome_lstm")) 787 788 @unittest.skipUnless(test.is_built_with_cuda(), 789 "Test only applicable when running on GPUs") 790 @test_util.run_in_graph_and_eager_modes 791 def testGRUTrackableSingleLayer(self): 792 num_units = 2 793 direction = CUDNN_RNN_UNIDIRECTION 794 with self.assertRaises(NotImplementedError): 795 # TODO(allenl): Implement object-based saving for GRUs and other cells. 796 self._TrackableSingleCellUnidirectionalTestTemplate( 797 single_cell_fn=functools.partial( 798 cudnn_rnn_ops.CudnnCompatibleGRUCell, num_units=num_units), 799 cudnn_cell_fn=functools.partial( 800 cudnn_rnn.CudnnGRU, num_layers=1, num_units=num_units, 801 direction=direction, name="awesome_gru")) 802 803 def _TrackableMultiLayerTestTemplate( 804 self, single_cell_fn, cudnn_cell_fn, num_layers): 805 806 def _MultiCellFn(): 807 return rnn_cell_impl.MultiRNNCell( 808 [single_cell_fn() for _ in range(num_layers)]) 809 input_size = 3 810 save_graph = ops.Graph() 811 with save_graph.as_default(), self.session(graph=save_graph): 812 save_layer = _MultiCellFn() 813 save_layer(inputs=array_ops.ones([1, input_size]), 814 state=save_layer.zero_state(1, dtypes.float32)) 815 self.assertTrue(save_layer.variables) 816 expected_values = [] 817 np.random.seed(10) 818 for variable in save_layer.variables: 819 value = np.random.normal(size=variable.shape) 820 expected_values.append(value) 821 self.evaluate(variable.assign(value)) 822 save_checkpoint = trackable_utils.Checkpoint(cell=save_layer) 823 checkpoint_directory = self.get_temp_dir() 824 checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") 825 first_save_path = save_checkpoint.save(checkpoint_prefix) 826 self._VerifyCheckpoint( 827 checkpoint_path=first_save_path, 828 compatible_cell_fn=_MultiCellFn, cudnn_cell_fn=cudnn_cell_fn, 829 num_layers=num_layers, 830 expected_variable_values=expected_values, 831 input_size=input_size) 832 833 @unittest.skipUnless(test.is_built_with_cuda(), 834 "Test only applicable when running on GPUs") 835 @test_util.run_in_graph_and_eager_modes 836 def testCudnnCompatibleLSTMCheckpointablMultiLayer(self): 837 num_units = 2 838 num_layers = 3 839 direction = CUDNN_RNN_UNIDIRECTION 840 self._TrackableMultiLayerTestTemplate( 841 single_cell_fn=functools.partial( 842 cudnn_rnn_ops.CudnnCompatibleLSTMCell, num_units=num_units), 843 cudnn_cell_fn=functools.partial( 844 cudnn_rnn.CudnnLSTM, num_layers=num_layers, num_units=num_units, 845 direction=direction, name="awesome_lstm"), 846 num_layers=num_layers) 847 848 849# TODO(jamesqin): Transform to parameterized test after it is included in the 850# TF open source codebase. 851class CudnnRNNTestCompatibleRNNCells(test_util.TensorFlowTestCase): 852 853 @unittest.skipUnless(test.is_built_with_cuda(), 854 "Test only applicable when running on GPUs") 855 def testCudnnCompatibleLSTM(self): 856 self._TestCudnnCompatibleRnnCellsHelper(CUDNN_LSTM) 857 858 @unittest.skipUnless(test.is_built_with_cuda(), 859 "Test only applicable when running on GPUs") 860 def testCudnnCompatibleGRU(self): 861 self._TestCudnnCompatibleRnnCellsHelper(CUDNN_GRU) 862 863 @unittest.skipUnless(test.is_built_with_cuda(), 864 "Test only applicable when running on GPUs") 865 def testCudnnCompatibleRNNTanh(self): 866 self._TestCudnnCompatibleRnnCellsHelper(CUDNN_RNN_TANH) 867 868 @unittest.skipUnless(test.is_built_with_cuda(), 869 "Test only applicable when running on GPUs") 870 def testCudnnCompatibleRNNRelu(self): 871 self._TestCudnnCompatibleRnnCellsHelper(CUDNN_RNN_RELU) 872 873 def _TestCudnnCompatibleRnnCellsHelper(self, rnn_mode): 874 configs = [ 875 { 876 "num_layers": 1, 877 "seq_length": 3, 878 "num_units": 4, 879 "input_size": 5, 880 "batch_size": 6, 881 }, 882 { 883 "num_layers": 2, 884 "seq_length": 8, 885 "num_units": 4, 886 "input_size": 8, 887 "batch_size": 16, 888 }, 889 { 890 "num_layers": 2, 891 "seq_length": 3, 892 "num_units": 4, 893 "input_size": 5, 894 "batch_size": 6, 895 }, 896 { 897 "num_layers": 1, 898 "seq_length": 2, 899 "num_units": 2, 900 "input_size": 4, 901 "batch_size": 1, 902 }, 903 ] 904 directions = [CUDNN_RNN_UNIDIRECTION, CUDNN_RNN_BIDIRECTION] 905 for cfg, direction in zip(configs, directions): 906 self._TestCudnnCompatibleRnnCells(cfg["num_layers"], cfg["seq_length"], 907 cfg["num_units"], cfg["input_size"], 908 cfg["batch_size"], rnn_mode, direction) 909 910 def _TestCudnnCompatibleRnnCells(self, num_layers, seq_length, num_units, 911 input_size, batch_size, rnn_mode, direction): 912 dtype = dtypes.float32 913 # Train graph 914 with ops.Graph().as_default() as g: 915 model = CudnnTestModel( 916 rnn_mode, 917 num_layers, 918 num_units, 919 input_size, 920 direction=direction, 921 dtype=dtype, 922 training=True) 923 target_output = array_ops.placeholder(dtype=dtype) 924 loss_op = losses.log_loss( 925 labels=target_output, predictions=model.total_sum) 926 optimizer = gradient_descent.GradientDescentOptimizer(learning_rate=1e-2) 927 train_op = optimizer.minimize(loss_op) 928 929 saver = saver_lib.Saver() 930 931 # Train Cudnn model 932 seed = 0 933 with self.test_session(use_gpu=True, graph=g) as sess: 934 sess.run(variables.global_variables_initializer()) 935 # Train 128 steps 936 num_steps = 128 937 for _ in range(num_steps): 938 inputs, _ = model.SynthesizeInput(seq_length, batch_size, seed) 939 targets = np.random.rand() 940 sess.run( 941 train_op, 942 feed_dict={ 943 model.inputs: inputs, 944 model.initial_state: model.ZeroState(batch_size), 945 target_output: targets 946 }) 947 seed += 1 948 949 save_path = os.path.join(self.get_temp_dir(), 950 ("cudnn-rnn-%s-test" % rnn_mode)) 951 save_v = saver.save(sess, save_path) 952 self.assertEqual(save_path, save_v) 953 954 # Cudnn inference graph 955 with ops.Graph().as_default() as g: 956 model = CudnnTestModel( 957 rnn_mode, 958 num_layers, 959 num_units, 960 input_size, 961 direction=direction, 962 dtype=dtype, 963 training=False) 964 rnn = model.rnn 965 saver = saver_lib.Saver() 966 967 inference_input = np.random.rand(seq_length, batch_size, 968 input_size).astype(np.float32) 969 with self.test_session(use_gpu=True, graph=g) as sess: 970 sess.run(variables.global_variables_initializer()) 971 saver.restore(sess, save_path) 972 973 # Cudnn inference 974 cudnn_outputs_v, cudnn_output_states_v = model.Feed( 975 sess, inference_input, return_sum=False) 976 977 # Canonical RNN inference graph 978 with ops.Graph().as_default() as g: 979 cell_inputs = array_ops.placeholder( 980 dtype, shape=[seq_length, batch_size, input_size]) 981 if direction == CUDNN_RNN_UNIDIRECTION: 982 # outputs is one tensor, states are num_layer tuples, each 2 tensors 983 (outputs, states) = _CreateCudnnCompatibleCanonicalRNN(rnn, cell_inputs) 984 if rnn_mode == CUDNN_LSTM: 985 output_h = array_ops.stack([s.h for s in states]) 986 output_c = array_ops.stack([s.c for s in states]) 987 else: 988 output_state = array_ops.stack([s for s in states]) 989 else: 990 # outputs is one tensor. 991 # states is a tuple of 2 tuples: 992 # each sub tuple is num_layer tuples, each with 2 tensors. 993 (outputs, states) = _CreateCudnnCompatibleCanonicalRNN( 994 rnn, cell_inputs, is_bidi=True) 995 output_state_fw, output_state_bw = states 996 if rnn_mode == CUDNN_LSTM: 997 output_h, output_c = [], [] 998 for s_fw, s_bw in zip(output_state_fw, output_state_bw): 999 output_h.append(array_ops.stack([s_fw.h, s_bw.h])) 1000 output_c.append(array_ops.stack([s_fw.c, s_bw.c])) 1001 output_h = array_ops.concat(output_h, axis=0) 1002 output_c = array_ops.concat(output_c, axis=0) 1003 else: 1004 output_state = [] 1005 for s_fw, s_bw in zip(output_state_fw, output_state_bw): 1006 output_state.append(array_ops.stack([s_fw, s_bw])) 1007 output_state = array_ops.concat(output_state, axis=0) 1008 saver = saver_lib.Saver() 1009 1010 with self.test_session(use_gpu=True, graph=g) as sess: 1011 saver.restore(sess, save_path) 1012 1013 # BlockCell inference 1014 if rnn_mode == CUDNN_LSTM: 1015 outputs_v, output_h_v, output_c_v = sess.run( 1016 [outputs, output_h, output_c], 1017 feed_dict={cell_inputs: inference_input}) 1018 self.assertAllClose(cudnn_outputs_v, outputs_v) 1019 cudnn_output_h_v, cudnn_output_c_v = cudnn_output_states_v 1020 self.assertAllClose(cudnn_output_h_v, output_h_v) 1021 self.assertAllClose(cudnn_output_c_v, output_c_v) 1022 else: 1023 outputs_v, output_state_v = sess.run( 1024 [outputs, output_state], 1025 feed_dict={cell_inputs: inference_input}) 1026 self.assertAllClose(cudnn_outputs_v, outputs_v, atol=1e-4, rtol=2e-4) 1027 (cudnn_output_h_v,) = cudnn_output_states_v 1028 self.assertAllClose(cudnn_output_h_v, output_state_v, atol=2e-5, 1029 rtol=2e-5) 1030 1031 1032class CudnnRNNTestParamsSize(test_util.TensorFlowTestCase): 1033 1034 def _TestOpaqueParamsSize(self, rnn_mode, num_layers, num_units, input_size, 1035 dtype, direction): 1036 logging.info("Testing one lstm param size with config: %s", locals()) 1037 model = CudnnTestModel( 1038 rnn_mode, 1039 num_layers, 1040 num_units, 1041 input_size, 1042 dtype=dtype, 1043 direction=direction) 1044 rnn = model.rnn 1045 1046 # Min param size estimate = sum(weights.size) + sum(biases.size) 1047 min_params_size = ( 1048 sum(map(np.prod, rnn.canonical_weight_shapes)) + 1049 sum(sp[0] for sp in rnn.canonical_bias_shapes)) 1050 1051 opaque_params = rnn.trainable_variables[0] 1052 with self.test_session(use_gpu=True, graph=ops.get_default_graph()): 1053 variables.global_variables_initializer().run() 1054 opaque_params_size_v = opaque_params.eval().size 1055 self.assertLessEqual(min_params_size, opaque_params_size_v) 1056 1057 @unittest.skipUnless(test.is_built_with_cuda(), 1058 "Test only applicable when running on GPUs") 1059 def testOpaqueParamsSize(self): 1060 test_configs = [ 1061 [4, 200, 200], 1062 [4, 200, 300], 1063 [4, 200, 100], 1064 [1, 100, 200], 1065 [2, 200, 100], 1066 [3, 200, 400], 1067 ] 1068 directions = [CUDNN_RNN_UNIDIRECTION, CUDNN_RNN_BIDIRECTION] 1069 dtype_list = [dtypes.float16, dtypes.float32, dtypes.float64] 1070 rnns = [CUDNN_LSTM, CUDNN_GRU, CUDNN_RNN_RELU, CUDNN_RNN_TANH] 1071 for (rnn, config, dtype, direction) in itertools.product( 1072 rnns, test_configs, dtype_list, directions): 1073 num_layers, num_units, input_size = config 1074 with ops.Graph().as_default(): 1075 self._TestOpaqueParamsSize(rnn, num_layers, num_units, input_size, 1076 dtype, direction) 1077 1078 1079class CudnnRNNTestTraining(test_util.TensorFlowTestCase): 1080 1081 def setUp(self): 1082 super(CudnnRNNTestTraining, self).setUp() 1083 self._reset_rnd_gen_state = os.environ.get("TF_CUDNN_RESET_RND_GEN_STATE", 1084 str(False)) 1085 self._rnn_use_v2 = os.environ.get("TF_CUDNN_RNN_USE_V2", "0") 1086 1087 def tearDown(self): 1088 super(CudnnRNNTestTraining, self).tearDown() 1089 os.environ["TF_CUDNN_RESET_RND_GEN_STATE"] = self._reset_rnd_gen_state 1090 os.environ["TF_CUDNN_RNN_USE_V2"] = self._rnn_use_v2 1091 1092 def _ComputeNumericGrad(self, sess, y, x, delta=1e-4, step=1): 1093 """Compute the numeric gradient of y wrt to x. 1094 1095 Args: 1096 sess: The TF session constructed with a graph containing x and y. 1097 y: A scalar TF Tensor in the graph constructed in sess. 1098 x: A TF Tensor in the graph constructed in sess. 1099 delta: Gradient checker's small perturbation of x[i]. 1100 step: Only compute numerical gradients for a subset of x values. 1101 I.e. dy/dx[i] is computed if i % step == 0. 1102 Returns: 1103 A Tensor of the same shape and dtype as x. If x[i] is not chosen 1104 to compute the numerical gradient dy/x[i], the corresponding 1105 value is set to 0. 1106 """ 1107 1108 x_data = sess.run(x) 1109 x_size = x_data.size 1110 x_shape = x_data.shape 1111 1112 numeric_grad = np.zeros(x_size, dtype=x_data.dtype) 1113 1114 for i in range(0, x_size, step): 1115 x_pos = x_data.copy() 1116 if x_size == 1: 1117 x_pos += delta 1118 else: 1119 x_pos.flat[i] += delta 1120 y_pos_feed_dict = dict([(x.name, x_pos)]) 1121 y_pos = sess.run(y, feed_dict=y_pos_feed_dict) 1122 1123 x_neg = x_data.copy() 1124 if x_size == 1: 1125 x_neg -= delta 1126 else: 1127 x_neg.flat[i] -= delta 1128 y_neg_feed_dict = dict([(x.name, x_neg)]) 1129 y_neg = sess.run(y, feed_dict=y_neg_feed_dict) 1130 numeric_grad[i] = (y_pos - y_neg) / (2 * delta) 1131 return numeric_grad.reshape(x_shape) 1132 1133 def _GetShape(self, sess, inputs): 1134 if not isinstance(inputs, collections.Iterable): 1135 return sess.run(array_ops.shape(inputs)) 1136 else: 1137 return sess.run([array_ops.shape(x) for x in inputs]) 1138 1139 def _GradientCheckFp16(self, sess, y, xs, num_samples, 1140 tolerance=1e-6, delta=1e-4): 1141 """Gradient check for Fp16. 1142 1143 Fp16 numerical gradients end up being zeros. Use a new way to check 1144 gradients: 1145 1146 Given multi-variant function: 1147 y = f(x1, x2, ... xn) 1148 delta_y = f(x1 + delta_x1, x2+delta_x2, ..., xn+delta_xn) - 1149 f(x1, x2, ..., xn) 1150 = f'(x1) * delta_x1 + f'(x2) * delta_x2 + .. + f'(xn) * delta_xn 1151 where: 1152 delta_xi are very small disturbance. 1153 f'(xi) is the gradient of y w.r.t xi. 1154 1155 The gradient check verifies the expected delta_y calculated by the above 1156 equation is close to the actual delta_y. 1157 Args: 1158 sess: tf.Session object. 1159 y: output tensor. 1160 xs: a tensor or a list of input tensors. 1161 num_samples: number of test samples to run. 1162 tolerance: error tolerance. 1163 delta: the order of magnititued of input disturbance to apply to calculate 1164 the output change w.r.t inputs. 1165 """ 1166 sym_grads = self._ComputeSymGrads(sess, y, xs) 1167 xs_shapes = self._GetShape(sess, xs) 1168 1169 x_vals = [sess.run(x) for x in xs] 1170 for _ in range(num_samples): 1171 delta_xs = [delta * np.random.rand(*shape.tolist()) 1172 for shape in xs_shapes] 1173 1174 feed_dict = {} 1175 for x, x_val, delta_x in zip(xs, x_vals, delta_xs): 1176 feed_dict[x] = x_val + delta_x 1177 actual_delta_y = (float(sess.run(y, feed_dict=feed_dict)) - 1178 float(sess.run(y))) 1179 1180 expected_delta_y = 0. 1181 for sym_grad, delta_x in zip(sym_grads, delta_xs): 1182 expected_delta_y += np.dot( 1183 sym_grad.astype(np.float32).flatten(), 1184 delta_x.astype(np.float32).flatten()) 1185 self.assertAllClose(expected_delta_y, actual_delta_y, 1186 atol=tolerance, rtol=tolerance) 1187 1188 def _GradientCheck(self, sess, y, xs, tolerance=1e-6, delta=1e-4): 1189 sym_grads = self._ComputeSymGrads(sess, y, xs) 1190 1191 num_grads = [self._ComputeNumericGrad(sess, y, x, delta) for x in xs] 1192 self.assertEqual(len(sym_grads), len(num_grads)) 1193 for x, sym, num in zip(xs, sym_grads, num_grads): 1194 logging.info("Comparing gradients for input: %s", x.name) 1195 self.assertFalse(np.any(np.isnan(sym))) 1196 self.assertFalse(np.any(np.isnan(num))) 1197 self.assertAllClose(sym, num, atol=tolerance, rtol=tolerance) 1198 1199 def _ComputeSymGrads(self, sess, y, xs): 1200 sym_grads_t = gradients.gradients(y, xs) 1201 return sess.run(sym_grads_t) 1202 1203 def _TestOneSimpleTraining(self, rnn_mode, num_layers, num_units, input_size, 1204 batch_size, seq_length, dir_count, dropout, dtype, 1205 use_v2, delta, tolerance): 1206 # Gradient checking runs two forward ops with almost the same input. Need to 1207 # make sure the drop patterns across the two runs are the same. 1208 logging.info("Training test with config: %s", locals()) 1209 os.environ["TF_CUDNN_RESET_RND_GEN_STATE"] = str(True) 1210 1211 np.random.seed(1234) 1212 random_seed.set_random_seed(5678) 1213 has_input_c = (rnn_mode == CUDNN_LSTM) 1214 direction = (CUDNN_RNN_UNIDIRECTION 1215 if dir_count == 1 else CUDNN_RNN_BIDIRECTION) 1216 if use_v2: 1217 os.environ["TF_CUDNN_RNN_USE_V2"] = "1" 1218 else: 1219 os.environ["TF_CUDNN_RNN_USE_V2"] = "0" 1220 model = CudnnTestModel( 1221 rnn_mode, 1222 num_layers, 1223 num_units, 1224 input_size, 1225 direction=direction, 1226 dropout=dropout, 1227 dtype=dtype, 1228 training=True, 1229 bias_initializer=init_ops.random_normal_initializer( 1230 mean=1., dtype=dtype)) 1231 rnn = model.rnn 1232 params = rnn.trainable_variables[0] 1233 1234 inputs = variables.Variable( 1235 random_ops.random_uniform([seq_length, batch_size, input_size], 1236 dtype=dtype), 1237 dtype=dtype).read_value() 1238 input_h = variables.Variable( 1239 random_ops.random_uniform( 1240 [num_layers * dir_count, batch_size, num_units], dtype=dtype), 1241 dtype=dtype).read_value() 1242 if has_input_c: 1243 input_c = variables.Variable( 1244 random_ops.random_uniform( 1245 [num_layers * dir_count, batch_size, num_units], dtype=dtype), 1246 dtype=dtype).read_value() 1247 initial_state = (input_h, input_c) 1248 else: 1249 initial_state = (input_h,) 1250 total_sum = model.FProp(inputs, initial_state, training=True) 1251 1252 with self.test_session(use_gpu=True, graph=ops.get_default_graph()) as sess: 1253 sess.run(variables.global_variables_initializer()) 1254 all_inputs = [inputs, params] 1255 for s in initial_state: 1256 all_inputs.append(s) 1257 if dtype == dtypes.float16: 1258 self._GradientCheckFp16( 1259 sess, total_sum, all_inputs, 1260 num_samples=FLAGS.grad_check_num_samples, 1261 tolerance=tolerance, delta=delta) 1262 else: 1263 for _ in range(FLAGS.grad_check_num_samples): 1264 # Each time choose a different set of inputs. 1265 sess.run(variables.global_variables_initializer()) 1266 self._GradientCheck( 1267 sess, total_sum, all_inputs, 1268 tolerance=tolerance, delta=delta) 1269 1270 def _TestSimpleTrainingHelper(self, rnn_mode, test_configs): 1271 dropouts = [0, 0.5, 1.] 1272 v2_options = [False, True] 1273 for config, dropout, use_v2 in itertools.product(test_configs, dropouts, 1274 v2_options): 1275 dtype = config.get("dtype", dtypes.float32) 1276 delta = config.get("delta", 1e-4) 1277 tolerance = config.get("tolerance", 1e-6) 1278 dir_count = config.get("dir_count", 1) 1279 shape = config["shape"] 1280 if dtype == dtypes.float64: 1281 # TODO(jamesqin): b/117848763 1282 use_v2 = False 1283 with ops.Graph().as_default(): 1284 self._TestOneSimpleTraining( 1285 rnn_mode, shape["num_layers"], shape["num_units"], 1286 shape["input_size"], shape["batch_size"], shape["seq_length"], 1287 dir_count, dropout, dtype, use_v2, delta, tolerance) 1288 1289 @unittest.skipUnless(test.is_built_with_cuda(), 1290 "Test only applicable when running on GPUs") 1291 def testSimpleTrainingLSTMFp64(self): 1292 test_configs = [ 1293 { 1294 "dtype": dtypes.float64, 1295 "tolerance": 5e-6, 1296 "shape": { 1297 "num_layers": 2, 1298 "num_units": 3, 1299 "input_size": 4, 1300 "batch_size": 3, 1301 "seq_length": 4, 1302 }, 1303 }, 1304 ] 1305 self._TestSimpleTrainingHelper(CUDNN_LSTM, test_configs) 1306 1307 @unittest.skipUnless(test.is_built_with_cuda(), 1308 "Test only applicable when running on GPUs") 1309 def testSimpleTrainingLSTMFp32(self): 1310 test_configs = [ 1311 { 1312 "dtype": dtypes.float32, 1313 "delta": 1e-4, 1314 "tolerance": 9e-2, 1315 "shape": { 1316 "num_layers": 2, 1317 "num_units": 3, 1318 "input_size": 4, 1319 "batch_size": 3, 1320 "seq_length": 4, 1321 }, 1322 }, 1323 ] 1324 self._TestSimpleTrainingHelper(CUDNN_LSTM, test_configs) 1325 1326 @unittest.skipUnless(test.is_built_with_cuda(), 1327 "Test only applicable when running on GPUs") 1328 def testSimpleTrainingLSTMFp16(self): 1329 test_configs = [ 1330 { 1331 "dtype": dtypes.float16, 1332 "delta": 1e-3, 1333 "tolerance": 9e-2, 1334 "shape": { 1335 "num_layers": 2, 1336 "num_units": 3, 1337 "input_size": 4, 1338 "batch_size": 3, 1339 "seq_length": 4, 1340 }, 1341 }, 1342 { 1343 "dtype": dtypes.float16, 1344 "delta": 1e-2, 1345 "tolerance": 9e-2, 1346 "shape": { 1347 "num_layers": 2, 1348 "num_units": 6, 1349 "input_size": 8, 1350 "batch_size": 6, 1351 "seq_length": 4, 1352 }, 1353 }, 1354 ] 1355 self._TestSimpleTrainingHelper(CUDNN_LSTM, test_configs) 1356 1357 @unittest.skipUnless(test.is_built_with_cuda(), 1358 "Test only applicable when running on GPUs") 1359 def testSimpleTrainingGRUFp64(self): 1360 test_configs = [ 1361 { 1362 "dtype": dtypes.float64, 1363 "tolerance": 5e-6, 1364 "shape": { 1365 "num_layers": 2, 1366 "num_units": 3, 1367 "input_size": 4, 1368 "batch_size": 3, 1369 "seq_length": 4, 1370 } 1371 }, 1372 ] 1373 self._TestSimpleTrainingHelper(CUDNN_GRU, test_configs) 1374 1375 @unittest.skipUnless(test.is_built_with_cuda(), 1376 "Test only applicable when running on GPUs") 1377 def testSimpleTrainingGRUFp32(self): 1378 test_configs = [ 1379 { 1380 "dtype": dtypes.float32, 1381 "delta": 1e-3, 1382 "tolerance": 4e-3, 1383 "shape": { 1384 "num_layers": 2, 1385 "num_units": 3, 1386 "input_size": 4, 1387 "batch_size": 3, 1388 "seq_length": 4, 1389 }, 1390 }, 1391 ] 1392 self._TestSimpleTrainingHelper(CUDNN_GRU, test_configs) 1393 1394 @unittest.skipUnless(test.is_built_with_cuda(), 1395 "Test only applicable when running on GPUs") 1396 def testSimpleTrainingGRUFp16(self): 1397 test_configs = [ 1398 { 1399 "dtype": dtypes.float16, 1400 "delta": 2e-3, 1401 "tolerance": 6e-2, 1402 "shape": { 1403 "num_layers": 2, 1404 "num_units": 3, 1405 "input_size": 4, 1406 "batch_size": 3, 1407 "seq_length": 4, 1408 }, 1409 }, 1410 ] 1411 self._TestSimpleTrainingHelper(CUDNN_GRU, test_configs) 1412 1413 @unittest.skipUnless(test.is_built_with_cuda(), 1414 "Test only applicable when running on GPUs") 1415 def testSimpleTrainingRNNTanhFp64(self): 1416 test_configs = [ 1417 { 1418 "dtype": dtypes.float64, 1419 "tolerance": 5e-6, 1420 "shape": { 1421 "num_layers": 2, 1422 "num_units": 3, 1423 "input_size": 4, 1424 "batch_size": 3, 1425 "seq_length": 4, 1426 }, 1427 }, 1428 ] 1429 self._TestSimpleTrainingHelper(CUDNN_RNN_TANH, test_configs) 1430 1431 @unittest.skipUnless(test.is_built_with_cuda(), 1432 "Test only applicable when running on GPUs") 1433 def testSimpleTrainingRNNTanhFp32(self): 1434 test_configs = [ 1435 { 1436 "dtype": dtypes.float32, 1437 "delta": 1e-3, 1438 "tolerance": 5e-3, 1439 "shape": { 1440 "num_layers": 2, 1441 "num_units": 3, 1442 "input_size": 4, 1443 "batch_size": 3, 1444 "seq_length": 4, 1445 }, 1446 }, 1447 ] 1448 self._TestSimpleTrainingHelper(CUDNN_RNN_TANH, test_configs) 1449 1450 @unittest.skipUnless(test.is_built_with_cuda(), 1451 "Test only applicable when running on GPUs") 1452 def testSimpleTrainingRNNTanhFp16(self): 1453 test_configs = [ 1454 { 1455 "dtype": dtypes.float16, 1456 "delta": 1e-3, 1457 "tolerance": 5e-2, 1458 "shape": { 1459 "num_layers": 2, 1460 "num_units": 3, 1461 "input_size": 4, 1462 "batch_size": 3, 1463 "seq_length": 4, 1464 }, 1465 }, 1466 ] 1467 self._TestSimpleTrainingHelper(CUDNN_RNN_TANH, test_configs) 1468 1469 @unittest.skipUnless(test.is_built_with_cuda(), 1470 "Test only applicable when running on GPUs") 1471 def testSimpleTrainingRNNReluFp64(self): 1472 test_configs = [ 1473 { 1474 "dtype": dtypes.float64, 1475 "tolerance": 5e-6, 1476 "shape": { 1477 "num_layers": 2, 1478 "num_units": 3, 1479 "input_size": 4, 1480 "batch_size": 3, 1481 "seq_length": 4, 1482 }, 1483 }, 1484 ] 1485 self._TestSimpleTrainingHelper(CUDNN_RNN_RELU, test_configs) 1486 1487 @unittest.skipUnless(test.is_built_with_cuda(), 1488 "Test only applicable when running on GPUs") 1489 def testSimpleTrainingRNNReluFp32(self): 1490 test_configs = [ 1491 { 1492 "dtype": dtypes.float32, 1493 "delta": 1e-4, 1494 "tolerance": 3e-1, 1495 "shape": { 1496 "num_layers": 2, 1497 "num_units": 3, 1498 "input_size": 4, 1499 "batch_size": 3, 1500 "seq_length": 4, 1501 }, 1502 }, 1503 ] 1504 self._TestSimpleTrainingHelper(CUDNN_RNN_RELU, test_configs) 1505 1506 @unittest.skipUnless(test.is_built_with_cuda(), 1507 "Test only applicable when running on GPUs") 1508 def testSimpleTrainingRNNReluFp16(self): 1509 test_configs = [ 1510 { 1511 "dtype": dtypes.float16, 1512 "delta": 1e-3, 1513 "tolerance": 7e-2, 1514 "shape": { 1515 "num_layers": 2, 1516 "num_units": 3, 1517 "input_size": 4, 1518 "batch_size": 3, 1519 "seq_length": 4, 1520 }, 1521 }, 1522 ] 1523 self._TestSimpleTrainingHelper(CUDNN_RNN_RELU, test_configs) 1524 1525 1526if __name__ == "__main__": 1527 argv0 = sys.argv[0] 1528 parser = argparse.ArgumentParser() 1529 parser.add_argument( 1530 "--grad_check_num_samples", 1531 type=int, 1532 default=1, 1533 help="Number of samples to run for gradient check.") 1534 FLAGS, unparsed = parser.parse_known_args() 1535 sys.argv = [argv0] + unparsed 1536 googletest.main() 1537